Conversation
| ByteBuffer buffer = ByteBuffer.allocate(totalSize); | ||
| buffer.order(ByteOrder.LITTLE_ENDIAN); | ||
|
|
||
| buffer.putLong(MAGIC); |
There was a problem hiding this comment.
I think in order to be compatible with older versions, we should design Magic Numbers after the version number, and Linux kernel images are designed in this way.
0084314 to
58ab357
Compare
|
Due to compatibility issues with this PR, I suggest that we modify all relevant content in this PR, including:
|
c28fc68 to
226f6e8
Compare
OK |
a26004b to
428e913
Compare
|
|
||
| @Immutable | ||
| public static final ConfigOption<String> BLOB_STORED_DESCRIPTOR_FIELDS = | ||
| key("blob.stored-descriptor-fields") |
| this(vectorizedColumnBatch, null, rowId); | ||
| } | ||
|
|
||
| public ColumnarRow(VectorizedColumnBatch vectorizedColumnBatch, UriReader uriReader) { |
There was a problem hiding this comment.
Do not modify constructors. Just use setFileIO.
| this.rowId = 0; | ||
| } | ||
|
|
||
| public void setUriReader(UriReader uriReader) { |
| byte version = buffer.get(); | ||
| if (version == 1) { | ||
| try { | ||
| deserialize(bytes); |
There was a problem hiding this comment.
Remove codes for v1, only works for v2.
|
|
||
| /** A factory to create and cache {@link UriReader}. */ | ||
| public class UriReaderFactory { | ||
| public class UriReaderFactory implements Serializable { |
There was a problem hiding this comment.
SparkRow may use this
| .with_description("Whether to return blob values as serialized BlobDescriptor bytes when reading.") | ||
| ) | ||
|
|
||
| BLOB_STORED_DESCRIPTOR_FIELDS: ConfigOption[str] = ( |
There was a problem hiding this comment.
Maybe It's better for BLOB_STORED_DESCRIPTOR_FIELDS and blob-descriptor-field to be the same.
| def write_blob(self, path: str, data: pyarrow.Table, blob_as_descriptor: bool, **kwargs): | ||
| try: | ||
| # Kept for API compatibility. Write behavior is adaptive and does not depend on this flag. | ||
| _ = blob_as_descriptor |
There was a problem hiding this comment.
If the param blob_as_descriptor is not needed in this method, removing the param is better?
| public static BlobDescriptor deserialize(byte[] bytes) { | ||
| ByteBuffer buffer = ByteBuffer.wrap(bytes); | ||
| buffer.order(ByteOrder.LITTLE_ENDIAN); | ||
|
|
| buffer.order(ByteOrder.LITTLE_ENDIAN); | ||
|
|
||
| byte version = buffer.get(); | ||
| if (version != CURRENT_VERSION) { |
There was a problem hiding this comment.
V1 compatiblity and add tests.
| * fields. | ||
| */ | ||
| public static Pair<RowType, RowType> splitBlob( | ||
| RowType rowType, Set<String> blobStoredDescriptorFields) { |
| boolean asyncFileWrite, | ||
| boolean statsDenseStore, | ||
| @Nullable BlobConsumer blobConsumer, | ||
| Set<String> blobStoredDescriptorFields, |
| @Nullable BlobConsumer blobConsumer) { | ||
| RowType blobRowType = BlobType.splitBlob(writeSchema).getRight(); | ||
| @Nullable BlobConsumer blobConsumer, | ||
| Set<String> blobStoredDescriptorFields) { |
| import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy; | ||
|
|
||
| /** Tests for table with blob. */ | ||
| public class BlobTableTest extends TableTestBase { |
There was a problem hiding this comment.
Add a test to validate orc and avro.
| import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy; | ||
|
|
||
| /** Tests for table with blob. */ | ||
| public class BlobTableTest extends TableTestBase { |
There was a problem hiding this comment.
Add a test to validate orc and avro.
| MAGIC = 0x424C4F4244455343 # "BLOBDESC" | ||
|
|
||
| def __init__(self, uri: str, offset: int, length: int, version: int = CURRENT_VERSION): | ||
| if version != self.CURRENT_VERSION: |
There was a problem hiding this comment.
Remember to sync to python.
|
Hi @JingsongLi, thanks for the review comments. I have addressed all of them in this PR and synchronized the related Java/Python changes accordingly. Could you please take another look when you have time? |
| self.assertEqual(result.column('pic1').to_pylist()[0], pic1_data) | ||
| self.assertEqual(result.column('pic2').to_pylist()[0], pic2_data) | ||
|
|
||
| def test_blob_stored_descriptor_fields_rejects_non_descriptor_input(self): |
There was a problem hiding this comment.
all blob_stored_descriptor_fields to blob_descriptor_field
Purpose
Blob v2 contains the following new features:
Tests
API and Format
Documentation