From df0537541b4c587d749ae6e5a1fd3444ed84dddc Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Sun, 29 Mar 2026 20:11:07 +0000 Subject: [PATCH 01/11] feat(inspect): Add base class for metadata table support --- src/iceberg/CMakeLists.txt | 4 + src/iceberg/inspect/history_table.cc | 56 ++++++ src/iceberg/inspect/history_table.h | 57 ++++++ src/iceberg/inspect/metadata_table.cc | 87 +++++++++ src/iceberg/inspect/metadata_table.h | 180 ++++++++++++++++++ src/iceberg/inspect/metadata_table_factory.cc | 37 ++++ src/iceberg/inspect/metadata_table_factory.h | 61 ++++++ src/iceberg/inspect/snapshots_table.cc | 61 ++++++ src/iceberg/inspect/snapshots_table.h | 57 ++++++ src/iceberg/test/CMakeLists.txt | 4 + src/iceberg/test/metadata_table_test.cc | 141 ++++++++++++++ src/iceberg/type_fwd.h | 5 + 12 files changed, 750 insertions(+) create mode 100644 src/iceberg/inspect/history_table.cc create mode 100644 src/iceberg/inspect/history_table.h create mode 100644 src/iceberg/inspect/metadata_table.cc create mode 100644 src/iceberg/inspect/metadata_table.h create mode 100644 src/iceberg/inspect/metadata_table_factory.cc create mode 100644 src/iceberg/inspect/metadata_table_factory.h create mode 100644 src/iceberg/inspect/snapshots_table.cc create mode 100644 src/iceberg/inspect/snapshots_table.h create mode 100644 src/iceberg/test/metadata_table_test.cc diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index b641bb75e..47b91ebdd 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -41,6 +41,9 @@ set(ICEBERG_SOURCES file_io_registry.cc file_reader.cc file_writer.cc + inspect/history_table.cc + inspect/metadata_table_factory.cc + inspect/snapshots_table.cc inheritable_metadata.cc json_serde.cc location_provider.cc @@ -58,6 +61,7 @@ set(ICEBERG_SOURCES manifest/v2_metadata.cc manifest/v3_metadata.cc metadata_columns.cc + inspect/metadata_table.cc metrics_config.cc metrics/commit_report.cc metrics/counter.cc diff --git a/src/iceberg/inspect/history_table.cc b/src/iceberg/inspect/history_table.cc new file mode 100644 index 000000000..55fd75168 --- /dev/null +++ b/src/iceberg/inspect/history_table.cc @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inspect/history_table.h" + +#include +#include + +#include "iceberg/inspect/metadata_table.h" +#include "iceberg/schema.h" +#include "iceberg/schema_field.h" +#include "iceberg/table_identifier.h" +#include "iceberg/type.h" + +namespace iceberg { + +HistoryTable::HistoryTable(std::shared_ptr table) + : BaseMetadataTable(table, CreateName(table->name()), CreateSchema()) {} + +HistoryTable::~HistoryTable() = default; + +std::shared_ptr HistoryTable::CreateSchema() { + return std::make_shared( + std::vector{ + SchemaField::MakeRequired(1, "made_current_at", int64()), + SchemaField::MakeRequired(2, "snapshot_id", int64()), + SchemaField::MakeOptional(3, "parent_id", int64()), + SchemaField::MakeRequired(4, "is_current_ancestor", boolean())}, + 1); +} + +TableIdentifier HistoryTable::CreateName(const TableIdentifier& source_name) { + return TableIdentifier{source_name.ns, source_name.name + ".history"}; +} + +Result> HistoryTable::Make(std::shared_ptr
table) { + return std::shared_ptr(new HistoryTable(table)); +} + +} // namespace iceberg diff --git a/src/iceberg/inspect/history_table.h b/src/iceberg/inspect/history_table.h new file mode 100644 index 000000000..e624359f3 --- /dev/null +++ b/src/iceberg/inspect/history_table.h @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/inspect/metadata_table.h" +#include "iceberg/result.h" +#include "iceberg/table.h" + +namespace iceberg { + +/// \brief History metadata table +/// +/// History is based on the table's snapshot log, which logs each update +/// to the table's current snapshot. Each row has columns: +/// - made_current_at (long, timestamp) +/// - snapshot_id (long) +/// - parent_id (long, optional) +/// - is_current_ancestor (bool) +class ICEBERG_EXPORT HistoryTable : public BaseMetadataTable { + public: + /// \brief Create a HistoryTable from table metadata + /// + /// \param[in] table The source table + /// \return A HistoryTable instance or error status + static Result> Make(std::shared_ptr
table); + + ~HistoryTable() override; + + private: + HistoryTable(std::shared_ptr
table); + + std::shared_ptr CreateSchema(); + + TableIdentifier CreateName(const TableIdentifier& source_name); +}; + +} // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table.cc b/src/iceberg/inspect/metadata_table.cc new file mode 100644 index 000000000..9a56ce362 --- /dev/null +++ b/src/iceberg/inspect/metadata_table.cc @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inspect/metadata_table.h" + +#include +#include +#include + +#include "iceberg/file_io.h" +#include "iceberg/schema.h" +#include "iceberg/schema_field.h" +#include "iceberg/table_identifier.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_scan.h" +#include "iceberg/type.h" +#include "iceberg/util/uuid.h" + +namespace iceberg { + +BaseMetadataTable::BaseMetadataTable(std::shared_ptr
source_table, + TableIdentifier identifier, + std::shared_ptr schema) + : Table(identifier, source_table->metadata(), + std::string(source_table->metadata_file_location()), source_table->io(), + source_table->catalog()), + source_table_(std::move(source_table)), + schema_(schema) { + uuid_ = Uuid::GenerateV4().ToString(); + schemas_[schema->schema_id()] = schema; +} + +BaseMetadataTable::~BaseMetadataTable() = default; + +Status BaseMetadataTable::Refresh() { + return NotSupported("Cannot refresh a metadata table"); +} + +Result> BaseMetadataTable::NewScan() const { + return NotSupported("TODO: Scanning metadata tables is not yet supported"); +}; + +Result> BaseMetadataTable::NewTransaction() { + return NotSupported("Cannot create a transaction for a metadata table"); +} + +Result> BaseMetadataTable::NewUpdateProperties() { + return NotSupported("Cannot create an update properties for a metadata table"); +} + +Result> BaseMetadataTable::NewUpdateSchema() { + return NotSupported("Cannot create an update schema for a metadata table"); +} + +Result> BaseMetadataTable::NewUpdateLocation() { + return NotSupported("Cannot create an update location for a metadata table"); +} + +Result> BaseMetadataTable::NewUpdatePartitionSpec() { + return NotSupported("Cannot create an update partition spec for a metadata table"); +} + +Result> BaseMetadataTable::NewUpdateSortOrder() { + return NotSupported("Cannot create an update sort order for a metadata table"); +} + +Result> BaseMetadataTable::NewExpireSnapshots() { + return NotSupported("Cannot create an expire snapshots for a metadata table"); +} + +} // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table.h b/src/iceberg/inspect/metadata_table.h new file mode 100644 index 000000000..7cb19e357 --- /dev/null +++ b/src/iceberg/inspect/metadata_table.h @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/location_provider.h" +#include "iceberg/result.h" +#include "iceberg/sort_order.h" +#include "iceberg/table.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_scan.h" + +namespace iceberg { + +/// Forward declarations +class FileIO; + +/// \brief Base class for Iceberg metadata tables +/// +/// Metadata tables expose table metadata as queryable tables with schemas and scan +/// support. They provide read-only access to metadata. +class ICEBERG_EXPORT BaseMetadataTable : public Table { + public: + /// \brief Returns the identifier of this table + const TableIdentifier& name() const { return identifier_; } + + /// \brief Returns the UUID of the table + const std::string& uuid() const { return uuid_; } + + /// \brief Returns the schema for this table, return NotFoundError if not found + Result> schema() const { return schema_; } + + /// \brief Returns a map of schema for this table + Result< + std::reference_wrapper>>> + schemas() const { + return schemas_; + } + + /// \brief Returns the partition spec for this table, return NotFoundError if not found + Result> spec() const { return partition_spec; }; + + /// \brief Returns a map of partition specs for this table + Result>>> + specs() const { + return partition_specs_; + } + + /// \brief Returns the sort order for this table, return NotFoundError if not found + Result> sort_order() const { return sort_order_; } + + /// \brief Returns a map of sort order IDs to sort orders for this table + Result>>> + sort_orders() const { + return sort_orders_; + } + + /// \brief Returns the properties of this table + const TableProperties& properties() const { return properties_; } + + /// \brief Returns the table's metadata file location + std::string_view metadata_file_location() const { + return source_table_->metadata_file_location(); + } + + /// \brief Returns the table's base location + std::string_view location() const { return source_table_->location(); } + + /// \brief Returns the time when this table was last updated + TimePointMs last_updated_ms() const { return source_table_->last_updated_ms(); } + + /// \brief Returns the table's current snapshot, return NotFoundError if not found + Result> current_snapshot() const { + return source_table_->current_snapshot(); + } + + /// \brief Get the snapshot of this table with the given id + /// + /// \param snapshot_id the ID of the snapshot to get + /// \return the Snapshot with the given id, return NotFoundError if not found + Result> SnapshotById(int64_t snapshot_id) const { + return source_table_->SnapshotById(snapshot_id); + } + + /// \brief Get the snapshots of this table + const std::vector>& snapshots() const { + return source_table_->snapshots(); + } + + /// \brief Get the snapshot history of this table + const std::vector& history() const { + return source_table_->history(); + } + + /// \brief Returns the current metadata for this table + const std::shared_ptr& metadata() const { + // TODO: or should we return an empty TableMetadata? + return source_table_->metadata(); + } + + /// \brief Returns the catalog that this table belongs to + const std::shared_ptr& catalog() const { return source_table_->catalog(); } + + /// \brief Returns a LocationProvider for this table + Result> location_provider() const { + return source_table_->location_provider(); + } + + /// \brief Refreshing is not supported in metadata tables. + Status Refresh() override; + + /// \brief Create a new table scan builder for this table + /// + /// Once a table scan builder is created, it can be refined to project columns and + /// filter data. + Result> NewScan() const; + + /// \brief Creating transactions is not supported in metadata tables. + Result> NewTransaction() override; + + /// \brief Updating partition specs is not supported in metadata tables. + Result> NewUpdatePartitionSpec() override; + + /// \brief Updating table properties is not supported in metadata tables. + Result> NewUpdateProperties() override; + + /// \brief Updating sort orders is not supported in metadata tables. + Result> NewUpdateSortOrder() override; + + /// \brief Updating schemas is not supported in metadata tables. + Result> NewUpdateSchema() override; + + /// \brief Expiring snapshots is not supported in metadata tables. + Result> NewExpireSnapshots() override; + + /// \brief Updating table location is not supported in metadata tables. + Result> NewUpdateLocation() override; + + protected: + BaseMetadataTable(std::shared_ptr
source_table, TableIdentifier identifier, + std::shared_ptr schema); + + virtual ~BaseMetadataTable(); + + std::shared_ptr
source_table_; + std::string uuid_; + std::shared_ptr schema_; + std::unordered_map> schemas_; + TableProperties properties_ = TableProperties(); + const std::shared_ptr sort_order_ = SortOrder::Unsorted(); + const std::unordered_map> sort_orders_ = { + {sort_order_->order_id(), sort_order_}}; + const std::shared_ptr partition_spec = PartitionSpec::Unpartitioned(); + const std::unordered_map> partition_specs_ = { + {partition_spec->spec_id(), partition_spec}}; +}; + +} // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table_factory.cc b/src/iceberg/inspect/metadata_table_factory.cc new file mode 100644 index 000000000..b4a981672 --- /dev/null +++ b/src/iceberg/inspect/metadata_table_factory.cc @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inspect/metadata_table_factory.h" + +#include "iceberg/inspect/history_table.h" +#include "iceberg/inspect/snapshots_table.h" + +namespace iceberg { + +Result> MetadataTableFactory::GetSnapshotsTable( + std::shared_ptr
table) { + return SnapshotsTable::Make(table); +} + +Result> MetadataTableFactory::GetHistoryTable( + std::shared_ptr
table) { + return HistoryTable::Make(table); +} + +} // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table_factory.h b/src/iceberg/inspect/metadata_table_factory.h new file mode 100644 index 000000000..3cf031884 --- /dev/null +++ b/src/iceberg/inspect/metadata_table_factory.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/table.h" + +namespace iceberg { + +class HistoryTable; +class SnapshotsTable; +class Table; + +/// \brief Metadata table factory and inspector +/// +/// MetadataTable provides factory methods to create specific metadata tables for +/// inspecting table metadata. Each metadata table exposes a different aspect of the +/// table's metadata as a scannable Iceberg table. +/// +/// Usage: +/// auto snapshots = ICEBERG_TRY(MetadataTable::GetSnapshotsTable(table)); +/// auto scan = ICEBERG_TRY(snapshots->NewScan()); +/// // ... scan and read snapshot data +class ICEBERG_EXPORT MetadataTableFactory { + public: + /// \brief Create a SnapshotsTable from a table + /// + /// \param table The source table + /// \return A SnapshotsTable exposing all snapshots or error status + static Result> GetSnapshotsTable( + std::shared_ptr
table); + + /// \brief Create a HistoryTable from a table + /// + /// \param table The source table + /// \return A HistoryTable exposing snapshot history or error status + static Result> GetHistoryTable( + std::shared_ptr
table); +}; + +} // namespace iceberg diff --git a/src/iceberg/inspect/snapshots_table.cc b/src/iceberg/inspect/snapshots_table.cc new file mode 100644 index 000000000..4b0f6f790 --- /dev/null +++ b/src/iceberg/inspect/snapshots_table.cc @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inspect/snapshots_table.h" + +#include +#include + +#include "iceberg/inspect/metadata_table.h" +#include "iceberg/schema.h" +#include "iceberg/schema_field.h" +#include "iceberg/table_identifier.h" +#include "iceberg/type.h" + +namespace iceberg { + +SnapshotsTable::SnapshotsTable(std::shared_ptr
table) + : BaseMetadataTable(table, CreateName(table->name()), CreateSchema()) {} + +SnapshotsTable::~SnapshotsTable() = default; + +std::shared_ptr SnapshotsTable::CreateSchema() { + return std::make_shared( + std::vector{SchemaField::MakeRequired(1, "committed_at", int64()), + SchemaField::MakeOptional(2, "snapshot_id", int64()), + SchemaField::MakeRequired(3, "parent_id", int64()), + SchemaField::MakeRequired(4, "manifest_list", string()), + SchemaField::MakeRequired( + 5, "summary", + std::make_shared( + SchemaField::MakeRequired(6, "key", string()), + SchemaField::MakeRequired(7, "value", string())))}, + 1); +} + +TableIdentifier SnapshotsTable::CreateName(const TableIdentifier& source_name) { + return TableIdentifier{source_name.ns, source_name.name + ".snapshots"}; +} + +Result> SnapshotsTable::Make( + std::shared_ptr
table) { + return std::shared_ptr(new SnapshotsTable(table)); +} + +} // namespace iceberg diff --git a/src/iceberg/inspect/snapshots_table.h b/src/iceberg/inspect/snapshots_table.h new file mode 100644 index 000000000..fc6c46013 --- /dev/null +++ b/src/iceberg/inspect/snapshots_table.h @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/inspect/metadata_table.h" +#include "iceberg/result.h" +#include "iceberg/table.h" + +namespace iceberg { + +/// \brief Snapshots metadata table +/// +/// Exposes all snapshots in the table as rows with columns: +/// - committed_at (timestamp) +/// - snapshot_id (long) +/// - parent_id (long) +/// - manifest_list (string) +/// - summary (map) +class ICEBERG_EXPORT SnapshotsTable : public BaseMetadataTable { + public: + /// \brief Create a SnapshotsTable from table metadata + /// + /// \param[in] table The source table + /// \return A SnapshotsTable instance or error status + static Result> Make(std::shared_ptr
table); + + ~SnapshotsTable() override; + + private: + SnapshotsTable(std::shared_ptr
table); + + std::shared_ptr CreateSchema(); + + TableIdentifier CreateName(const TableIdentifier& source_name); +}; + +} // namespace iceberg diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 2d56d7f35..d6f50fc57 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -242,6 +242,10 @@ if(ICEBERG_BUILD_BUNDLE) delete_filter_test.cc delete_loader_test.cc file_scan_task_reader_test.cc) + + add_iceberg_test(metadata_table_test SOURCES metadata_table_test.cc) + + add_iceberg_test(data_writer_test USE_BUNDLE SOURCES data_writer_test.cc) endif() diff --git a/src/iceberg/test/metadata_table_test.cc b/src/iceberg/test/metadata_table_test.cc new file mode 100644 index 000000000..9260d76ec --- /dev/null +++ b/src/iceberg/test/metadata_table_test.cc @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inspect/metadata_table.h" + +#include +#include + +#include "iceberg/inspect/metadata_table_factory.h" +#include "iceberg/inspect/snapshots_table.h" +#include "iceberg/schema.h" +#include "iceberg/schema_field.h" +#include "iceberg/table.h" +#include "iceberg/table_identifier.h" +#include "iceberg/table_metadata.h" +#include "iceberg/test/matchers.h" +#include "iceberg/test/mock_catalog.h" +#include "iceberg/test/mock_io.h" + +namespace iceberg { + +class MetadataTableTest : public ::testing::Test { + protected: + void SetUp() override { + io_ = std::make_shared(); + catalog_ = std::make_shared(); + + auto schema = std::make_shared( + std::vector{SchemaField::MakeRequired(1, "id", int64()), + SchemaField::MakeOptional(2, "name", string())}, + 1); + metadata_ = std::make_shared( + TableMetadata{.format_version = 2, .schemas = {schema}, .current_schema_id = 1}); + + TableIdentifier source_ident{.ns = Namespace{.levels = {"db"}}, + .name = "source_table"}; + auto source_table_result = + Table::Make(source_ident, metadata_, "s3://bucket/meta.json", io_, catalog_); + EXPECT_THAT(source_table_result, IsOk()); + source_table_ = *source_table_result; + + auto snapshots_table_result = MetadataTableFactory::GetSnapshotsTable(source_table_); + EXPECT_THAT(snapshots_table_result, IsOk()); + snapshots_table_ = *snapshots_table_result; + } + + std::shared_ptr io_; + std::shared_ptr catalog_; + std::shared_ptr metadata_; + std::shared_ptr
source_table_; + std::shared_ptr snapshots_table_; +}; + +TEST_F(MetadataTableTest, Constructor) { + EXPECT_EQ(snapshots_table_->name().name, "source_table.snapshots"); + EXPECT_FALSE(snapshots_table_->uuid().empty()); + auto schema_result = snapshots_table_->schema(); + EXPECT_THAT(schema_result, IsOk()); + EXPECT_EQ((*schema_result)->schema_id(), 1); +} + +TEST_F(MetadataTableTest, DelegatesToSourceTable) { + EXPECT_EQ(snapshots_table_->location(), source_table_->location()); + EXPECT_EQ(snapshots_table_->last_updated_ms(), source_table_->last_updated_ms()); + EXPECT_EQ(snapshots_table_->metadata(), source_table_->metadata()); + EXPECT_EQ(snapshots_table_->catalog(), source_table_->catalog()); +} + +TEST_F(MetadataTableTest, NotSupportedOperations) { + EXPECT_THAT(snapshots_table_->Refresh(), HasErrorMessage("Cannot")); + EXPECT_THAT(snapshots_table_->NewTransaction(), HasErrorMessage("Cannot")); + EXPECT_THAT(snapshots_table_->NewUpdateProperties(), HasErrorMessage("Cannot")); + EXPECT_THAT(snapshots_table_->NewUpdateSchema(), HasErrorMessage("Cannot")); + EXPECT_THAT(snapshots_table_->NewUpdateLocation(), HasErrorMessage("Cannot")); + EXPECT_THAT(snapshots_table_->NewUpdatePartitionSpec(), HasErrorMessage("Cannot")); + EXPECT_THAT(snapshots_table_->NewUpdateSortOrder(), HasErrorMessage("Cannot")); + EXPECT_THAT(snapshots_table_->NewExpireSnapshots(), HasErrorMessage("Cannot")); +} + +TEST_F(MetadataTableTest, SchemasAndSpecs) { + auto schemas_result = snapshots_table_->schemas(); + EXPECT_THAT(schemas_result, IsOk()); + EXPECT_EQ(schemas_result->get().size(), 1); + EXPECT_EQ(schemas_result->get().at(1)->schema_id(), 1); + + auto spec_result = snapshots_table_->spec(); + EXPECT_THAT(spec_result, IsOk()); + EXPECT_EQ(*spec_result, PartitionSpec::Unpartitioned()); + + auto specs_result = snapshots_table_->specs(); + EXPECT_THAT(specs_result, IsOk()); + EXPECT_EQ(specs_result->get().size(), 1); +} + +TEST_F(MetadataTableTest, SortOrders) { + auto sort_order_result = snapshots_table_->sort_order(); + EXPECT_THAT(sort_order_result, IsOk()); + EXPECT_EQ(*sort_order_result, SortOrder::Unsorted()); + + auto sort_orders_result = snapshots_table_->sort_orders(); + EXPECT_THAT(sort_orders_result, IsOk()); + EXPECT_EQ(sort_orders_result->get().size(), 1); +} + +TEST_F(MetadataTableTest, Properties) { + EXPECT_EQ(snapshots_table_->properties().configs().size(), 0); +} + +TEST_F(MetadataTableTest, Snapshots) { + // Assuming source table has no current snapshot + auto cur_snapshot_result = snapshots_table_->current_snapshot(); + EXPECT_THAT(cur_snapshot_result, IsError(ErrorKind::kNotFound)); + auto snapshot_result = snapshots_table_->SnapshotById(1); + EXPECT_THAT(snapshot_result, IsError(ErrorKind::kNotFound)); + EXPECT_TRUE(snapshots_table_->snapshots().empty()); +} + +TEST_F(MetadataTableTest, History) { EXPECT_TRUE(snapshots_table_->history().empty()); } + +TEST_F(MetadataTableTest, LocationProvider) { + auto lp_result = snapshots_table_->location_provider(); + EXPECT_THAT(lp_result, IsOk()); +} + +} // namespace iceberg diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 745c63acb..6b4160037 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -240,6 +240,11 @@ class UpdateStatistics; class DeleteLoader; class PositionDeleteIndex; +/// \brief Metadata tables. +class HistoryTable; +class MetadataTable; +class SnapshotsTable; + /// ---------------------------------------------------------------------------- /// TODO: Forward declarations below are not added yet. /// ---------------------------------------------------------------------------- From 6c646e089efa7ae7c297126af1d4fa38f4bf37f3 Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Sat, 2 May 2026 17:49:19 +0000 Subject: [PATCH 02/11] Update MetadataTable to inherit from StaticTable --- src/iceberg/inspect/history_table.cc | 13 ++-- src/iceberg/inspect/history_table.h | 6 +- src/iceberg/inspect/metadata_table.cc | 56 +++++----------- src/iceberg/inspect/metadata_table.h | 67 +++++++++++-------- src/iceberg/inspect/metadata_table_factory.cc | 37 ---------- src/iceberg/inspect/metadata_table_factory.h | 61 ----------------- src/iceberg/inspect/snapshots_table.cc | 31 ++++----- src/iceberg/inspect/snapshots_table.h | 8 ++- src/iceberg/table.cc | 16 +++++ src/iceberg/table.h | 12 +++- src/iceberg/test/metadata_table_test.cc | 2 - src/iceberg/type_fwd.h | 2 +- 12 files changed, 116 insertions(+), 195 deletions(-) delete mode 100644 src/iceberg/inspect/metadata_table_factory.cc delete mode 100644 src/iceberg/inspect/metadata_table_factory.h diff --git a/src/iceberg/inspect/history_table.cc b/src/iceberg/inspect/history_table.cc index 55fd75168..a857a3a9f 100644 --- a/src/iceberg/inspect/history_table.cc +++ b/src/iceberg/inspect/history_table.cc @@ -31,20 +31,19 @@ namespace iceberg { HistoryTable::HistoryTable(std::shared_ptr
table) - : BaseMetadataTable(table, CreateName(table->name()), CreateSchema()) {} - -HistoryTable::~HistoryTable() = default; - -std::shared_ptr HistoryTable::CreateSchema() { - return std::make_shared( + : MetadataTable(table, CreateName(table->name())) { + this->schema_ = std::make_shared( std::vector{ - SchemaField::MakeRequired(1, "made_current_at", int64()), + SchemaField::MakeRequired(1, "made_current_at", timestamp_tz()), SchemaField::MakeRequired(2, "snapshot_id", int64()), SchemaField::MakeOptional(3, "parent_id", int64()), SchemaField::MakeRequired(4, "is_current_ancestor", boolean())}, 1); + this->schemas_[schema_->schema_id()] = schema_; } +HistoryTable::~HistoryTable() = default; + TableIdentifier HistoryTable::CreateName(const TableIdentifier& source_name) { return TableIdentifier{source_name.ns, source_name.name + ".history"}; } diff --git a/src/iceberg/inspect/history_table.h b/src/iceberg/inspect/history_table.h index e624359f3..493a5dfc8 100644 --- a/src/iceberg/inspect/history_table.h +++ b/src/iceberg/inspect/history_table.h @@ -36,7 +36,7 @@ namespace iceberg { /// - snapshot_id (long) /// - parent_id (long, optional) /// - is_current_ancestor (bool) -class ICEBERG_EXPORT HistoryTable : public BaseMetadataTable { +class ICEBERG_EXPORT HistoryTable : public MetadataTable { public: /// \brief Create a HistoryTable from table metadata /// @@ -46,8 +46,10 @@ class ICEBERG_EXPORT HistoryTable : public BaseMetadataTable { ~HistoryTable() override; + MetadataTableType type() const noexcept override { return MetadataTableType::kHistory; } + private: - HistoryTable(std::shared_ptr
table); + explicit HistoryTable(std::shared_ptr
table); std::shared_ptr CreateSchema(); diff --git a/src/iceberg/inspect/metadata_table.cc b/src/iceberg/inspect/metadata_table.cc index 9a56ce362..a4fc92bf6 100644 --- a/src/iceberg/inspect/metadata_table.cc +++ b/src/iceberg/inspect/metadata_table.cc @@ -24,64 +24,44 @@ #include #include "iceberg/file_io.h" +#include "iceberg/inspect/history_table.h" +#include "iceberg/inspect/snapshots_table.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" #include "iceberg/table_identifier.h" #include "iceberg/table_metadata.h" #include "iceberg/table_scan.h" #include "iceberg/type.h" +#include "iceberg/type_fwd.h" #include "iceberg/util/uuid.h" namespace iceberg { -BaseMetadataTable::BaseMetadataTable(std::shared_ptr
source_table, - TableIdentifier identifier, - std::shared_ptr schema) - : Table(identifier, source_table->metadata(), - std::string(source_table->metadata_file_location()), source_table->io(), - source_table->catalog()), - source_table_(std::move(source_table)), - schema_(schema) { +MetadataTable::MetadataTable(std::shared_ptr
source_table, + TableIdentifier identifier) + : StaticTable(identifier, source_table->metadata(), + std::string(source_table->metadata_file_location()), source_table->io(), + source_table->catalog()), + source_table_(std::move(source_table)) { uuid_ = Uuid::GenerateV4().ToString(); - schemas_[schema->schema_id()] = schema; } -BaseMetadataTable::~BaseMetadataTable() = default; +MetadataTable::~MetadataTable() = default; -Status BaseMetadataTable::Refresh() { - return NotSupported("Cannot refresh a metadata table"); -} +Status MetadataTable::Refresh() { return source_table_->Refresh(); } -Result> BaseMetadataTable::NewScan() const { +Result> MetadataTable::NewScan() const { return NotSupported("TODO: Scanning metadata tables is not yet supported"); }; -Result> BaseMetadataTable::NewTransaction() { - return NotSupported("Cannot create a transaction for a metadata table"); -} - -Result> BaseMetadataTable::NewUpdateProperties() { - return NotSupported("Cannot create an update properties for a metadata table"); -} - -Result> BaseMetadataTable::NewUpdateSchema() { - return NotSupported("Cannot create an update schema for a metadata table"); -} - -Result> BaseMetadataTable::NewUpdateLocation() { - return NotSupported("Cannot create an update location for a metadata table"); -} - -Result> BaseMetadataTable::NewUpdatePartitionSpec() { - return NotSupported("Cannot create an update partition spec for a metadata table"); -} - -Result> BaseMetadataTable::NewUpdateSortOrder() { - return NotSupported("Cannot create an update sort order for a metadata table"); +Result> MetadataTableFactory::GetSnapshotsTable( + std::shared_ptr
table) { + return SnapshotsTable::Make(table); } -Result> BaseMetadataTable::NewExpireSnapshots() { - return NotSupported("Cannot create an expire snapshots for a metadata table"); +Result> MetadataTableFactory::GetHistoryTable( + std::shared_ptr
table) { + return HistoryTable::Make(table); } } // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table.h b/src/iceberg/inspect/metadata_table.h index 7cb19e357..a47d2a358 100644 --- a/src/iceberg/inspect/metadata_table.h +++ b/src/iceberg/inspect/metadata_table.h @@ -29,20 +29,26 @@ #include "iceberg/table.h" #include "iceberg/table_metadata.h" #include "iceberg/table_scan.h" +#include "iceberg/type_fwd.h" namespace iceberg { -/// Forward declarations -class FileIO; +/// \brief The type of metadata table +enum class MetadataTableType { + kSnapshots, + kHistory, +}; /// \brief Base class for Iceberg metadata tables /// /// Metadata tables expose table metadata as queryable tables with schemas and scan /// support. They provide read-only access to metadata. -class ICEBERG_EXPORT BaseMetadataTable : public Table { +class ICEBERG_EXPORT MetadataTable : public StaticTable { public: /// \brief Returns the identifier of this table - const TableIdentifier& name() const { return identifier_; } + const TableIdentifier& name() const override { return identifier_; } + + virtual MetadataTableType type() const noexcept = 0; /// \brief Returns the UUID of the table const std::string& uuid() const { return uuid_; } @@ -137,32 +143,10 @@ class ICEBERG_EXPORT BaseMetadataTable : public Table { /// filter data. Result> NewScan() const; - /// \brief Creating transactions is not supported in metadata tables. - Result> NewTransaction() override; - - /// \brief Updating partition specs is not supported in metadata tables. - Result> NewUpdatePartitionSpec() override; - - /// \brief Updating table properties is not supported in metadata tables. - Result> NewUpdateProperties() override; - - /// \brief Updating sort orders is not supported in metadata tables. - Result> NewUpdateSortOrder() override; - - /// \brief Updating schemas is not supported in metadata tables. - Result> NewUpdateSchema() override; - - /// \brief Expiring snapshots is not supported in metadata tables. - Result> NewExpireSnapshots() override; - - /// \brief Updating table location is not supported in metadata tables. - Result> NewUpdateLocation() override; - protected: - BaseMetadataTable(std::shared_ptr
source_table, TableIdentifier identifier, - std::shared_ptr schema); + explicit MetadataTable(std::shared_ptr
source_table, TableIdentifier identifier); - virtual ~BaseMetadataTable(); + ~MetadataTable(); std::shared_ptr
source_table_; std::string uuid_; @@ -177,4 +161,31 @@ class ICEBERG_EXPORT BaseMetadataTable : public Table { {partition_spec->spec_id(), partition_spec}}; }; +/// \brief Metadata table factory and inspector +/// +/// MetadataTable provides factory methods to create specific metadata tables for +/// inspecting table metadata. Each metadata table exposes a different aspect of the +/// table's metadata as a scannable Iceberg table. +/// +/// Usage: +/// auto snapshots = ICEBERG_TRY(MetadataTable::GetSnapshotsTable(table)); +/// auto scan = ICEBERG_TRY(snapshots->NewScan()); +/// // ... scan and read snapshot data +class ICEBERG_EXPORT MetadataTableFactory { + public: + /// \brief Create a SnapshotsTable from a table + /// + /// \param table The source table + /// \return A SnapshotsTable exposing all snapshots or error status + static Result> GetSnapshotsTable( + std::shared_ptr
table); + + /// \brief Create a HistoryTable from a table + /// + /// \param table The source table + /// \return A HistoryTable exposing snapshot history or error status + static Result> GetHistoryTable( + std::shared_ptr
table); +}; + } // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table_factory.cc b/src/iceberg/inspect/metadata_table_factory.cc deleted file mode 100644 index b4a981672..000000000 --- a/src/iceberg/inspect/metadata_table_factory.cc +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "iceberg/inspect/metadata_table_factory.h" - -#include "iceberg/inspect/history_table.h" -#include "iceberg/inspect/snapshots_table.h" - -namespace iceberg { - -Result> MetadataTableFactory::GetSnapshotsTable( - std::shared_ptr
table) { - return SnapshotsTable::Make(table); -} - -Result> MetadataTableFactory::GetHistoryTable( - std::shared_ptr
table) { - return HistoryTable::Make(table); -} - -} // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table_factory.h b/src/iceberg/inspect/metadata_table_factory.h deleted file mode 100644 index 3cf031884..000000000 --- a/src/iceberg/inspect/metadata_table_factory.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -#include - -#include "iceberg/iceberg_export.h" -#include "iceberg/result.h" -#include "iceberg/table.h" - -namespace iceberg { - -class HistoryTable; -class SnapshotsTable; -class Table; - -/// \brief Metadata table factory and inspector -/// -/// MetadataTable provides factory methods to create specific metadata tables for -/// inspecting table metadata. Each metadata table exposes a different aspect of the -/// table's metadata as a scannable Iceberg table. -/// -/// Usage: -/// auto snapshots = ICEBERG_TRY(MetadataTable::GetSnapshotsTable(table)); -/// auto scan = ICEBERG_TRY(snapshots->NewScan()); -/// // ... scan and read snapshot data -class ICEBERG_EXPORT MetadataTableFactory { - public: - /// \brief Create a SnapshotsTable from a table - /// - /// \param table The source table - /// \return A SnapshotsTable exposing all snapshots or error status - static Result> GetSnapshotsTable( - std::shared_ptr
table); - - /// \brief Create a HistoryTable from a table - /// - /// \param table The source table - /// \return A HistoryTable exposing snapshot history or error status - static Result> GetHistoryTable( - std::shared_ptr
table); -}; - -} // namespace iceberg diff --git a/src/iceberg/inspect/snapshots_table.cc b/src/iceberg/inspect/snapshots_table.cc index 4b0f6f790..cc5821890 100644 --- a/src/iceberg/inspect/snapshots_table.cc +++ b/src/iceberg/inspect/snapshots_table.cc @@ -31,24 +31,25 @@ namespace iceberg { SnapshotsTable::SnapshotsTable(std::shared_ptr
table) - : BaseMetadataTable(table, CreateName(table->name()), CreateSchema()) {} - -SnapshotsTable::~SnapshotsTable() = default; - -std::shared_ptr SnapshotsTable::CreateSchema() { - return std::make_shared( - std::vector{SchemaField::MakeRequired(1, "committed_at", int64()), - SchemaField::MakeOptional(2, "snapshot_id", int64()), - SchemaField::MakeRequired(3, "parent_id", int64()), - SchemaField::MakeRequired(4, "manifest_list", string()), - SchemaField::MakeRequired( - 5, "summary", - std::make_shared( - SchemaField::MakeRequired(6, "key", string()), - SchemaField::MakeRequired(7, "value", string())))}, + : MetadataTable(table, CreateName(table->name())) { + this->schema_ = std::make_shared( + std::vector{ + SchemaField::MakeRequired(1, "committed_at", timestamp_tz()), + SchemaField::MakeRequired(2, "snapshot_id", int64()), + SchemaField::MakeRequired(3, "parent_id", int64()), + SchemaField::MakeOptional(4, "operation", int64()), + SchemaField::MakeOptional(5, "manifest_list", string()), + SchemaField::MakeOptional( + 6, "summary", + std::make_shared( + SchemaField::MakeRequired(7, "key", string()), + SchemaField::MakeRequired(8, "value", string())))}, 1); + this->schemas_[schema_->schema_id()] = schema_; } +SnapshotsTable::~SnapshotsTable() = default; + TableIdentifier SnapshotsTable::CreateName(const TableIdentifier& source_name) { return TableIdentifier{source_name.ns, source_name.name + ".snapshots"}; } diff --git a/src/iceberg/inspect/snapshots_table.h b/src/iceberg/inspect/snapshots_table.h index fc6c46013..daaa39382 100644 --- a/src/iceberg/inspect/snapshots_table.h +++ b/src/iceberg/inspect/snapshots_table.h @@ -36,7 +36,7 @@ namespace iceberg { /// - parent_id (long) /// - manifest_list (string) /// - summary (map) -class ICEBERG_EXPORT SnapshotsTable : public BaseMetadataTable { +class ICEBERG_EXPORT SnapshotsTable : public MetadataTable { public: /// \brief Create a SnapshotsTable from table metadata /// @@ -46,8 +46,12 @@ class ICEBERG_EXPORT SnapshotsTable : public BaseMetadataTable { ~SnapshotsTable() override; + MetadataTableType type() const noexcept override { + return MetadataTableType::kSnapshots; + } + private: - SnapshotsTable(std::shared_ptr
table); + explicit SnapshotsTable(std::shared_ptr
table); std::shared_ptr CreateSchema(); diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index 1255871c3..45a3882cb 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -287,4 +287,20 @@ Result> StaticTable::NewUpdateSchema() { return NotSupported("Cannot create an update schema for a static table"); } +Result> StaticTable::NewUpdateLocation() { + return NotSupported("Cannot create an update location for a static table"); +} + +Result> StaticTable::NewUpdatePartitionSpec() { + return NotSupported("Cannot create an update partition spec for a static table"); +} + +Result> StaticTable::NewUpdateSortOrder() { + return NotSupported("Cannot create an update sort order for a static table"); +} + +Result> StaticTable::NewExpireSnapshots() { + return NotSupported("Cannot create an expire snapshots for a static table"); +} + } // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 8d8849f37..e560e0a0c 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -52,7 +52,7 @@ class ICEBERG_EXPORT Table : public std::enable_shared_from_this
{ virtual ~Table(); /// \brief Returns the identifier of this table - const TableIdentifier& name() const { return identifier_; } + virtual const TableIdentifier& name() const { return identifier_; } /// \brief Returns the UUID of the table const std::string& uuid() const; @@ -212,7 +212,7 @@ class ICEBERG_EXPORT StagedTable final : public Table { /// \brief A read-only table. -class ICEBERG_EXPORT StaticTable final : public Table { +class ICEBERG_EXPORT StaticTable : public Table { public: static Result> Make( TableIdentifier identifier, std::shared_ptr metadata, @@ -228,6 +228,14 @@ class ICEBERG_EXPORT StaticTable final : public Table { Result> NewUpdateSchema() override; + Result> NewUpdateLocation() override; + + Result> NewUpdatePartitionSpec() override; + + Result> NewUpdateSortOrder() override; + + Result> NewExpireSnapshots() override; + private: using Table::Table; }; diff --git a/src/iceberg/test/metadata_table_test.cc b/src/iceberg/test/metadata_table_test.cc index 9260d76ec..aa01c73b9 100644 --- a/src/iceberg/test/metadata_table_test.cc +++ b/src/iceberg/test/metadata_table_test.cc @@ -22,7 +22,6 @@ #include #include -#include "iceberg/inspect/metadata_table_factory.h" #include "iceberg/inspect/snapshots_table.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" @@ -83,7 +82,6 @@ TEST_F(MetadataTableTest, DelegatesToSourceTable) { } TEST_F(MetadataTableTest, NotSupportedOperations) { - EXPECT_THAT(snapshots_table_->Refresh(), HasErrorMessage("Cannot")); EXPECT_THAT(snapshots_table_->NewTransaction(), HasErrorMessage("Cannot")); EXPECT_THAT(snapshots_table_->NewUpdateProperties(), HasErrorMessage("Cannot")); EXPECT_THAT(snapshots_table_->NewUpdateSchema(), HasErrorMessage("Cannot")); diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 6b4160037..532888967 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -241,8 +241,8 @@ class DeleteLoader; class PositionDeleteIndex; /// \brief Metadata tables. -class HistoryTable; class MetadataTable; +class HistoryTable; class SnapshotsTable; /// ---------------------------------------------------------------------------- From 484d0d51f077c5dcea65c850415c0aeebd73104b Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Sat, 2 May 2026 17:50:01 +0000 Subject: [PATCH 03/11] Update build files to include MetadataTable --- src/iceberg/CMakeLists.txt | 3 +-- src/iceberg/inspect/CMakeLists.txt | 18 ++++++++++++++++++ src/iceberg/inspect/meson.build | 25 +++++++++++++++++++++++++ src/iceberg/meson.build | 4 ++++ src/iceberg/test/meson.build | 1 + 5 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 src/iceberg/inspect/CMakeLists.txt create mode 100644 src/iceberg/inspect/meson.build diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 47b91ebdd..c5b3aa819 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -42,7 +42,7 @@ set(ICEBERG_SOURCES file_reader.cc file_writer.cc inspect/history_table.cc - inspect/metadata_table_factory.cc + inspect/metadata_table.cc inspect/snapshots_table.cc inheritable_metadata.cc json_serde.cc @@ -61,7 +61,6 @@ set(ICEBERG_SOURCES manifest/v2_metadata.cc manifest/v3_metadata.cc metadata_columns.cc - inspect/metadata_table.cc metrics_config.cc metrics/commit_report.cc metrics/counter.cc diff --git a/src/iceberg/inspect/CMakeLists.txt b/src/iceberg/inspect/CMakeLists.txt new file mode 100644 index 000000000..2df844359 --- /dev/null +++ b/src/iceberg/inspect/CMakeLists.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +iceberg_install_all_headers(iceberg/inspect) diff --git a/src/iceberg/inspect/meson.build b/src/iceberg/inspect/meson.build new file mode 100644 index 000000000..bcb9d8a97 --- /dev/null +++ b/src/iceberg/inspect/meson.build @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +install_headers( + [ + 'history_table.h', + 'metadata_table.h', + 'snapshots_table.h', + ], + subdir: 'iceberg/inspect', +) diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 15fd5d79d..ae9897d81 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -64,6 +64,9 @@ iceberg_sources = files( 'file_reader.cc', 'file_writer.cc', 'inheritable_metadata.cc', + 'inspect/metadata_table.cc', + 'inspect/snapshots_table.cc', + 'inspect/history_table.cc', 'json_serde.cc', 'location_provider.cc', 'manifest/manifest_adapter.cc', @@ -294,6 +297,7 @@ subdir('puffin') subdir('row') subdir('update') subdir('util') +subdir('inspect') if get_option('tests').enabled() subdir('test') diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 8d2805900..4772bbaf9 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -58,6 +58,7 @@ iceberg_tests = { 'table_requirements_test.cc', 'table_test.cc', 'table_update_test.cc', + 'metadata_table_test.cc', ), }, 'expression_test': { From 9edb8bca72667ec6325a92f56fbf9c3b1ee6c049 Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Sat, 2 May 2026 17:55:13 +0000 Subject: [PATCH 04/11] Remove unused CreateSchema definitions --- src/iceberg/inspect/history_table.h | 2 -- src/iceberg/inspect/snapshots_table.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/iceberg/inspect/history_table.h b/src/iceberg/inspect/history_table.h index 493a5dfc8..c313b7456 100644 --- a/src/iceberg/inspect/history_table.h +++ b/src/iceberg/inspect/history_table.h @@ -51,8 +51,6 @@ class ICEBERG_EXPORT HistoryTable : public MetadataTable { private: explicit HistoryTable(std::shared_ptr
table); - std::shared_ptr CreateSchema(); - TableIdentifier CreateName(const TableIdentifier& source_name); }; diff --git a/src/iceberg/inspect/snapshots_table.h b/src/iceberg/inspect/snapshots_table.h index daaa39382..4347c3517 100644 --- a/src/iceberg/inspect/snapshots_table.h +++ b/src/iceberg/inspect/snapshots_table.h @@ -53,8 +53,6 @@ class ICEBERG_EXPORT SnapshotsTable : public MetadataTable { private: explicit SnapshotsTable(std::shared_ptr
table); - std::shared_ptr CreateSchema(); - TableIdentifier CreateName(const TableIdentifier& source_name); }; From 6737c3fa7c0ec832952c8db556da5698ba5fb3a8 Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Sat, 2 May 2026 18:00:09 +0000 Subject: [PATCH 05/11] Update Metadata creation to return unique_ptr --- src/iceberg/inspect/history_table.cc | 4 ++-- src/iceberg/inspect/history_table.h | 2 +- src/iceberg/inspect/metadata_table.cc | 4 ++-- src/iceberg/inspect/metadata_table.h | 4 ++-- src/iceberg/inspect/snapshots_table.cc | 4 ++-- src/iceberg/inspect/snapshots_table.h | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/iceberg/inspect/history_table.cc b/src/iceberg/inspect/history_table.cc index a857a3a9f..353982e9e 100644 --- a/src/iceberg/inspect/history_table.cc +++ b/src/iceberg/inspect/history_table.cc @@ -48,8 +48,8 @@ TableIdentifier HistoryTable::CreateName(const TableIdentifier& source_name) { return TableIdentifier{source_name.ns, source_name.name + ".history"}; } -Result> HistoryTable::Make(std::shared_ptr
table) { - return std::shared_ptr(new HistoryTable(table)); +Result> HistoryTable::Make(std::shared_ptr
table) { + return std::unique_ptr(new HistoryTable(table)); } } // namespace iceberg diff --git a/src/iceberg/inspect/history_table.h b/src/iceberg/inspect/history_table.h index c313b7456..5116dbccf 100644 --- a/src/iceberg/inspect/history_table.h +++ b/src/iceberg/inspect/history_table.h @@ -42,7 +42,7 @@ class ICEBERG_EXPORT HistoryTable : public MetadataTable { /// /// \param[in] table The source table /// \return A HistoryTable instance or error status - static Result> Make(std::shared_ptr
table); + static Result> Make(std::shared_ptr
table); ~HistoryTable() override; diff --git a/src/iceberg/inspect/metadata_table.cc b/src/iceberg/inspect/metadata_table.cc index a4fc92bf6..6ab8803ad 100644 --- a/src/iceberg/inspect/metadata_table.cc +++ b/src/iceberg/inspect/metadata_table.cc @@ -54,12 +54,12 @@ Result> MetadataTable::NewScan() const { return NotSupported("TODO: Scanning metadata tables is not yet supported"); }; -Result> MetadataTableFactory::GetSnapshotsTable( +Result> MetadataTableFactory::GetSnapshotsTable( std::shared_ptr
table) { return SnapshotsTable::Make(table); } -Result> MetadataTableFactory::GetHistoryTable( +Result> MetadataTableFactory::GetHistoryTable( std::shared_ptr
table) { return HistoryTable::Make(table); } diff --git a/src/iceberg/inspect/metadata_table.h b/src/iceberg/inspect/metadata_table.h index a47d2a358..f2749f872 100644 --- a/src/iceberg/inspect/metadata_table.h +++ b/src/iceberg/inspect/metadata_table.h @@ -177,14 +177,14 @@ class ICEBERG_EXPORT MetadataTableFactory { /// /// \param table The source table /// \return A SnapshotsTable exposing all snapshots or error status - static Result> GetSnapshotsTable( + static Result> GetSnapshotsTable( std::shared_ptr
table); /// \brief Create a HistoryTable from a table /// /// \param table The source table /// \return A HistoryTable exposing snapshot history or error status - static Result> GetHistoryTable( + static Result> GetHistoryTable( std::shared_ptr
table); }; diff --git a/src/iceberg/inspect/snapshots_table.cc b/src/iceberg/inspect/snapshots_table.cc index cc5821890..580887216 100644 --- a/src/iceberg/inspect/snapshots_table.cc +++ b/src/iceberg/inspect/snapshots_table.cc @@ -54,9 +54,9 @@ TableIdentifier SnapshotsTable::CreateName(const TableIdentifier& source_name) { return TableIdentifier{source_name.ns, source_name.name + ".snapshots"}; } -Result> SnapshotsTable::Make( +Result> SnapshotsTable::Make( std::shared_ptr
table) { - return std::shared_ptr(new SnapshotsTable(table)); + return std::unique_ptr(new SnapshotsTable(table)); } } // namespace iceberg diff --git a/src/iceberg/inspect/snapshots_table.h b/src/iceberg/inspect/snapshots_table.h index 4347c3517..ea1a6569c 100644 --- a/src/iceberg/inspect/snapshots_table.h +++ b/src/iceberg/inspect/snapshots_table.h @@ -42,7 +42,7 @@ class ICEBERG_EXPORT SnapshotsTable : public MetadataTable { /// /// \param[in] table The source table /// \return A SnapshotsTable instance or error status - static Result> Make(std::shared_ptr
table); + static Result> Make(std::shared_ptr
table); ~SnapshotsTable() override; From 6d2438e5dd605f5b387735b98ffe3b0f3b070066 Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Fri, 29 May 2026 18:23:20 +0000 Subject: [PATCH 06/11] Refactor MetadataTable to use separate TableMetadata and unified entrypoint --- src/iceberg/inspect/history_table.cc | 11 +-- src/iceberg/inspect/history_table.h | 2 + src/iceberg/inspect/metadata_table.cc | 44 +++++++++--- src/iceberg/inspect/metadata_table.h | 92 ++++++------------------- src/iceberg/inspect/snapshots_table.cc | 11 +-- src/iceberg/inspect/snapshots_table.h | 2 + src/iceberg/test/metadata_table_test.cc | 12 ++-- src/iceberg/type_fwd.h | 2 +- 8 files changed, 78 insertions(+), 98 deletions(-) diff --git a/src/iceberg/inspect/history_table.cc b/src/iceberg/inspect/history_table.cc index 353982e9e..1a8041f41 100644 --- a/src/iceberg/inspect/history_table.cc +++ b/src/iceberg/inspect/history_table.cc @@ -31,19 +31,20 @@ namespace iceberg { HistoryTable::HistoryTable(std::shared_ptr
table) - : MetadataTable(table, CreateName(table->name())) { - this->schema_ = std::make_shared( + : MetadataTable(table, CreateName(table->name())) {} + +HistoryTable::~HistoryTable() = default; + +std::shared_ptr HistoryTable::GetSchema() const { + return std::make_shared( std::vector{ SchemaField::MakeRequired(1, "made_current_at", timestamp_tz()), SchemaField::MakeRequired(2, "snapshot_id", int64()), SchemaField::MakeOptional(3, "parent_id", int64()), SchemaField::MakeRequired(4, "is_current_ancestor", boolean())}, 1); - this->schemas_[schema_->schema_id()] = schema_; } -HistoryTable::~HistoryTable() = default; - TableIdentifier HistoryTable::CreateName(const TableIdentifier& source_name) { return TableIdentifier{source_name.ns, source_name.name + ".history"}; } diff --git a/src/iceberg/inspect/history_table.h b/src/iceberg/inspect/history_table.h index 5116dbccf..d10a4967e 100644 --- a/src/iceberg/inspect/history_table.h +++ b/src/iceberg/inspect/history_table.h @@ -48,6 +48,8 @@ class ICEBERG_EXPORT HistoryTable : public MetadataTable { MetadataTableType type() const noexcept override { return MetadataTableType::kHistory; } + std::shared_ptr GetSchema() const override; + private: explicit HistoryTable(std::shared_ptr
table); diff --git a/src/iceberg/inspect/metadata_table.cc b/src/iceberg/inspect/metadata_table.cc index 6ab8803ad..f285bc81f 100644 --- a/src/iceberg/inspect/metadata_table.cc +++ b/src/iceberg/inspect/metadata_table.cc @@ -26,10 +26,13 @@ #include "iceberg/file_io.h" #include "iceberg/inspect/history_table.h" #include "iceberg/inspect/snapshots_table.h" +#include "iceberg/partition_spec.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" +#include "iceberg/sort_order.h" #include "iceberg/table_identifier.h" #include "iceberg/table_metadata.h" +#include "iceberg/table_properties.h" #include "iceberg/table_scan.h" #include "iceberg/type.h" #include "iceberg/type_fwd.h" @@ -43,7 +46,30 @@ MetadataTable::MetadataTable(std::shared_ptr
source_table, std::string(source_table->metadata_file_location()), source_table->io(), source_table->catalog()), source_table_(std::move(source_table)) { - uuid_ = Uuid::GenerateV4().ToString(); + auto schema = GetSchema(); + if (!schema) { + schema = std::make_shared(std::vector{}, 1); + } + + auto builder = + TableMetadataBuilder::BuildFromEmpty(TableMetadata::kDefaultTableFormatVersion); + auto result = builder->AssignUUID(Uuid::GenerateV4().ToString()) + .SetLocation(std::string(source_table_->location())) + .SetCurrentSchema(schema, schema->schema_id()) + .SetDefaultSortOrder(SortOrder::Unsorted()) + .SetDefaultPartitionSpec(PartitionSpec::Unpartitioned()) + .SetProperties({}) + .Build(); + + if (!result.has_value()) { + // If metadata building fails, keep the original metadata from source_table + return; + } + + std::shared_ptr built_metadata = std::move(result.value()); + + metadata_ = built_metadata; + metadata_cache_ = std::make_unique(metadata_.get()); } MetadataTable::~MetadataTable() = default; @@ -54,14 +80,16 @@ Result> MetadataTable::NewScan() const { return NotSupported("TODO: Scanning metadata tables is not yet supported"); }; -Result> MetadataTableFactory::GetSnapshotsTable( - std::shared_ptr
table) { - return SnapshotsTable::Make(table); -} +Result> MetadataTableFactory::CreateMetadataTable( + std::shared_ptr
table, MetadataTableType type) { + switch (type) { + case MetadataTableType::kSnapshots: + return SnapshotsTable::Make(table); + case MetadataTableType::kHistory: + return HistoryTable::Make(table); + } -Result> MetadataTableFactory::GetHistoryTable( - std::shared_ptr
table) { - return HistoryTable::Make(table); + return Invalid("Unsupported metadata table type"); } } // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table.h b/src/iceberg/inspect/metadata_table.h index f2749f872..54c6a89fc 100644 --- a/src/iceberg/inspect/metadata_table.h +++ b/src/iceberg/inspect/metadata_table.h @@ -45,47 +45,8 @@ enum class MetadataTableType { /// support. They provide read-only access to metadata. class ICEBERG_EXPORT MetadataTable : public StaticTable { public: - /// \brief Returns the identifier of this table - const TableIdentifier& name() const override { return identifier_; } - virtual MetadataTableType type() const noexcept = 0; - /// \brief Returns the UUID of the table - const std::string& uuid() const { return uuid_; } - - /// \brief Returns the schema for this table, return NotFoundError if not found - Result> schema() const { return schema_; } - - /// \brief Returns a map of schema for this table - Result< - std::reference_wrapper>>> - schemas() const { - return schemas_; - } - - /// \brief Returns the partition spec for this table, return NotFoundError if not found - Result> spec() const { return partition_spec; }; - - /// \brief Returns a map of partition specs for this table - Result>>> - specs() const { - return partition_specs_; - } - - /// \brief Returns the sort order for this table, return NotFoundError if not found - Result> sort_order() const { return sort_order_; } - - /// \brief Returns a map of sort order IDs to sort orders for this table - Result>>> - sort_orders() const { - return sort_orders_; - } - - /// \brief Returns the properties of this table - const TableProperties& properties() const { return properties_; } - /// \brief Returns the table's metadata file location std::string_view metadata_file_location() const { return source_table_->metadata_file_location(); @@ -120,12 +81,6 @@ class ICEBERG_EXPORT MetadataTable : public StaticTable { return source_table_->history(); } - /// \brief Returns the current metadata for this table - const std::shared_ptr& metadata() const { - // TODO: or should we return an empty TableMetadata? - return source_table_->metadata(); - } - /// \brief Returns the catalog that this table belongs to const std::shared_ptr& catalog() const { return source_table_->catalog(); } @@ -141,24 +96,23 @@ class ICEBERG_EXPORT MetadataTable : public StaticTable { /// /// Once a table scan builder is created, it can be refined to project columns and /// filter data. - Result> NewScan() const; + Result> NewScan() const override; + + ~MetadataTable(); protected: explicit MetadataTable(std::shared_ptr
source_table, TableIdentifier identifier); - ~MetadataTable(); + /// \brief Returns the schema for this metadata table + /// + /// Subclasses override this method to provide their custom schema during + /// MetadataTable construction. The returned schema is used to initialize + /// the underlying TableMetadata. + /// + /// \return The schema for this metadata table, or nullptr for default schema + virtual std::shared_ptr GetSchema() const { return nullptr; } std::shared_ptr
source_table_; - std::string uuid_; - std::shared_ptr schema_; - std::unordered_map> schemas_; - TableProperties properties_ = TableProperties(); - const std::shared_ptr sort_order_ = SortOrder::Unsorted(); - const std::unordered_map> sort_orders_ = { - {sort_order_->order_id(), sort_order_}}; - const std::shared_ptr partition_spec = PartitionSpec::Unpartitioned(); - const std::unordered_map> partition_specs_ = { - {partition_spec->spec_id(), partition_spec}}; }; /// \brief Metadata table factory and inspector @@ -168,24 +122,20 @@ class ICEBERG_EXPORT MetadataTable : public StaticTable { /// table's metadata as a scannable Iceberg table. /// /// Usage: -/// auto snapshots = ICEBERG_TRY(MetadataTable::GetSnapshotsTable(table)); -/// auto scan = ICEBERG_TRY(snapshots->NewScan()); -/// // ... scan and read snapshot data +/// auto metadata_table = ICEBERG_TRY( +/// MetadataTableFactory::CreateMetadataTable( +/// table, MetadataTableType::kSnapshots)); +/// auto scan = ICEBERG_TRY(metadata_table->NewScan()); +/// // ... scan and read metadata table data class ICEBERG_EXPORT MetadataTableFactory { public: - /// \brief Create a SnapshotsTable from a table - /// - /// \param table The source table - /// \return A SnapshotsTable exposing all snapshots or error status - static Result> GetSnapshotsTable( - std::shared_ptr
table); - - /// \brief Create a HistoryTable from a table + /// \brief Create a metadata table from a table /// /// \param table The source table - /// \return A HistoryTable exposing snapshot history or error status - static Result> GetHistoryTable( - std::shared_ptr
table); + /// \param type The metadata table type to create + /// \return A MetadataTable instance or error status + static Result> CreateMetadataTable( + std::shared_ptr
table, MetadataTableType type); }; } // namespace iceberg diff --git a/src/iceberg/inspect/snapshots_table.cc b/src/iceberg/inspect/snapshots_table.cc index 580887216..22b189027 100644 --- a/src/iceberg/inspect/snapshots_table.cc +++ b/src/iceberg/inspect/snapshots_table.cc @@ -31,8 +31,12 @@ namespace iceberg { SnapshotsTable::SnapshotsTable(std::shared_ptr
table) - : MetadataTable(table, CreateName(table->name())) { - this->schema_ = std::make_shared( + : MetadataTable(table, CreateName(table->name())) {} + +SnapshotsTable::~SnapshotsTable() = default; + +std::shared_ptr SnapshotsTable::GetSchema() const { + return std::make_shared( std::vector{ SchemaField::MakeRequired(1, "committed_at", timestamp_tz()), SchemaField::MakeRequired(2, "snapshot_id", int64()), @@ -45,11 +49,8 @@ SnapshotsTable::SnapshotsTable(std::shared_ptr
table) SchemaField::MakeRequired(7, "key", string()), SchemaField::MakeRequired(8, "value", string())))}, 1); - this->schemas_[schema_->schema_id()] = schema_; } -SnapshotsTable::~SnapshotsTable() = default; - TableIdentifier SnapshotsTable::CreateName(const TableIdentifier& source_name) { return TableIdentifier{source_name.ns, source_name.name + ".snapshots"}; } diff --git a/src/iceberg/inspect/snapshots_table.h b/src/iceberg/inspect/snapshots_table.h index ea1a6569c..86e57dc13 100644 --- a/src/iceberg/inspect/snapshots_table.h +++ b/src/iceberg/inspect/snapshots_table.h @@ -50,6 +50,8 @@ class ICEBERG_EXPORT SnapshotsTable : public MetadataTable { return MetadataTableType::kSnapshots; } + std::shared_ptr GetSchema() const override; + private: explicit SnapshotsTable(std::shared_ptr
table); diff --git a/src/iceberg/test/metadata_table_test.cc b/src/iceberg/test/metadata_table_test.cc index aa01c73b9..058b10107 100644 --- a/src/iceberg/test/metadata_table_test.cc +++ b/src/iceberg/test/metadata_table_test.cc @@ -54,16 +54,17 @@ class MetadataTableTest : public ::testing::Test { EXPECT_THAT(source_table_result, IsOk()); source_table_ = *source_table_result; - auto snapshots_table_result = MetadataTableFactory::GetSnapshotsTable(source_table_); + auto snapshots_table_result = MetadataTableFactory::CreateMetadataTable( + source_table_, MetadataTableType::kSnapshots); EXPECT_THAT(snapshots_table_result, IsOk()); - snapshots_table_ = *snapshots_table_result; + snapshots_table_ = std::move(*snapshots_table_result); } std::shared_ptr io_; std::shared_ptr catalog_; std::shared_ptr metadata_; std::shared_ptr
source_table_; - std::shared_ptr snapshots_table_; + std::unique_ptr snapshots_table_; }; TEST_F(MetadataTableTest, Constructor) { @@ -71,13 +72,11 @@ TEST_F(MetadataTableTest, Constructor) { EXPECT_FALSE(snapshots_table_->uuid().empty()); auto schema_result = snapshots_table_->schema(); EXPECT_THAT(schema_result, IsOk()); - EXPECT_EQ((*schema_result)->schema_id(), 1); } TEST_F(MetadataTableTest, DelegatesToSourceTable) { EXPECT_EQ(snapshots_table_->location(), source_table_->location()); EXPECT_EQ(snapshots_table_->last_updated_ms(), source_table_->last_updated_ms()); - EXPECT_EQ(snapshots_table_->metadata(), source_table_->metadata()); EXPECT_EQ(snapshots_table_->catalog(), source_table_->catalog()); } @@ -95,11 +94,9 @@ TEST_F(MetadataTableTest, SchemasAndSpecs) { auto schemas_result = snapshots_table_->schemas(); EXPECT_THAT(schemas_result, IsOk()); EXPECT_EQ(schemas_result->get().size(), 1); - EXPECT_EQ(schemas_result->get().at(1)->schema_id(), 1); auto spec_result = snapshots_table_->spec(); EXPECT_THAT(spec_result, IsOk()); - EXPECT_EQ(*spec_result, PartitionSpec::Unpartitioned()); auto specs_result = snapshots_table_->specs(); EXPECT_THAT(specs_result, IsOk()); @@ -109,7 +106,6 @@ TEST_F(MetadataTableTest, SchemasAndSpecs) { TEST_F(MetadataTableTest, SortOrders) { auto sort_order_result = snapshots_table_->sort_order(); EXPECT_THAT(sort_order_result, IsOk()); - EXPECT_EQ(*sort_order_result, SortOrder::Unsorted()); auto sort_orders_result = snapshots_table_->sort_orders(); EXPECT_THAT(sort_orders_result, IsOk()); diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 532888967..6b4160037 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -241,8 +241,8 @@ class DeleteLoader; class PositionDeleteIndex; /// \brief Metadata tables. -class MetadataTable; class HistoryTable; +class MetadataTable; class SnapshotsTable; /// ---------------------------------------------------------------------------- From e0a7d1618fac2b0751c61f8db7cab7ac95419ec8 Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Mon, 8 Jun 2026 01:03:25 +0000 Subject: [PATCH 07/11] Fix wrong template parameter in MetadataTable::NewScan --- src/iceberg/inspect/metadata_table.cc | 2 +- src/iceberg/inspect/metadata_table.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iceberg/inspect/metadata_table.cc b/src/iceberg/inspect/metadata_table.cc index f285bc81f..3d32b6014 100644 --- a/src/iceberg/inspect/metadata_table.cc +++ b/src/iceberg/inspect/metadata_table.cc @@ -76,7 +76,7 @@ MetadataTable::~MetadataTable() = default; Status MetadataTable::Refresh() { return source_table_->Refresh(); } -Result> MetadataTable::NewScan() const { +Result> MetadataTable::NewScan() const { return NotSupported("TODO: Scanning metadata tables is not yet supported"); }; diff --git a/src/iceberg/inspect/metadata_table.h b/src/iceberg/inspect/metadata_table.h index 54c6a89fc..cc6021067 100644 --- a/src/iceberg/inspect/metadata_table.h +++ b/src/iceberg/inspect/metadata_table.h @@ -96,7 +96,7 @@ class ICEBERG_EXPORT MetadataTable : public StaticTable { /// /// Once a table scan builder is created, it can be refined to project columns and /// filter data. - Result> NewScan() const override; + Result> NewScan() const override; ~MetadataTable(); From f8f7aeae2bc293506d1ff9778e1f6b9253ae01a4 Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Mon, 8 Jun 2026 01:09:24 +0000 Subject: [PATCH 08/11] Fix format errors in Meson and CMake build files --- src/iceberg/inspect/meson.build | 6 +----- src/iceberg/meson.build | 2 +- src/iceberg/test/CMakeLists.txt | 2 +- src/iceberg/test/meson.build | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/iceberg/inspect/meson.build b/src/iceberg/inspect/meson.build index bcb9d8a97..5c738008a 100644 --- a/src/iceberg/inspect/meson.build +++ b/src/iceberg/inspect/meson.build @@ -16,10 +16,6 @@ # under the License. install_headers( - [ - 'history_table.h', - 'metadata_table.h', - 'snapshots_table.h', - ], + ['history_table.h', 'metadata_table.h', 'snapshots_table.h'], subdir: 'iceberg/inspect', ) diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index ae9897d81..5a50d5423 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -64,9 +64,9 @@ iceberg_sources = files( 'file_reader.cc', 'file_writer.cc', 'inheritable_metadata.cc', + 'inspect/history_table.cc', 'inspect/metadata_table.cc', 'inspect/snapshots_table.cc', - 'inspect/history_table.cc', 'json_serde.cc', 'location_provider.cc', 'manifest/manifest_adapter.cc', diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index d6f50fc57..4c9b55975 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -242,7 +242,7 @@ if(ICEBERG_BUILD_BUNDLE) delete_filter_test.cc delete_loader_test.cc file_scan_task_reader_test.cc) - + add_iceberg_test(metadata_table_test SOURCES metadata_table_test.cc) add_iceberg_test(data_writer_test USE_BUNDLE SOURCES data_writer_test.cc) diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 4772bbaf9..7bc9fb846 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -48,6 +48,7 @@ iceberg_tests = { 'table_test': { 'sources': files( 'location_provider_test.cc', + 'metadata_table_test.cc', 'metrics_config_test.cc', 'metrics_reporter_test.cc', 'metrics_test.cc', @@ -58,7 +59,6 @@ iceberg_tests = { 'table_requirements_test.cc', 'table_test.cc', 'table_update_test.cc', - 'metadata_table_test.cc', ), }, 'expression_test': { From 8188d1d9b9479daa08ab061487be6da48daebf7d Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Mon, 8 Jun 2026 01:31:45 +0000 Subject: [PATCH 09/11] Add initializer names to SnapshotsTable and HistoryTable --- src/iceberg/inspect/history_table.cc | 4 ++-- src/iceberg/inspect/snapshots_table.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/iceberg/inspect/history_table.cc b/src/iceberg/inspect/history_table.cc index 1a8041f41..a90058aeb 100644 --- a/src/iceberg/inspect/history_table.cc +++ b/src/iceberg/inspect/history_table.cc @@ -35,7 +35,7 @@ HistoryTable::HistoryTable(std::shared_ptr
table) HistoryTable::~HistoryTable() = default; -std::shared_ptr HistoryTable::GetSchema() const { +std::shared_ptr HistoryTable::GetSchema() const override { return std::make_shared( std::vector{ SchemaField::MakeRequired(1, "made_current_at", timestamp_tz()), @@ -46,7 +46,7 @@ std::shared_ptr HistoryTable::GetSchema() const { } TableIdentifier HistoryTable::CreateName(const TableIdentifier& source_name) { - return TableIdentifier{source_name.ns, source_name.name + ".history"}; + return TableIdentifier{.ns = source_name.ns, .name = source_name.name + ".history"}; } Result> HistoryTable::Make(std::shared_ptr
table) { diff --git a/src/iceberg/inspect/snapshots_table.cc b/src/iceberg/inspect/snapshots_table.cc index 22b189027..dc85380c6 100644 --- a/src/iceberg/inspect/snapshots_table.cc +++ b/src/iceberg/inspect/snapshots_table.cc @@ -35,7 +35,7 @@ SnapshotsTable::SnapshotsTable(std::shared_ptr
table) SnapshotsTable::~SnapshotsTable() = default; -std::shared_ptr SnapshotsTable::GetSchema() const { +std::shared_ptr SnapshotsTable::GetSchema() const override { return std::make_shared( std::vector{ SchemaField::MakeRequired(1, "committed_at", timestamp_tz()), @@ -52,7 +52,7 @@ std::shared_ptr SnapshotsTable::GetSchema() const { } TableIdentifier SnapshotsTable::CreateName(const TableIdentifier& source_name) { - return TableIdentifier{source_name.ns, source_name.name + ".snapshots"}; + return TableIdentifier{.ns = source_name.ns, .name = source_name.name + ".snapshots"}; } Result> SnapshotsTable::Make( From 18ebed795e5ada94bd86c4be42883dfed639dbb5 Mon Sep 17 00:00:00 2001 From: Huangshi Tian Date: Mon, 8 Jun 2026 01:37:03 +0000 Subject: [PATCH 10/11] Remove override keyword from GetSchema impl --- src/iceberg/inspect/history_table.cc | 2 +- src/iceberg/inspect/snapshots_table.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iceberg/inspect/history_table.cc b/src/iceberg/inspect/history_table.cc index a90058aeb..980ac1a89 100644 --- a/src/iceberg/inspect/history_table.cc +++ b/src/iceberg/inspect/history_table.cc @@ -35,7 +35,7 @@ HistoryTable::HistoryTable(std::shared_ptr
table) HistoryTable::~HistoryTable() = default; -std::shared_ptr HistoryTable::GetSchema() const override { +std::shared_ptr HistoryTable::GetSchema() const { return std::make_shared( std::vector{ SchemaField::MakeRequired(1, "made_current_at", timestamp_tz()), diff --git a/src/iceberg/inspect/snapshots_table.cc b/src/iceberg/inspect/snapshots_table.cc index dc85380c6..5cf5ced47 100644 --- a/src/iceberg/inspect/snapshots_table.cc +++ b/src/iceberg/inspect/snapshots_table.cc @@ -35,7 +35,7 @@ SnapshotsTable::SnapshotsTable(std::shared_ptr
table) SnapshotsTable::~SnapshotsTable() = default; -std::shared_ptr SnapshotsTable::GetSchema() const override { +std::shared_ptr SnapshotsTable::GetSchema() const { return std::make_shared( std::vector{ SchemaField::MakeRequired(1, "committed_at", timestamp_tz()), From 92fe7fe3587cdcb4f1ff8fcec18533619b881c81 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 15 Jun 2026 10:16:11 +0800 Subject: [PATCH 11/11] polish and simplify metadata table design --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/inspect/history_table.cc | 40 ++++---- src/iceberg/inspect/history_table.h | 21 +---- src/iceberg/inspect/metadata_table.cc | 67 +++----------- src/iceberg/inspect/metadata_table.h | 117 ++++-------------------- src/iceberg/inspect/snapshots_table.cc | 49 +++++----- src/iceberg/inspect/snapshots_table.h | 23 +---- src/iceberg/table.cc | 17 ++++ src/iceberg/table.h | 11 ++- src/iceberg/test/CMakeLists.txt | 5 +- src/iceberg/test/metadata_table_test.cc | 99 +++++++++----------- src/iceberg/test/table_test.cc | 16 ++++ 12 files changed, 175 insertions(+), 291 deletions(-) diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index c5b3aa819..95bcb6094 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -239,6 +239,7 @@ add_subdirectory(puffin) add_subdirectory(row) add_subdirectory(update) add_subdirectory(util) +add_subdirectory(inspect) add_subdirectory(metrics) if(ICEBERG_BUILD_BUNDLE) diff --git a/src/iceberg/inspect/history_table.cc b/src/iceberg/inspect/history_table.cc index 980ac1a89..7fa840043 100644 --- a/src/iceberg/inspect/history_table.cc +++ b/src/iceberg/inspect/history_table.cc @@ -21,36 +21,42 @@ #include #include +#include -#include "iceberg/inspect/metadata_table.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" +#include "iceberg/table.h" #include "iceberg/table_identifier.h" #include "iceberg/type.h" namespace iceberg { - -HistoryTable::HistoryTable(std::shared_ptr
table) - : MetadataTable(table, CreateName(table->name())) {} - -HistoryTable::~HistoryTable() = default; - -std::shared_ptr HistoryTable::GetSchema() const { - return std::make_shared( - std::vector{ - SchemaField::MakeRequired(1, "made_current_at", timestamp_tz()), - SchemaField::MakeRequired(2, "snapshot_id", int64()), - SchemaField::MakeOptional(3, "parent_id", int64()), - SchemaField::MakeRequired(4, "is_current_ancestor", boolean())}, - 1); +namespace { + +std::shared_ptr MakeHistoryTableSchema() { + return std::make_shared(std::vector{ + SchemaField::MakeRequired(1, "made_current_at", timestamp_tz()), + SchemaField::MakeRequired(2, "snapshot_id", int64()), + SchemaField::MakeOptional(3, "parent_id", int64()), + SchemaField::MakeRequired(4, "is_current_ancestor", boolean())}); } -TableIdentifier HistoryTable::CreateName(const TableIdentifier& source_name) { +TableIdentifier MakeHistoryTableName(const TableIdentifier& source_name) { return TableIdentifier{.ns = source_name.ns, .name = source_name.name + ".history"}; } +} // namespace + +HistoryTable::HistoryTable(std::shared_ptr
table) + : MetadataTable(table, MakeHistoryTableName(table->name()), + MakeHistoryTableSchema()) {} + +HistoryTable::~HistoryTable() = default; + Result> HistoryTable::Make(std::shared_ptr
table) { - return std::unique_ptr(new HistoryTable(table)); + if (table == nullptr) [[unlikely]] { + return InvalidArgument("Table cannot be null"); + } + return std::unique_ptr(new HistoryTable(std::move(table))); } } // namespace iceberg diff --git a/src/iceberg/inspect/history_table.h b/src/iceberg/inspect/history_table.h index d10a4967e..94a26368f 100644 --- a/src/iceberg/inspect/history_table.h +++ b/src/iceberg/inspect/history_table.h @@ -24,36 +24,21 @@ #include "iceberg/iceberg_export.h" #include "iceberg/inspect/metadata_table.h" #include "iceberg/result.h" -#include "iceberg/table.h" +#include "iceberg/type_fwd.h" namespace iceberg { -/// \brief History metadata table -/// -/// History is based on the table's snapshot log, which logs each update -/// to the table's current snapshot. Each row has columns: -/// - made_current_at (long, timestamp) -/// - snapshot_id (long) -/// - parent_id (long, optional) -/// - is_current_ancestor (bool) +/// \brief History metadata table. class ICEBERG_EXPORT HistoryTable : public MetadataTable { public: - /// \brief Create a HistoryTable from table metadata - /// - /// \param[in] table The source table - /// \return A HistoryTable instance or error status static Result> Make(std::shared_ptr
table); ~HistoryTable() override; - MetadataTableType type() const noexcept override { return MetadataTableType::kHistory; } - - std::shared_ptr GetSchema() const override; + Kind kind() const noexcept override { return Kind::kHistory; } private: explicit HistoryTable(std::shared_ptr
table); - - TableIdentifier CreateName(const TableIdentifier& source_name); }; } // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table.cc b/src/iceberg/inspect/metadata_table.cc index 3d32b6014..5e9504003 100644 --- a/src/iceberg/inspect/metadata_table.cc +++ b/src/iceberg/inspect/metadata_table.cc @@ -20,76 +20,35 @@ #include "iceberg/inspect/metadata_table.h" #include -#include #include -#include "iceberg/file_io.h" #include "iceberg/inspect/history_table.h" #include "iceberg/inspect/snapshots_table.h" -#include "iceberg/partition_spec.h" -#include "iceberg/schema.h" -#include "iceberg/schema_field.h" -#include "iceberg/sort_order.h" -#include "iceberg/table_identifier.h" -#include "iceberg/table_metadata.h" -#include "iceberg/table_properties.h" -#include "iceberg/table_scan.h" -#include "iceberg/type.h" -#include "iceberg/type_fwd.h" -#include "iceberg/util/uuid.h" namespace iceberg { MetadataTable::MetadataTable(std::shared_ptr
source_table, - TableIdentifier identifier) - : StaticTable(identifier, source_table->metadata(), - std::string(source_table->metadata_file_location()), source_table->io(), - source_table->catalog()), - source_table_(std::move(source_table)) { - auto schema = GetSchema(); - if (!schema) { - schema = std::make_shared(std::vector{}, 1); - } - - auto builder = - TableMetadataBuilder::BuildFromEmpty(TableMetadata::kDefaultTableFormatVersion); - auto result = builder->AssignUUID(Uuid::GenerateV4().ToString()) - .SetLocation(std::string(source_table_->location())) - .SetCurrentSchema(schema, schema->schema_id()) - .SetDefaultSortOrder(SortOrder::Unsorted()) - .SetDefaultPartitionSpec(PartitionSpec::Unpartitioned()) - .SetProperties({}) - .Build(); - - if (!result.has_value()) { - // If metadata building fails, keep the original metadata from source_table - return; - } - - std::shared_ptr built_metadata = std::move(result.value()); - - metadata_ = built_metadata; - metadata_cache_ = std::make_unique(metadata_.get()); -} + TableIdentifier identifier, std::shared_ptr schema) + : identifier_(std::move(identifier)), + schema_(std::move(schema)), + source_table_(std::move(source_table)) {} MetadataTable::~MetadataTable() = default; -Status MetadataTable::Refresh() { return source_table_->Refresh(); } - -Result> MetadataTable::NewScan() const { - return NotSupported("TODO: Scanning metadata tables is not yet supported"); -}; +Result> MetadataTable::Make(std::shared_ptr
table, + Kind kind) { + if (table == nullptr) [[unlikely]] { + return InvalidArgument("Table cannot be null"); + } -Result> MetadataTableFactory::CreateMetadataTable( - std::shared_ptr
table, MetadataTableType type) { - switch (type) { - case MetadataTableType::kSnapshots: + switch (kind) { + case Kind::kSnapshots: return SnapshotsTable::Make(table); - case MetadataTableType::kHistory: + case Kind::kHistory: return HistoryTable::Make(table); } - return Invalid("Unsupported metadata table type"); + return NotSupported("Unsupported metadata table type"); } } // namespace iceberg diff --git a/src/iceberg/inspect/metadata_table.h b/src/iceberg/inspect/metadata_table.h index cc6021067..7d0ac22da 100644 --- a/src/iceberg/inspect/metadata_table.h +++ b/src/iceberg/inspect/metadata_table.h @@ -20,122 +20,43 @@ #pragma once #include -#include #include "iceberg/iceberg_export.h" -#include "iceberg/location_provider.h" #include "iceberg/result.h" -#include "iceberg/sort_order.h" -#include "iceberg/table.h" -#include "iceberg/table_metadata.h" -#include "iceberg/table_scan.h" +#include "iceberg/table_identifier.h" #include "iceberg/type_fwd.h" namespace iceberg { -/// \brief The type of metadata table -enum class MetadataTableType { - kSnapshots, - kHistory, -}; - -/// \brief Base class for Iceberg metadata tables -/// -/// Metadata tables expose table metadata as queryable tables with schemas and scan -/// support. They provide read-only access to metadata. -class ICEBERG_EXPORT MetadataTable : public StaticTable { +/// \brief Base class for Iceberg metadata tables. +class ICEBERG_EXPORT MetadataTable { public: - virtual MetadataTableType type() const noexcept = 0; - - /// \brief Returns the table's metadata file location - std::string_view metadata_file_location() const { - return source_table_->metadata_file_location(); - } - - /// \brief Returns the table's base location - std::string_view location() const { return source_table_->location(); } - - /// \brief Returns the time when this table was last updated - TimePointMs last_updated_ms() const { return source_table_->last_updated_ms(); } - - /// \brief Returns the table's current snapshot, return NotFoundError if not found - Result> current_snapshot() const { - return source_table_->current_snapshot(); - } + enum class Kind { + kSnapshots, + kHistory, + }; - /// \brief Get the snapshot of this table with the given id - /// - /// \param snapshot_id the ID of the snapshot to get - /// \return the Snapshot with the given id, return NotFoundError if not found - Result> SnapshotById(int64_t snapshot_id) const { - return source_table_->SnapshotById(snapshot_id); - } + static Result> Make(std::shared_ptr
table, + Kind kind); - /// \brief Get the snapshots of this table - const std::vector>& snapshots() const { - return source_table_->snapshots(); - } + virtual ~MetadataTable(); - /// \brief Get the snapshot history of this table - const std::vector& history() const { - return source_table_->history(); - } + virtual Kind kind() const noexcept = 0; - /// \brief Returns the catalog that this table belongs to - const std::shared_ptr& catalog() const { return source_table_->catalog(); } + const TableIdentifier& name() const { return identifier_; } - /// \brief Returns a LocationProvider for this table - Result> location_provider() const { - return source_table_->location_provider(); - } + const std::shared_ptr& schema() const { return schema_; } - /// \brief Refreshing is not supported in metadata tables. - Status Refresh() override; - - /// \brief Create a new table scan builder for this table - /// - /// Once a table scan builder is created, it can be refined to project columns and - /// filter data. - Result> NewScan() const override; - - ~MetadataTable(); + const std::shared_ptr
& source_table() const { return source_table_; } protected: - explicit MetadataTable(std::shared_ptr
source_table, TableIdentifier identifier); - - /// \brief Returns the schema for this metadata table - /// - /// Subclasses override this method to provide their custom schema during - /// MetadataTable construction. The returned schema is used to initialize - /// the underlying TableMetadata. - /// - /// \return The schema for this metadata table, or nullptr for default schema - virtual std::shared_ptr GetSchema() const { return nullptr; } + explicit MetadataTable(std::shared_ptr
source_table, TableIdentifier identifier, + std::shared_ptr schema); + private: + TableIdentifier identifier_; + std::shared_ptr schema_; std::shared_ptr
source_table_; }; -/// \brief Metadata table factory and inspector -/// -/// MetadataTable provides factory methods to create specific metadata tables for -/// inspecting table metadata. Each metadata table exposes a different aspect of the -/// table's metadata as a scannable Iceberg table. -/// -/// Usage: -/// auto metadata_table = ICEBERG_TRY( -/// MetadataTableFactory::CreateMetadataTable( -/// table, MetadataTableType::kSnapshots)); -/// auto scan = ICEBERG_TRY(metadata_table->NewScan()); -/// // ... scan and read metadata table data -class ICEBERG_EXPORT MetadataTableFactory { - public: - /// \brief Create a metadata table from a table - /// - /// \param table The source table - /// \param type The metadata table type to create - /// \return A MetadataTable instance or error status - static Result> CreateMetadataTable( - std::shared_ptr
table, MetadataTableType type); -}; - } // namespace iceberg diff --git a/src/iceberg/inspect/snapshots_table.cc b/src/iceberg/inspect/snapshots_table.cc index 5cf5ced47..4b0c3ce9f 100644 --- a/src/iceberg/inspect/snapshots_table.cc +++ b/src/iceberg/inspect/snapshots_table.cc @@ -21,43 +21,48 @@ #include #include +#include -#include "iceberg/inspect/metadata_table.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" +#include "iceberg/table.h" #include "iceberg/table_identifier.h" #include "iceberg/type.h" namespace iceberg { +namespace { -SnapshotsTable::SnapshotsTable(std::shared_ptr
table) - : MetadataTable(table, CreateName(table->name())) {} - -SnapshotsTable::~SnapshotsTable() = default; - -std::shared_ptr SnapshotsTable::GetSchema() const { - return std::make_shared( - std::vector{ - SchemaField::MakeRequired(1, "committed_at", timestamp_tz()), - SchemaField::MakeRequired(2, "snapshot_id", int64()), - SchemaField::MakeRequired(3, "parent_id", int64()), - SchemaField::MakeOptional(4, "operation", int64()), - SchemaField::MakeOptional(5, "manifest_list", string()), - SchemaField::MakeOptional( - 6, "summary", - std::make_shared( - SchemaField::MakeRequired(7, "key", string()), - SchemaField::MakeRequired(8, "value", string())))}, - 1); +std::shared_ptr MakeSnapshotsTableSchema() { + return std::make_shared(std::vector{ + SchemaField::MakeRequired(1, "committed_at", timestamp_tz()), + SchemaField::MakeRequired(2, "snapshot_id", int64()), + SchemaField::MakeOptional(3, "parent_id", int64()), + SchemaField::MakeOptional(4, "operation", string()), + SchemaField::MakeOptional(5, "manifest_list", string()), + SchemaField::MakeOptional(6, "summary", + std::make_shared( + SchemaField::MakeRequired(7, "key", string()), + SchemaField::MakeRequired(8, "value", string())))}); } -TableIdentifier SnapshotsTable::CreateName(const TableIdentifier& source_name) { +TableIdentifier MakeSnapshotsTableName(const TableIdentifier& source_name) { return TableIdentifier{.ns = source_name.ns, .name = source_name.name + ".snapshots"}; } +} // namespace + +SnapshotsTable::SnapshotsTable(std::shared_ptr
table) + : MetadataTable(table, MakeSnapshotsTableName(table->name()), + MakeSnapshotsTableSchema()) {} + +SnapshotsTable::~SnapshotsTable() = default; + Result> SnapshotsTable::Make( std::shared_ptr
table) { - return std::unique_ptr(new SnapshotsTable(table)); + if (table == nullptr) [[unlikely]] { + return InvalidArgument("Table cannot be null"); + } + return std::unique_ptr(new SnapshotsTable(std::move(table))); } } // namespace iceberg diff --git a/src/iceberg/inspect/snapshots_table.h b/src/iceberg/inspect/snapshots_table.h index 86e57dc13..50017796f 100644 --- a/src/iceberg/inspect/snapshots_table.h +++ b/src/iceberg/inspect/snapshots_table.h @@ -24,38 +24,21 @@ #include "iceberg/iceberg_export.h" #include "iceberg/inspect/metadata_table.h" #include "iceberg/result.h" -#include "iceberg/table.h" +#include "iceberg/type_fwd.h" namespace iceberg { -/// \brief Snapshots metadata table -/// -/// Exposes all snapshots in the table as rows with columns: -/// - committed_at (timestamp) -/// - snapshot_id (long) -/// - parent_id (long) -/// - manifest_list (string) -/// - summary (map) +/// \brief Snapshots metadata table. class ICEBERG_EXPORT SnapshotsTable : public MetadataTable { public: - /// \brief Create a SnapshotsTable from table metadata - /// - /// \param[in] table The source table - /// \return A SnapshotsTable instance or error status static Result> Make(std::shared_ptr
table); ~SnapshotsTable() override; - MetadataTableType type() const noexcept override { - return MetadataTableType::kSnapshots; - } - - std::shared_ptr GetSchema() const override; + Kind kind() const noexcept override { return Kind::kSnapshots; } private: explicit SnapshotsTable(std::shared_ptr
table); - - TableIdentifier CreateName(const TableIdentifier& source_name); }; } // namespace iceberg diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index 45a3882cb..ba83de123 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -303,4 +303,21 @@ Result> StaticTable::NewExpireSnapshots() { return NotSupported("Cannot create an expire snapshots for a static table"); } +Result> StaticTable::NewUpdateStatistics() { + return NotSupported("Cannot create an update statistics for a static table"); +} + +Result> +StaticTable::NewUpdatePartitionStatistics() { + return NotSupported("Cannot create an update partition statistics for a static table"); +} + +Result> StaticTable::NewFastAppend() { + return NotSupported("Cannot create a fast append for a static table"); +} + +Result> StaticTable::NewSnapshotManager() { + return NotSupported("Cannot create a snapshot manager for a static table"); +} + } // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index e560e0a0c..a5e102686 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -52,7 +52,7 @@ class ICEBERG_EXPORT Table : public std::enable_shared_from_this
{ virtual ~Table(); /// \brief Returns the identifier of this table - virtual const TableIdentifier& name() const { return identifier_; } + const TableIdentifier& name() const { return identifier_; } /// \brief Returns the UUID of the table const std::string& uuid() const; @@ -236,6 +236,15 @@ class ICEBERG_EXPORT StaticTable : public Table { Result> NewExpireSnapshots() override; + Result> NewUpdateStatistics() override; + + Result> NewUpdatePartitionStatistics() + override; + + Result> NewFastAppend() override; + + Result> NewSnapshotManager() override; + private: using Table::Table; }; diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 4c9b55975..5a237f870 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -89,6 +89,7 @@ add_iceberg_test(schema_test add_iceberg_test(table_test SOURCES location_provider_test.cc + metadata_table_test.cc metrics_config_test.cc metrics_reporter_test.cc metrics_test.cc @@ -243,10 +244,6 @@ if(ICEBERG_BUILD_BUNDLE) delete_loader_test.cc file_scan_task_reader_test.cc) - add_iceberg_test(metadata_table_test SOURCES metadata_table_test.cc) - - add_iceberg_test(data_writer_test USE_BUNDLE SOURCES data_writer_test.cc) - endif() if(ICEBERG_BUILD_SQL_CATALOG diff --git a/src/iceberg/test/metadata_table_test.cc b/src/iceberg/test/metadata_table_test.cc index 058b10107..1e0a664c3 100644 --- a/src/iceberg/test/metadata_table_test.cc +++ b/src/iceberg/test/metadata_table_test.cc @@ -22,7 +22,6 @@ #include #include -#include "iceberg/inspect/snapshots_table.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" #include "iceberg/table.h" @@ -31,8 +30,33 @@ #include "iceberg/test/matchers.h" #include "iceberg/test/mock_catalog.h" #include "iceberg/test/mock_io.h" +#include "iceberg/type.h" namespace iceberg { +namespace { + +std::shared_ptr MakeSnapshotsSchema() { + return std::make_shared(std::vector{ + SchemaField::MakeRequired(1, "committed_at", timestamp_tz()), + SchemaField::MakeRequired(2, "snapshot_id", int64()), + SchemaField::MakeOptional(3, "parent_id", int64()), + SchemaField::MakeOptional(4, "operation", string()), + SchemaField::MakeOptional(5, "manifest_list", string()), + SchemaField::MakeOptional( + 6, "summary", + std::make_shared(SchemaField::MakeRequired(7, "key", string()), + SchemaField::MakeRequired(8, "value", string())))}); +} + +std::shared_ptr MakeHistorySchema() { + return std::make_shared(std::vector{ + SchemaField::MakeRequired(1, "made_current_at", timestamp_tz()), + SchemaField::MakeRequired(2, "snapshot_id", int64()), + SchemaField::MakeOptional(3, "parent_id", int64()), + SchemaField::MakeRequired(4, "is_current_ancestor", boolean())}); +} + +} // namespace class MetadataTableTest : public ::testing::Test { protected: @@ -54,8 +78,8 @@ class MetadataTableTest : public ::testing::Test { EXPECT_THAT(source_table_result, IsOk()); source_table_ = *source_table_result; - auto snapshots_table_result = MetadataTableFactory::CreateMetadataTable( - source_table_, MetadataTableType::kSnapshots); + auto snapshots_table_result = + MetadataTable::Make(source_table_, MetadataTable::Kind::kSnapshots); EXPECT_THAT(snapshots_table_result, IsOk()); snapshots_table_ = std::move(*snapshots_table_result); } @@ -68,68 +92,29 @@ class MetadataTableTest : public ::testing::Test { }; TEST_F(MetadataTableTest, Constructor) { + EXPECT_EQ(snapshots_table_->kind(), MetadataTable::Kind::kSnapshots); + EXPECT_EQ(snapshots_table_->source_table(), source_table_); EXPECT_EQ(snapshots_table_->name().name, "source_table.snapshots"); - EXPECT_FALSE(snapshots_table_->uuid().empty()); - auto schema_result = snapshots_table_->schema(); - EXPECT_THAT(schema_result, IsOk()); + EXPECT_EQ(snapshots_table_->name().ns.levels, (std::vector{"db"})); + EXPECT_NE(snapshots_table_->schema(), nullptr); } -TEST_F(MetadataTableTest, DelegatesToSourceTable) { - EXPECT_EQ(snapshots_table_->location(), source_table_->location()); - EXPECT_EQ(snapshots_table_->last_updated_ms(), source_table_->last_updated_ms()); - EXPECT_EQ(snapshots_table_->catalog(), source_table_->catalog()); +TEST_F(MetadataTableTest, SnapshotsSchemaMatchesIcebergSchema) { + EXPECT_TRUE(*snapshots_table_->schema() == *MakeSnapshotsSchema()); } -TEST_F(MetadataTableTest, NotSupportedOperations) { - EXPECT_THAT(snapshots_table_->NewTransaction(), HasErrorMessage("Cannot")); - EXPECT_THAT(snapshots_table_->NewUpdateProperties(), HasErrorMessage("Cannot")); - EXPECT_THAT(snapshots_table_->NewUpdateSchema(), HasErrorMessage("Cannot")); - EXPECT_THAT(snapshots_table_->NewUpdateLocation(), HasErrorMessage("Cannot")); - EXPECT_THAT(snapshots_table_->NewUpdatePartitionSpec(), HasErrorMessage("Cannot")); - EXPECT_THAT(snapshots_table_->NewUpdateSortOrder(), HasErrorMessage("Cannot")); - EXPECT_THAT(snapshots_table_->NewExpireSnapshots(), HasErrorMessage("Cannot")); -} - -TEST_F(MetadataTableTest, SchemasAndSpecs) { - auto schemas_result = snapshots_table_->schemas(); - EXPECT_THAT(schemas_result, IsOk()); - EXPECT_EQ(schemas_result->get().size(), 1); +TEST_F(MetadataTableTest, HistorySchemaMatchesIcebergSchema) { + auto history_table_result = + MetadataTable::Make(source_table_, MetadataTable::Kind::kHistory); + ASSERT_THAT(history_table_result, IsOk()); - auto spec_result = snapshots_table_->spec(); - EXPECT_THAT(spec_result, IsOk()); - - auto specs_result = snapshots_table_->specs(); - EXPECT_THAT(specs_result, IsOk()); - EXPECT_EQ(specs_result->get().size(), 1); + EXPECT_TRUE(*(*history_table_result)->schema() == *MakeHistorySchema()); } -TEST_F(MetadataTableTest, SortOrders) { - auto sort_order_result = snapshots_table_->sort_order(); - EXPECT_THAT(sort_order_result, IsOk()); - - auto sort_orders_result = snapshots_table_->sort_orders(); - EXPECT_THAT(sort_orders_result, IsOk()); - EXPECT_EQ(sort_orders_result->get().size(), 1); -} - -TEST_F(MetadataTableTest, Properties) { - EXPECT_EQ(snapshots_table_->properties().configs().size(), 0); -} - -TEST_F(MetadataTableTest, Snapshots) { - // Assuming source table has no current snapshot - auto cur_snapshot_result = snapshots_table_->current_snapshot(); - EXPECT_THAT(cur_snapshot_result, IsError(ErrorKind::kNotFound)); - auto snapshot_result = snapshots_table_->SnapshotById(1); - EXPECT_THAT(snapshot_result, IsError(ErrorKind::kNotFound)); - EXPECT_TRUE(snapshots_table_->snapshots().empty()); -} - -TEST_F(MetadataTableTest, History) { EXPECT_TRUE(snapshots_table_->history().empty()); } - -TEST_F(MetadataTableTest, LocationProvider) { - auto lp_result = snapshots_table_->location_provider(); - EXPECT_THAT(lp_result, IsOk()); +TEST_F(MetadataTableTest, FactoryRejectsNullSourceTable) { + auto result = MetadataTable::Make(nullptr, MetadataTable::Kind::kSnapshots); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(result, HasErrorMessage("Table cannot be null")); } } // namespace iceberg diff --git a/src/iceberg/test/table_test.cc b/src/iceberg/test/table_test.cc index 0ad8e8ced..9cfe9063e 100644 --- a/src/iceberg/test/table_test.cc +++ b/src/iceberg/test/table_test.cc @@ -147,4 +147,20 @@ TYPED_TEST(TypedTableTest, NewTransaction) { } } +TEST(StaticTableTest, NewMutatingOperationsAreNotSupported) { + auto io = std::make_shared(); + auto schema = std::make_shared( + std::vector{SchemaField::MakeRequired(1, "id", int64())}, 1); + auto metadata = std::make_shared( + TableMetadata{.format_version = 2, .schemas = {schema}, .current_schema_id = 1}); + TableIdentifier ident{.ns = Namespace{.levels = {"db"}}, .name = "test_table"}; + ICEBERG_UNWRAP_OR_FAIL(auto table, StaticTable::Make(ident, std::move(metadata), + "s3://bucket/meta.json", io)); + + EXPECT_THAT(table->NewUpdateStatistics(), IsError(ErrorKind::kNotSupported)); + EXPECT_THAT(table->NewUpdatePartitionStatistics(), IsError(ErrorKind::kNotSupported)); + EXPECT_THAT(table->NewFastAppend(), IsError(ErrorKind::kNotSupported)); + EXPECT_THAT(table->NewSnapshotManager(), IsError(ErrorKind::kNotSupported)); +} + } // namespace iceberg