From 23aeca3ce8f33ac7cb799c2e1a840a5ca7802dc0 Mon Sep 17 00:00:00 2001 From: Fokko Date: Mon, 11 Nov 2024 12:24:39 +0100 Subject: [PATCH] Core,Open-API: Don't expose the `last-column-id` Okay, I've added this to the spec a while ago: https://github.com/apache/iceberg/pull/7445 But I think this was a mistake, and we should not expose this to the public APIs, as it is much better to track this internally. I noticed this while reviewing https://github.com/apache/iceberg-rust/pull/587 Removing this as part of the APIs in Java, and the Open-API update makes it much more resilient, and don't require the clients to compute this value. For example. when there are two conflicting schema changes, the last-column-id must be recomputed correctly when doing the retry operation. --- .../org/apache/iceberg/MetadataUpdate.java | 10 ++++++++ .../apache/iceberg/MetadataUpdateParser.java | 5 ++++ .../java/org/apache/iceberg/SchemaUpdate.java | 2 +- .../org/apache/iceberg/TableMetadata.java | 25 ++++++++++++++++++- .../org/apache/iceberg/view/ViewMetadata.java | 7 +----- open-api/rest-catalog-open-api.py | 5 ---- open-api/rest-catalog-open-api.yaml | 3 --- 7 files changed, 41 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java index 49fb1fe01c44..ba038c196e43 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java +++ b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java @@ -86,6 +86,16 @@ class AddSchema implements MetadataUpdate { private final Schema schema; private final int lastColumnId; + public AddSchema(Schema schema) { + this(schema, schema.highestFieldId()); + } + + /** + * Set the schema + * + * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use AddSchema(schema). + */ + @Deprecated public AddSchema(Schema schema, int lastColumnId) { this.schema = schema; this.lastColumnId = lastColumnId; diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java b/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java index 8cdfd3c72b6e..9c9dca4d8dd7 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java +++ b/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java @@ -30,8 +30,11 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.util.JsonUtil; import org.apache.iceberg.view.ViewVersionParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class MetadataUpdateParser { + private static final Logger LOG = LoggerFactory.getLogger(MetadataUpdateParser.class); private MetadataUpdateParser() {} @@ -462,6 +465,8 @@ private static MetadataUpdate readAddSchema(JsonNode node) { Schema schema = SchemaParser.fromJson(schemaNode); int lastColumnId; if (node.has(LAST_COLUMN_ID)) { + LOG.warn( + "Field last-column-id of MetadataUpdate.AddSchema is since 1.8.0, will be removed 1.9.0 or 2.0.0"); lastColumnId = JsonUtil.getInt(LAST_COLUMN_ID, node); } else { lastColumnId = schema.highestFieldId(); diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java index 069097778606..2b541080ac72 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java +++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java @@ -444,7 +444,7 @@ public Schema apply() { @Override public void commit() { - TableMetadata update = applyChangesToMetadata(base.updateSchema(apply(), lastColumnId)); + TableMetadata update = applyChangesToMetadata(base.updateSchema(apply())); ops.commit(base, update); } diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 3cdc53995dce..d9b5b4f93249 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -562,10 +562,23 @@ public TableMetadata withUUID() { return new Builder(this).assignUUID().build(); } + /** + * Updates the schema + * + * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use updateSchema(schema). + */ + @Deprecated public TableMetadata updateSchema(Schema newSchema, int newLastColumnId) { return new Builder(this).setCurrentSchema(newSchema, newLastColumnId).build(); } + /** Updates the schema */ + public TableMetadata updateSchema(Schema newSchema) { + return new Builder(this) + .setCurrentSchema(newSchema, Math.max(this.lastColumnId, newSchema.highestFieldId())) + .build(); + } + // The caller is responsible to pass a newPartitionSpec with correct partition field IDs public TableMetadata updatePartitionSpec(PartitionSpec newPartitionSpec) { return new Builder(this).setDefaultPartitionSpec(newPartitionSpec).build(); @@ -1081,8 +1094,18 @@ public Builder setCurrentSchema(int schemaId) { return this; } + public Builder addSchema(Schema schema) { + addSchemaInternal(schema, schema.highestFieldId()); + return this; + } + + /** + * Add a new schema. + * + * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use AddSchema(schema). + */ + @Deprecated public Builder addSchema(Schema schema, int newLastColumnId) { - // TODO: remove requirement for newLastColumnId addSchemaInternal(schema, newLastColumnId); return this; } diff --git a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java index ae837ff96882..94f3a56ba931 100644 --- a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java +++ b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java @@ -372,20 +372,15 @@ private int addSchemaInternal(Schema schema) { newSchema = schema; } - int highestFieldId = Math.max(highestFieldId(), newSchema.highestFieldId()); schemas.add(newSchema); schemasById.put(newSchema.schemaId(), newSchema); - changes.add(new MetadataUpdate.AddSchema(newSchema, highestFieldId)); + changes.add(new MetadataUpdate.AddSchema(newSchema)); this.lastAddedSchemaId = newSchemaId; return newSchemaId; } - private int highestFieldId() { - return schemas.stream().map(Schema::highestFieldId).max(Integer::compareTo).orElse(0); - } - private int reuseOrCreateNewSchemaId(Schema newSchema) { // if the schema already exists, use its id; otherwise use the highest id + 1 int newSchemaId = INITIAL_SCHEMA_ID; diff --git a/open-api/rest-catalog-open-api.py b/open-api/rest-catalog-open-api.py index c3372544ef95..1c6e05e49a0c 100644 --- a/open-api/rest-catalog-open-api.py +++ b/open-api/rest-catalog-open-api.py @@ -1149,11 +1149,6 @@ class ViewMetadata(BaseModel): class AddSchemaUpdate(BaseUpdate): action: Literal['add-schema'] schema_: Schema = Field(..., alias='schema') - last_column_id: Optional[int] = Field( - None, - alias='last-column-id', - description='The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.', - ) class TableUpdate(BaseModel): diff --git a/open-api/rest-catalog-open-api.yaml b/open-api/rest-catalog-open-api.yaml index 9635af96c1ca..3006b2aef86e 100644 --- a/open-api/rest-catalog-open-api.yaml +++ b/open-api/rest-catalog-open-api.yaml @@ -2690,9 +2690,6 @@ components: enum: ["add-schema"] schema: $ref: '#/components/schemas/Schema' - last-column-id: - type: integer - description: The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side. SetCurrentSchemaUpdate: allOf: