Skip to content

Commit ca60558

Browse files
committed
[SPARK-55645][SQL][FOLLOWUP] Move serdeName to last parameter and filter empty strings
### What changes were proposed in this pull request?

Two followup improvements to #54467 (SPARK-55645):

1. Move `serdeName` to the last parameter of `CatalogStorageFormat` with a default value of `None`, so that existing callers that construct `CatalogStorageFormat` positionally remain source-compatible without code changes.
2. Filter empty strings when reading `serdeName` from the Hive Metastore API — Hive returns `""` for tables without an explicit serde name, which should map to `None` rather than `Some("")`.

### Why are the changes needed?

1. Adding `serdeName` as a required positional parameter in the middle of the parameter list breaks source compatibility for all external callers (e.g., third-party connectors) that construct `CatalogStorageFormat` positionally. Moving it to the last position with a default value avoids this.
2. The Hive Metastore returns an empty string for `SerDeInfo.name` when no serde name is explicitly set. Without filtering, `Option("")` produces `Some("")` instead of the semantically correct `None`, which could cause unexpected behavior in downstream code that checks `serdeName.isDefined` or pattern-matches on it.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added a new test `serdeName should be None for tables without an explicit serde name` that verifies the empty string filtering. Existing tests cover the parameter reordering.

### Was this patch authored or co-authored using generative AI tooling?

Yes

Closes #54860 from cloud-fan/SPARK-55645-followup.

Lead-authored-by: Wenchen Fan <wenchen@databricks.com>
Co-authored-by: Wenchen Fan <cloud0fan@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent cd2b43d commit ca60558

14 files changed

Lines changed: 34 additions & 34 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,10 @@ case class CatalogStorageFormat(
144144
locationUri: Option[URI],
145145
inputFormat: Option[String],
146146
outputFormat: Option[String],
147-
serdeName: Option[String],
148147
serde: Option[String],
149148
compressed: Boolean,
150-
properties: Map[String, String]) extends MetadataMapSupport {
149+
properties: Map[String, String],
150+
serdeName: Option[String] = None) extends MetadataMapSupport {
151151

152152
override def toString: String = {
153153
toLinkedHashMap.map { case (key, value) =>
@@ -181,7 +181,7 @@ case class CatalogStorageFormat(
181181
object CatalogStorageFormat {
182182
/** Empty storage format for default values and copies. */
183183
val empty = CatalogStorageFormat(locationUri = None, inputFormat = None, outputFormat = None,
184-
serdeName = None, serde = None, compressed = false, properties = Map.empty)
184+
serde = None, compressed = false, properties = Map.empty)
185185
}
186186

187187
/**
@@ -616,11 +616,11 @@ case class CatalogTable(
616616
inputFormat: Option[String] = storage.inputFormat,
617617
outputFormat: Option[String] = storage.outputFormat,
618618
compressed: Boolean = false,
619-
serdeName: Option[String] = storage.serdeName,
620619
serde: Option[String] = storage.serde,
621-
properties: Map[String, String] = storage.properties): CatalogTable = {
620+
properties: Map[String, String] = storage.properties,
621+
serdeName: Option[String] = storage.serdeName): CatalogTable = {
622622
copy(storage = CatalogStorageFormat(
623-
locationUri, inputFormat, outputFormat, serdeName, serde, compressed, properties))
623+
locationUri, inputFormat, outputFormat, serde, compressed, properties, serdeName))
624624
}
625625

626626
def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,6 @@ class ExternalCatalogEventSuite extends SparkFunSuite {
247247
locationUri = Some(tableUri),
248248
inputFormat = Some("tableInputFormat"),
249249
outputFormat = Some("tableOutputFormat"),
250-
serdeName = None,
251250
serde = None,
252251
compressed = false,
253252
properties = Map.empty)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite {
911911
tableType = CatalogTableType.EXTERNAL,
912912
storage = CatalogStorageFormat(
913913
Some(Utils.createTempDir().toURI),
914-
None, None, None, None, false, Map.empty),
914+
None, None, None, false, Map.empty),
915915
schema = new StructType().add("a", "int").add("b", "string"),
916916
provider = Some(defaultProvider)
917917
)
@@ -959,7 +959,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite {
959959
Map("partCol1" -> "7", "partCol2" -> "8"),
960960
CatalogStorageFormat(
961961
Some(tempPath.toURI),
962-
None, None, None, None, false, Map.empty))
962+
None, None, None, false, Map.empty))
963963
catalog.createPartitions("db1", "tbl", Seq(partWithExistingDir), ignoreIfExists = false)
964964

965965
tempPath.delete()
@@ -968,7 +968,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite {
968968
Map("partCol1" -> "9", "partCol2" -> "10"),
969969
CatalogStorageFormat(
970970
Some(tempPath.toURI),
971-
None, None, None, None, false, Map.empty))
971+
None, None, None, false, Map.empty))
972972
catalog.createPartitions("db1", "tbl", Seq(partWithNonExistingDir), ignoreIfExists = false)
973973
assert(tempPath.exists())
974974
}
@@ -1030,7 +1030,6 @@ abstract class CatalogTestUtils {
10301030
locationUri = None,
10311031
inputFormat = Some(tableInputFormat),
10321032
outputFormat = Some(tableOutputFormat),
1033-
serdeName = None,
10341033
serde = None,
10351034
compressed = false,
10361035
properties = Map.empty)

sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ class AlwaysPersistedConfigsSuite extends QueryTest with SharedSparkSession {
158158
val catalogTable = new CatalogTable(
159159
identifier = TableIdentifier(testViewName),
160160
tableType = CatalogTableType.VIEW,
161-
storage = CatalogStorageFormat(None, None, None, None, None, false, Map.empty),
161+
storage = CatalogStorageFormat(None, None, None, None, false, Map.empty),
162162
schema = new StructType(),
163163
properties = Map.empty[String, String]
164164
)

sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ trait DDLSuiteBase extends SQLTestUtils {
317317
spec: TablePartitionSpec,
318318
tableName: TableIdentifier): Unit = {
319319
val part = CatalogTablePartition(
320-
spec, CatalogStorageFormat(None, None, None, None, None, false, Map()))
320+
spec, CatalogStorageFormat(None, None, None, None, false, Map()))
321321
catalog.createPartitions(tableName, Seq(part), ignoreIfExists = false)
322322
}
323323

sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -944,7 +944,6 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
944944
locationUri = None,
945945
inputFormat = None,
946946
outputFormat = None,
947-
serdeName = None,
948947
serde = None,
949948
compressed = false,
950949
properties = Map.empty),
@@ -973,7 +972,6 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
973972
locationUri = None,
974973
inputFormat = None,
975974
outputFormat = None,
976-
serdeName = None,
977975
serde = None,
978976
compressed = false,
979977
properties = Map.empty),

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -177,23 +177,23 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
177177
val options = storage.properties + (ParquetOptions.MERGE_SCHEMA ->
178178
SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING).toString)
179179
storage.copy(
180-
serdeName = None,
181180
serde = None,
182-
properties = options
181+
properties = options,
182+
serdeName = None
183183
)
184184
} else {
185185
val options = storage.properties
186186
if (SQLConf.get.getConf(SQLConf.ORC_IMPLEMENTATION) == "native") {
187187
storage.copy(
188-
serdeName = None,
189188
serde = None,
190-
properties = options
189+
properties = options,
190+
serdeName = None
191191
)
192192
} else {
193193
storage.copy(
194-
serdeName = None,
195194
serde = None,
196-
properties = options
195+
properties = options,
196+
serdeName = None
197197
)
198198
}
199199
}

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -557,11 +557,11 @@ private[hive] class HiveClientImpl(
557557
outputFormat = Option(h.getTTable.getSd.getOutputFormat).orElse {
558558
Option(h.getStorageHandler).map(_.getOutputFormatClass.getName)
559559
},
560-
serdeName = Option(h.getTTable.getSd.getSerdeInfo.getName),
561560
serde = Option(h.getSerializationLib),
562561
compressed = h.getTTable.getSd.isCompressed,
563562
properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters)
564-
.map(_.asScala.toMap).orNull
563+
.map(_.asScala.toMap).orNull,
564+
serdeName = Option(h.getTTable.getSd.getSerdeInfo.getName).filter(_.nonEmpty)
565565
),
566566
// For EXTERNAL_TABLE, the table properties has a particular field "EXTERNAL". This is added
567567
// in the function toHiveTable.
@@ -1202,7 +1202,7 @@ private[hive] object HiveClientImpl extends Logging {
12021202
hiveTable.getTTable.getSd.setLocation(loc)}
12031203
table.storage.inputFormat.map(toInputFormat).foreach(hiveTable.setInputFormatClass)
12041204
table.storage.outputFormat.map(toOutputFormat).foreach(hiveTable.setOutputFormatClass)
1205-
table.storage.serdeName.foreach(hiveTable.getSd.getSerdeInfo.setName)
1205+
table.storage.serdeName.foreach(hiveTable.getTTable.getSd.getSerdeInfo.setName)
12061206
hiveTable.setSerializationLib(
12071207
table.storage.serde.getOrElse("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
12081208
table.storage.properties.foreach { case (k, v) => hiveTable.setSerdeParam(k, v) }
@@ -1283,11 +1283,11 @@ private[hive] object HiveClientImpl extends Logging {
12831283
locationUri = Option(CatalogUtils.stringToURI(apiPartition.getSd.getLocation)),
12841284
inputFormat = Option(apiPartition.getSd.getInputFormat),
12851285
outputFormat = Option(apiPartition.getSd.getOutputFormat),
1286-
serdeName = Option(apiPartition.getSd.getSerdeInfo.getName),
12871286
serde = Option(apiPartition.getSd.getSerdeInfo.getSerializationLib),
12881287
compressed = apiPartition.getSd.isCompressed,
12891288
properties = Option(apiPartition.getSd.getSerdeInfo.getParameters)
1290-
.map(_.asScala.toMap).orNull),
1289+
.map(_.asScala.toMap).orNull,
1290+
serdeName = Option(apiPartition.getSd.getSerdeInfo.getName).filter(_.nonEmpty)),
12911291
createTime = apiPartition.getCreateTime.toLong * 1000,
12921292
lastAccessTime = apiPartition.getLastAccessTime.toLong * 1000,
12931293
parameters = properties,

sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,4 +513,16 @@ class DataSourceWithHiveMetastoreCatalogSuite
513513
assert(tableWithSerdeName.storage.serdeName === Some("testSerdeName"))
514514
}
515515
}
516+
517+
test("SPARK-55645: serdeName should be None for tables without an explicit serde name") {
518+
withTable("t") {
519+
sql("CREATE TABLE t (d1 DECIMAL(10,3), d2 STRING) STORED AS TEXTFILE")
520+
521+
val hiveTable =
522+
sessionState.catalog.getTableMetadata(TableIdentifier("t", Some("default")))
523+
// Hive Metastore returns "" for tables without an explicit serde name.
524+
// This should be mapped to None, not Some("").
525+
assert(hiveTable.storage.serdeName === None)
526+
}
527+
}
516528
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ class HiveSchemaInferenceSuite
107107
locationUri = Option(dir.toURI),
108108
inputFormat = serde.inputFormat,
109109
outputFormat = serde.outputFormat,
110-
serdeName = None,
111110
serde = serde.serde,
112111
compressed = false,
113112
properties = Map("serialization.format" -> "1")),

0 commit comments

Comments (0)