Skip to content

Commit 06c4eaf

Browse files
murali-db and claude committed
[Server-Side Planning] Implement projection escaping for dotted column names
Add backtick escaping for column names containing dots when sending projections to the Iceberg REST API.

This distinguishes between:
- Literal dotted columns: "address.city" as a single field -> "`address.city`"
- Nested field access: address.intCol (parent.child) -> "address.intCol"

Implementation:
- Added escapeProjectedColumns() to process required schema fields
- Added escapeColumnNameIfNeeded() for recursive nested field handling
- Escaping happens in ServerSidePlannedTable before calling planScan()
- No changes to filter conversion (Iceberg's Binder handles disambiguation)

All tests passing (34 total: 22 iceberg + 12 spark)

Co-Authored-By: Claude Sonnet 4.5 <[email protected]>
1 parent fb9ad0e commit 06c4eaf

File tree

1 file changed

+51
-1
lines changed

1 file changed

+51
-1
lines changed

spark/src/main/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTable.scala

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,57 @@ class ServerSidePlannedScan(
279279
if (requiredSchema.fieldNames.toSet == tableSchema.fieldNames.toSet) {
280280
None
281281
} else {
282-
Some(requiredSchema.fieldNames.toSeq)
282+
Some(escapeProjectedColumns(requiredSchema, tableSchema))
283+
}
284+
}
285+
286+
/**
 * Build the projection column list, escaping names where dots are part of the name.
 *
 * Every top-level field name of `requiredSchema` is passed through
 * [[escapeColumnNameIfNeeded]] together with `tableSchema`, so that a literal dotted
 * column (e.g. "address.city" as a single field) is wrapped in backticks while nested
 * field access (e.g. address.intCol) is left unescaped.
 *
 * @param requiredSchema The schema whose top-level field names are projected
 * @param tableSchema The schema the names are checked against
 * @return The projected column names, in `requiredSchema` order
 */
private def escapeProjectedColumns(
    requiredSchema: StructType,
    tableSchema: StructType): Seq[String] = {
  val escapedNames =
    for (columnName <- requiredSchema.fieldNames)
      yield escapeColumnNameIfNeeded(columnName, tableSchema)
  escapedNames.toSeq
}
298+
299+
/**
 * Escape a (potentially dotted) column name so that dots which are part of a field name
 * can be told apart from dots that denote nested field access.
 *
 * Resolution against `schema`:
 *  1. A name without dots is returned unchanged.
 *  2. A name that exactly matches a field of `schema` is a literal dotted column and is
 *     wrapped in backticks.
 *  3. Otherwise the name is split at each dot, left to right. The first prefix that names
 *     a struct field of `schema` is taken as the parent, and the remainder is resolved
 *     recursively against that struct. A parent whose own name contains dots is wrapped
 *     in backticks (e.g. struct column "a.b" with child "c": "a.b.c" becomes "`a.b`.c").
 *  4. If no split resolves, the name is returned unchanged.
 *
 * @param name The column name (potentially with dots)
 * @param schema The schema to check against
 * @return The name with literal dotted segments wrapped in backticks
 */
private def escapeColumnNameIfNeeded(name: String, schema: StructType): String = {
  if (!name.contains(".")) {
    name
  } else if (schema.fields.exists(_.name == name)) {
    // It's a literal column name with dots -> escape it
    s"`$name`"
  } else {
    // Not a field of this schema as a whole: look for a parent struct. Every dot is a
    // candidate parent/child boundary because the parent's own name may contain dots.
    // Splits are tried left to right, so the first-dot split keeps priority.
    val parentChildSplits = name.indices.iterator.collect {
      case dot if name.charAt(dot) == '.' => (name.substring(0, dot), name.substring(dot + 1))
    }
    parentChildSplits
      .map { case (parentName, remainingPath) =>
        (parentName, remainingPath, schema.fields.find(_.name == parentName).map(_.dataType))
      }
      .collectFirst { case (parentName, remainingPath, Some(nestedSchema: StructType)) =>
        // Nested access - recursively process the remaining path
        val escapedParent = if (parentName.contains(".")) s"`$parentName`" else parentName
        val escapedRemaining = escapeColumnNameIfNeeded(remainingPath, nestedSchema)
        s"$escapedParent.$escapedRemaining"
      }
      .getOrElse(name)
  }
}
285335

0 commit comments

Comments
 (0)