Skip to content

Commit

Permalink
SNOW-1335123 Fix DF Alias in the JOIN Condition (#100)
Browse files Browse the repository at this point in the history
* fix error

* remove an out of date test
  • Loading branch information
sfc-gh-bli committed Apr 25, 2024
1 parent ea80168 commit 6b661c0
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 98 deletions.
2 changes: 1 addition & 1 deletion src/main/scala/com/snowflake/snowpark/DataFrame.scala
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ class DataFrame private[snowpark] (
* @param alias The alias name of the dataframe
* @return a [[DataFrame]]
*/
def alias(alias: String): DataFrame = withPlan(DataframeAlias(alias, plan))
def alias(alias: String): DataFrame = withPlan(DataframeAlias(alias, plan, output))

/**
* Returns a new DataFrame with the specified Column expressions as output (similar to SELECT in
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ private[snowpark] class ExpressionAnalyzer(
val normalizedColName = quoteName(aliasColName)
val col = aliasOutput.filter(attr => attr.name.equals(normalizedColName))
if (col.length == 1) {
col.head.withName(normalizedColName)
// analyze new attributes at the same time
analyze(col.head.withName(normalizedColName))
} else {
throw ErrorMessage.DF_CANNOT_RESOLVE_COLUMN_NAME(aliasColName, aliasOutput.map(_.name))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,18 @@ private[snowpark] case class Sort(order: Seq[SortOrder], child: LogicalPlan) ext
Sort(order, _)
}

private[snowpark] case class DataframeAlias(alias: String, child: LogicalPlan) extends UnaryNode {
// requires childOutput when creating,
// since child's SnowflakePlan can be empty
private[snowpark] case class DataframeAlias(
alias: String,
child: LogicalPlan,
childOutput: Seq[Attribute])
extends UnaryNode {

override lazy val dfAliasMap: Map[String, Seq[Attribute]] =
Utils.addToDataframeAliasMap(Map(alias -> child.getSnowflakePlan.get.output), child)
Utils.addToDataframeAliasMap(Map(alias -> childOutput), child)
override protected def createFromAnalyzedChild: LogicalPlan => LogicalPlan =
DataframeAlias(alias, _)
DataframeAlias(alias, _, childOutput)

override protected def updateChild: LogicalPlan => LogicalPlan =
createFromAnalyzedChild
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ private object SqlGenerator extends Logging {
.transformations(transformations)
.options(options)
.createSnowflakePlan()
case DataframeAlias(_, child) => resolveChild(child)
case DataframeAlias(_, child, _) => resolveChild(child)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,6 @@ class DataFrameAliasSuite extends TestData with BeforeAndAfterEach with EagerSes
.join(df2, df1.col("id") === df2.col("id"))
.select(df2.col("B.num")),
Seq(Row(7), Row(8), Row(9)))

// The following use case is out of the scope of supporting alias
// We still follow the old ambiguity resolving policy and require DF to be used
assertThrows[SnowparkClientException](
df1
.join(df2, df1.col("id") === df2.col("id"))
.select($"A.num"))
}

test("Test for alias conflict") {
Expand All @@ -113,4 +106,35 @@ class DataFrameAliasSuite extends TestData with BeforeAndAfterEach with EagerSes
.join(df2, df1.col("id") === df2.col("id"))
.select(df1.col("A.num")))
}

test("snow-1335123") {
val df1 = Seq((1, 2, 3, 4), (11, 12, 13, 14), (21, 12, 23, 24), (11, 32, 33, 34)).toDF(
"col_a",
"col_b",
"col_c",
"col_d")

val df2 = Seq((1, 2, 5, 6), (11, 12, 15, 16), (41, 12, 25, 26), (11, 42, 35, 36)).toDF(
"col_a",
"col_b",
"col_e",
"col_f")

val df3 = df1
.alias("a")
.join(
df2.alias("b"),
col("a.col_a") === col("b.col_a")
&& col("a.col_b") === col("b.col_b"),
"left")
.select("a.col_a", "a.col_b", "col_c", "col_d", "col_e", "col_f")

checkAnswer(
df3,
Seq(
Row(1, 2, 3, 4, 5, 6),
Row(11, 12, 13, 14, 15, 16),
Row(11, 32, 33, 34, null, null),
Row(21, 12, 23, 24, null, null)))
}
}
Loading

0 comments on commit 6b661c0

Please sign in to comment.