5 changes: 5 additions & 0 deletions .github/workflows/pr_build_linux.yml
@@ -190,6 +190,11 @@ jobs:
java_version: "17"
maven_opts: "-Pspark-4.0"
scan_impl: "auto"

- name: "Spark 4.1, JDK 17"
java_version: "17"
maven_opts: "-Pspark-4.1"
scan_impl: "auto"
suite:
- name: "fuzz"
value: |
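To approximate the new "Spark 4.1, JDK 17" matrix entry locally, the `-Pspark-4.1` profile added later in this PR can be invoked directly. A minimal sketch — the JDK path and the presence of a Maven wrapper are assumptions, not part of this change:

```bash
# Approximate the new CI matrix entry locally (assumes a JDK 17 install).
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk   # hypothetical path
./mvnw test -Pspark-4.1                         # or plain `mvn` if no wrapper is present
```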
12 changes: 10 additions & 2 deletions .github/workflows/spark_sql_test.yml
@@ -115,20 +115,24 @@ jobs:
- {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
- {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
# Test combinations:
# - auto scan: all Spark versions (3.4, 3.5, 4.0)
# - auto scan: all Spark versions (3.4, 3.5, 4.0, 4.1)
# - native_comet: Spark 3.4, 3.5
# - native_iceberg_compat: Spark 3.5 only
config:
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto', scan-env: ''}
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto', scan-env: ''}
- {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
- {spark-short: '4.1', spark-full: '4.1.1', java: 17, scan-impl: 'auto', scan-env: ''}
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'native_iceberg_compat', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_iceberg_compat'}
# Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
# Skip sql_hive-1 for Spark 4.0+ due to https://github.com/apache/datafusion-comet/issues/2946
exclude:
- config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
- config: {spark-short: '4.1', spark-full: '4.1.1', java: 17, scan-impl: 'auto', scan-env: ''}
module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}

fail-fast: false
name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
runs-on: ${{ matrix.os }}
@@ -156,6 +160,10 @@ jobs:
run: |
cd apache-spark
rm -rf /root/.m2/repository/org/apache/parquet # the Parquet cache must be cleared to avoid stale artifacts
rm -rf /root/.m2/repository/org/scala-lang/modules/scala-xml_2.13
rm -rf /root/.m2/repository/org/xerial/snappy/snappy-java/1.1.10.4
rm -rf /root/.m2/repository/com/google/protobuf/protobuf-java/4.28.2
rm -rf /root/.m2/repository/org/bouncycastle/bcprov-jdk18on/1.80
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ${{ matrix.config.scan-env }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
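Outside CI, the same step can be mirrored against a patched apache-spark checkout. A sketch, assuming the checkout has already been prepared with the corresponding dev/diffs patch applied, and reusing one of the module invocations from the matrix above:

```bash
# Run one Spark SQL Hive suite with Comet enabled, as the workflow step does
# (the apache-spark directory is assumed to be patched and built beforehand).
cd apache-spark
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true \
  build/sbt -Dsbt.log.noformat=true \
  "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"
```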
3,998 changes: 3,998 additions & 0 deletions dev/diffs/4.1.1.diff

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions dev/regenerate-golden-files.sh
@@ -22,7 +22,7 @@
# Usage: ./dev/regenerate-golden-files.sh [--spark-version <version>]
#
# Options:
# --spark-version <version> Only regenerate for specified Spark version (3.4, 3.5, or 4.0)
# --spark-version <version> Only regenerate for specified Spark version (3.4, 3.5, 4.0, or 4.1)
# If not specified, regenerates for all versions.
#
# Examples:
@@ -119,7 +119,7 @@ main() {
echo "Usage: $0 [--spark-version <version>]"
echo ""
echo "Options:"
echo " --spark-version <version> Only regenerate for specified Spark version (3.4, 3.5, or 4.0)"
echo " --spark-version <version> Only regenerate for specified Spark version (3.4, 3.5, 4.0 or 4.1)"
echo " If not specified, regenerates for all versions."
exit 0
;;
@@ -133,9 +133,9 @@ main() {

# Validate target version if specified
if [ -n "$target_version" ]; then
if [[ ! "$target_version" =~ ^(3\.4|3\.5|4\.0)$ ]]; then
if [[ ! "$target_version" =~ ^(3\.4|3\.5|4\.0|4\.1)$ ]]; then
echo "[ERROR] Invalid Spark version: $target_version"
echo "[ERROR] Supported versions: 3.4, 3.5, 4.0"
echo "[ERROR] Supported versions: 3.4, 3.5, 4.0, 4.1"
exit 1
fi
fi
@@ -155,7 +155,7 @@ main() {
if [ -n "$target_version" ]; then
versions=("$target_version")
else
versions=("3.4" "3.5" "4.0")
versions=("3.4" "3.5" "4.0", "4.1")
fi

# Install and regenerate for each version
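With 4.1 accepted by the validation above, the script's documented usage extends naturally. For example:

```bash
# Regenerate golden files for Spark 4.1 only
./dev/regenerate-golden-files.sh --spark-version 4.1

# Regenerate for all supported versions (3.4, 3.5, 4.0, 4.1)
./dev/regenerate-golden-files.sh
```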
1 change: 1 addition & 0 deletions docs/source/user-guide/latest/installation.md
@@ -51,6 +51,7 @@ use only and should not be used in production yet.
| Spark Version | Java Version | Scala Version | Comet Tests in CI | Spark SQL Tests in CI |
| ------------- | ------------ | ------------- | ----------------- | --------------------- |
| 4.0.1 | 17 | 2.13 | Yes | Yes |
| 4.1.1 | 17 | 2.13 | Yes | Yes |

Note that Comet may not fully work with proprietary forks of Apache Spark such as the Spark versions offered by
Cloud Service Providers.
25 changes: 23 additions & 2 deletions pom.xml
@@ -642,8 +642,29 @@ under the License.
<parquet.version>1.15.2</parquet.version>
<semanticdb.version>4.13.6</semanticdb.version>
<slf4j.version>2.0.16</slf4j.version>
<shims.majorVerSrc>spark-4.0</shims.majorVerSrc>
<shims.minorVerSrc>not-needed-yet</shims.minorVerSrc>
<shims.majorVerSrc>spark-4.x</shims.majorVerSrc>
<shims.minorVerSrc>spark-4.0</shims.minorVerSrc>
<!-- Use jdk17 by default -->
<java.version>17</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
</properties>
</profile>

<profile>
<!-- FIXME: this is WIP. Tests may fail https://github.com/apache/datafusion-comet/issues/551 -->
<id>spark-4.1</id>
<properties>
<!-- Use Scala 2.13 by default -->
<scala.version>2.13.17</scala.version>
<scala.binary.version>2.13</scala.binary.version>
<spark.version>4.1.1</spark.version>
<spark.version.short>4.1</spark.version.short>
<parquet.version>1.16.0</parquet.version>
<semanticdb.version>4.13.9</semanticdb.version>
<slf4j.version>2.0.17</slf4j.version>
<shims.majorVerSrc>spark-4.x</shims.majorVerSrc>
<shims.minorVerSrc>spark-4.1</shims.minorVerSrc>
<!-- Use jdk17 by default -->
<java.version>17</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
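The new profile is used like any other Spark profile in this build; a sketch of a full build against Spark 4.1 (the `-DskipTests` flag is only an illustration, not required by this change):

```bash
# Build Comet against Spark 4.1 / Scala 2.13 via the new profile
mvn clean install -Pspark-4.1 -DskipTests
```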
27 changes: 27 additions & 0 deletions spark/pom.xml
@@ -256,6 +256,33 @@ under the License.
</dependency>
</dependencies>
</profile>

<profile>
<id>spark-4.1</id>
<dependencies>
<dependency>
<groupId>org.apache.iceberg</groupId>
<!-- TODO: Upgrade after iceberg-spark-runtime-4.1_2.13 release -->
<artifactId>iceberg-spark-runtime-4.0_${scala.binary.version}</artifactId>
<version>1.10.0</version>
<scope>test</scope>
</dependency>
<!-- Jetty 11.x for Spark 4.1 (jakarta.servlet) -->
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>11.0.24</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<version>11.0.24</version>
<scope>test</scope>
</dependency>
</dependencies>
</profile>
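Because Spark 4.1 moves to jakarta.servlet, it may be worth confirming that the test classpath actually resolves the Jetty 11 artifacts declared above. A sketch using the standard Maven dependency plugin (run from the repo root; the parent may need to be installed first):

```bash
# Verify Jetty 11 is on the spark module's test classpath under the new profile
mvn -pl spark -Pspark-4.1 dependency:tree -Dincludes=org.eclipse.jetty
```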

<profile>
<id>generate-docs</id>
<build>