Skip to content

Commit

Permalink
chore: Add GitHub workflow to publish Docker image (#847)
Browse files Browse the repository at this point in the history
* Add workflow to publish Docker images

* update workflow name

* remove regex check

* improve

* fix

* use maven to get project version

* add scalastyle config

* fix

* remove java distro name

* add dev folder

* save progress

* docker build works

* Update kube/Dockerfile

Co-authored-by: Oleks V <[email protected]>

* Update .github/workflows/docker-publish.yml

Co-authored-by: Edmondo Porcu <[email protected]>

* address feedback

---------

Co-authored-by: Oleks V <[email protected]>
Co-authored-by: Edmondo Porcu <[email protected]>
  • Loading branch information
3 people authored Aug 19, 2024
1 parent 27ab86b commit 364887d
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 17 deletions.
18 changes: 18 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Paths excluded from the Docker build context.

# Version-control, CI and IDE metadata (presumably not needed inside the image)
.git
.github
.idea
bin
conf
# Documentation build artifacts and its Python virtualenv
# (assumed from the directory names; verify against the docs build)
docs/build
docs/temp
docs/venv
metastore_db
# Per-module build output directories (presumably Maven/Cargo `target` dirs)
target
common/target
spark-integration/target
fuzz-testing/target
spark/target
native/target
core/target
spark-warehouse
venv
63 changes: 63 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Builds the Comet Docker image from kube/Dockerfile and pushes it to the
# GitHub Container Registry whenever a matching tag is pushed.
name: Publish Docker images

# Runs that share the same group key cancel any run still in progress.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

# Triggered only by tag pushes that match one of the patterns below.
on:
  push:
    tags:
      - '*.*.*'
      - '*.*.*-rc*'
      - 'test-docker-publish-*'

jobs:
  docker:
    name: Docker
    runs-on: ubuntu-22.04
    permissions:
      contents: read
      # Required so GITHUB_TOKEN may push to ghcr.io.
      packages: write
    steps:
      # Maven needs pom.xml in the workspace to evaluate project.version.
      - name: Check out repository
        uses: actions/checkout@v4
      - name: Set up Java
        uses: actions/setup-java@v3
        with:
          # `distribution` is a required input of setup-java v2 and later.
          distribution: 'temurin'
          java-version: '17'
      - name: Extract Comet version
        id: extract_version
        # Exports COMET_VERSION to later steps through the GITHUB_ENV file.
        run: |
          COMET_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
          echo "COMET_VERSION=$COMET_VERSION" >> "$GITHUB_ENV"
      - name: Echo Comet version
        run: echo "The current Comet version is ${{ env.COMET_VERSION }}"
      # Emulation is needed to build the linux/arm64 variant on an amd64 runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          platforms: linux/amd64,linux/arm64
          push: true
          # The tag must carry the ghcr.io host; without a registry prefix the
          # push targets Docker Hub, which this job is not logged in to.
          tags: ghcr.io/apache/datafusion-comet:spark-3.4-scala-2.12-${{ env.COMET_VERSION }}
          file: kube/Dockerfile
6 changes: 5 additions & 1 deletion docs/source/user-guide/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ Make sure the following requirements are met and software installed on your mach
- JDK 8 and up
- GLIBC 2.17 (Centos 7) and up

## Using a Published Binary Release
## Using a Published Docker Image

Docker images are available at https://github.com/orgs/apache/packages?repo_name=datafusion-comet

## Using a Published JAR File

There are no published binary releases yet.

Expand Down
35 changes: 29 additions & 6 deletions kube/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ USER root

# Installing JDK11 as the image comes with JRE
RUN apt update \
&& apt install -y git \
&& apt install -y curl \
&& apt install -y openjdk-11-jdk \
&& apt clean
Expand All @@ -32,14 +31,38 @@ ENV RUSTFLAGS="-C debuginfo=line-tables-only -C incremental=false"
ENV SPARK_VERSION=3.4
ENV SCALA_VERSION=2.12

# copy source files to Docker image
RUN mkdir /comet
WORKDIR /comet

# build native code first so that this layer can be re-used
# if only Scala code gets modified
COPY rust-toolchain.toml /comet/rust-toolchain.toml
COPY native /comet/native
RUN cd native && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release

# copy the rest of the project
COPY .mvn /comet/.mvn
COPY mvnw /comet/mvnw
COPY common /comet/common
COPY dev /comet/dev
COPY docs /comet/docs
COPY fuzz-testing /comet/fuzz-testing
COPY spark /comet/spark
COPY spark-integration /comet/spark-integration
COPY scalafmt.conf /comet/scalafmt.conf
COPY .scalafix.conf /comet/.scalafix.conf
COPY Makefile /comet/Makefile
COPY pom.xml /comet/pom.xml

# Pick the JDK instead of JRE to compile Comet
RUN cd /opt \
&& git clone https://github.com/apache/datafusion-comet.git \
&& cd datafusion-comet \
&& JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"
RUN cd /comet \
&& JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"

FROM apache/spark:3.4.2
ENV SPARK_VERSION=3.4
ENV SCALA_VERSION=2.12
USER root
COPY --from=builder /opt/datafusion-comet/spark/target/comet-spark-spark${SPARK_VERSION}_$SCALA_VERSION-0.1.0-SNAPSHOT.jar $SPARK_HOME/jars

# note the use of a wildcard in the file name so that this works with both snapshot and final release versions
COPY --from=builder /comet/spark/target/comet-spark-spark${SPARK_VERSION}_$SCALA_VERSION-0.2.0*.jar $SPARK_HOME/jars
18 changes: 9 additions & 9 deletions native/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ arrow-buffer = { version = "52.2.0" }
arrow-data = { version = "52.2.0" }
arrow-schema = { version = "52.2.0" }
parquet = { version = "52.2.0", default-features = false, features = ["experimental"] }
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1" }
datafusion = { default-features = false, git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", features = ["unicode_expressions", "crypto_expressions"] }
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", features = ["crypto_expressions"] }
datafusion-functions-nested = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
datafusion-execution = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
datafusion-physical-expr-common = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false }
datafusion-common = { version = "41.0.0" }
datafusion = { default-features = false, version = "41.0.0", features = ["unicode_expressions", "crypto_expressions"] }
datafusion-functions = { version = "41.0.0", features = ["crypto_expressions"] }
datafusion-functions-nested = { version = "41.0.0", default-features = false }
datafusion-expr = { version = "41.0.0", default-features = false }
datafusion-execution = { version = "41.0.0", default-features = false }
datafusion-physical-plan = { version = "41.0.0", default-features = false }
datafusion-physical-expr-common = { version = "41.0.0", default-features = false }
datafusion-physical-expr = { version = "41.0.0", default-features = false }
datafusion-comet-spark-expr = { path = "spark-expr", version = "0.2.0" }
datafusion-comet-proto = { path = "proto", version = "0.2.0" }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
Expand Down
7 changes: 6 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,10 @@ under the License.
</properties>
</profile>

<profile>
<id>scala-2.12</id>
</profile>

<profile>
<id>scala-2.13</id>
<properties>
Expand Down Expand Up @@ -938,6 +942,7 @@ under the License.
<exclude>**/build/**</exclude>
<exclude>**/target/**</exclude>
<exclude>**/apache-spark/**</exclude>
<exclude>.dockerignore</exclude>
<exclude>.git/**</exclude>
<exclude>.github/**</exclude>
<exclude>.gitignore</exclude>
Expand All @@ -963,7 +968,7 @@ under the License.
<exclude>docs/source/_static/images/**</exclude>
<exclude>dev/release/rat_exclude_files.txt</exclude>
<exclude>dev/release/requirements.txt</exclude>
<exclude>native/core/src/execution/generated/**</exclude>
<exclude>native/proto/src/generated/**</exclude>
</excludes>
</configuration>
</plugin>
Expand Down

0 comments on commit 364887d

Please sign in to comment.