# Migrate CI from Azure Pipelines to GitHub Actions and drop the python-snowpark image.
#
# Notes on this revision of the patch:
# - Composite-action `run:` steps receive their inputs through `env:` instead of inline
#   ${{ }} interpolation (targetTag carries github.ref_name, i.e. the pushed tag name).
# - auth-acr is named for what it does (log in), not "Push to ACR".
# - `sha` on docker-image-cache-restore is optional: it has a default and no caller sets it.
# - Line structure was rebuilt from a whitespace-collapsed copy: indentation and blank
#   context lines follow the hunk counts but must be checked against the tree, and the
#   `index` hashes of the three edited action files were not regenerated.
diff --git a/.github/actions/docker-image-cache-restore/action.yml b/.github/actions/docker-image-cache-restore/action.yml
new file mode 100644
index 0000000..b035938
--- /dev/null
+++ b/.github/actions/docker-image-cache-restore/action.yml
@@ -0,0 +1,26 @@
+name: Restore cached Docker image
+description: Restore cached Docker image
+inputs:
+  key:
+    description: The cache key
+    required: true
+  sha:
+    description: Commit SHA
+    required: false
+    default: ${{ github.sha }}
+
+runs:
+  using: composite
+  steps:
+    # The artifact name must match the one uploaded by the build job in ci.yml.
+    - name: Download Docker image artifact
+      uses: actions/download-artifact@v5
+      with:
+        name: docker-custom-python-${{ inputs.key }}-${{ inputs.sha }}
+        path: /tmp
+    - name: Load Docker image
+      shell: bash
+      # Hand the path over via env so the input is never spliced into the script.
+      env:
+        IMAGE_TAR: /tmp/docker-custom-python-${{ inputs.key }}.tar
+      run: docker load --input "$IMAGE_TAR"
diff --git a/.github/actions/docker-push/action.yml b/.github/actions/docker-push/action.yml
new file mode 100644
index 0000000..b6a2496
--- /dev/null
+++ b/.github/actions/docker-push/action.yml
@@ -0,0 +1,55 @@
+name: Push to Registries
+description: Push Docker image to multiple registries
+inputs:
+  imageCacheKey:
+    description: The image cache key (Used to construct artifact name)
+    required: true
+  sourceImage:
+    description: The source image name
+    required: true
+  sourceTag:
+    description: The source image tag
+    required: true
+    default: latest
+  targetImage:
+    description: The target image name
+    required: true
+  targetTag:
+    description: The target image tag
+    required: true
+  acrRegistry:
+    description: ACR registry URL
+    required: true
+  acrUsername:
+    description: ACR login username
+    required: true
+  acrPassword:
+    description: ACR login password
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Restore cached Docker image
+      uses: ./.github/actions/docker-image-cache-restore
+      with:
+        key: ${{ inputs.imageCacheKey }}
+
+    - name: Authenticate with ACR
+      id: authAcr
+      uses: ./.github/actions/docker-push/auth-acr
+      with:
+        acrRegistry: ${{ inputs.acrRegistry }}
+        acrUsername: ${{ inputs.acrUsername }}
+        acrPassword: ${{ inputs.acrPassword }}
+
+    - name: Push to ACR
+      shell: bash
+      # targetTag is built from github.ref_name by ci.yml, so pass every value
+      # through env instead of interpolating it into the script text.
+      env:
+        SOURCE_REF: ${{ inputs.sourceImage }}:${{ inputs.sourceTag }}
+        TARGET_REF: ${{ steps.authAcr.outputs.repositoryName }}/${{ inputs.targetImage }}:${{ inputs.targetTag }}
+      run: |
+        docker tag "$SOURCE_REF" "$TARGET_REF"
+        docker push "$TARGET_REF"
diff --git a/.github/actions/docker-push/auth-acr/action.yml b/.github/actions/docker-push/auth-acr/action.yml
new file mode 100644
index 0000000..cda557b
--- /dev/null
+++ b/.github/actions/docker-push/auth-acr/action.yml
@@ -0,0 +1,32 @@
+name: Authenticate with ACR
+description: Log in to ACR and re-export the registry as an output
+inputs:
+  acrRegistry:
+    description: ACR registry URL
+    required: true
+  acrUsername:
+    description: ACR login username
+    required: true
+  acrPassword:
+    description: ACR login password
+    required: true
+outputs:
+  repositoryName:
+    description: Repository name
+    value: ${{ steps.repositoryName.outputs.repository }}
+
+runs:
+  using: composite
+  steps:
+    - uses: azure/docker-login@v2
+      with:
+        login-server: ${{ inputs.acrRegistry }}
+        username: ${{ inputs.acrUsername }}
+        password: ${{ inputs.acrPassword }}
+
+    - id: repositoryName
+      shell: bash
+      # Echo the registry back so the caller can prefix image names with it.
+      env:
+        ACR_REGISTRY: ${{ inputs.acrRegistry }}
+      run: echo "repository=${ACR_REGISTRY}" >> "$GITHUB_OUTPUT"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..e735fba
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,101 @@
+name: Build and publish
+on: [ push ]
+
+jobs:
+  detect-changes:
+    name: Detect changes
+    runs-on: ubuntu-latest
+    outputs:
+      changedProjects_python38: ${{ steps.findChanges.outputs.changedProjects_python38 }}
+      changedProjects_python310: ${{ steps.findChanges.outputs.changedProjects_python310 }}
+      changedProjects: ${{ steps.findChanges.outputs.changedProjects }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+
+      - name: Find changes
+        id: findChanges
+        run: |
+          ./bin/ci-find-changes.sh master \
+            python38:python-3.8/ \
+            python310:python-3.10/
+
+  build:
+    name: Build ${{ matrix.python-version }}
+    needs: detect-changes
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - python-version: "3.8"
+            target: python38
+            changed: ${{ needs.detect-changes.outputs.changedProjects_python38 }}
+          - python-version: "3.10"
+            target: python310
+            changed: ${{ needs.detect-changes.outputs.changedProjects_python310 }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v5
+        if: matrix.changed == '1'
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        if: matrix.changed == '1'
+
+      - name: Build Docker image
+        if: matrix.changed == '1'
+        run: |
+          echo "Building ${{ matrix.target }}"
+          docker buildx build \
+            --tag keboola/docker-custom-python:python-${{ matrix.python-version }} \
+            --output type=docker,dest=/tmp/docker-custom-python-${{ matrix.target }}.tar \
+            ./python-${{ matrix.python-version }}/
+
+      - name: Upload Docker image artifact
+        uses: actions/upload-artifact@v4
+        if: matrix.changed == '1'
+        with:
+          name: docker-custom-python-${{ matrix.target }}-${{ github.sha }}
+          path: /tmp/docker-custom-python-${{ matrix.target }}.tar
+          retention-days: 1
+
+  publish-images:
+    name: Publish images to ACR
+    if: startsWith(github.ref, 'refs/tags/')
+    needs: [detect-changes, build]
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+
+      - name: Push python-3.8 image to ACR
+        if: needs.detect-changes.outputs.changedProjects_python38 == '1'
+        uses: ./.github/actions/docker-push
+        with:
+          imageCacheKey: python38
+          sourceImage: keboola/docker-custom-python
+          sourceTag: python-3.8
+          targetImage: docker-custom-python
+          targetTag: python-3.8-${{ github.ref_name }}
+          acrRegistry: keboola.azurecr.io
+          acrUsername: docker-custom-python-push
+          acrPassword: ${{ secrets.DOCKER_CUSTOM_PYTHON_ACR_PASSWORD }}
+
+      - name: Push python-3.10 image to ACR
+        if: needs.detect-changes.outputs.changedProjects_python310 == '1'
+        uses: ./.github/actions/docker-push
+        with:
+          imageCacheKey: python310
+          sourceImage: keboola/docker-custom-python
+          sourceTag: python-3.10
+          targetImage: docker-custom-python
+          targetTag: python-3.10-${{ github.ref_name }}
+          acrRegistry: keboola.azurecr.io
+          acrUsername: docker-custom-python-push
+          acrPassword: ${{ secrets.DOCKER_CUSTOM_PYTHON_ACR_PASSWORD }}
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
deleted file mode 100644
index ebc4f40..0000000
--- a/azure-pipelines.yml
+++ /dev/null
@@ -1,104 +0,0 @@
-pr: none
-trigger:
-  batch: true
-  branches:
-    include:
-      - '*'
-  tags:
-    include:
-      - '*'
-
-pool:
-  vmImage: ubuntu-latest
-
-variables:
-  - name: isTagBuild
-    value: ${{ startsWith(variables['Build.SourceBranch'],'refs/tags/') }}
-  - name: imageTag
-    value: ${{ replace(variables['Build.SourceBranch'],'refs/tags/','') }}
-  - name: azureContainerRegistryConnection
-    value: Keboola ACR
-  - name: azureContainerRegistry
-    value: keboola.azurecr.io
-  - name: imageRepository_base
-    value: docker-custom-python
-  - name: imageRepository_databricks
-    value: docker-python-databricks
-  - name: imageRepository_snowpark
-    value: docker-python-snowpark
-
-
-steps:
-  - script: |
-      ./bin/ci-find-changes.sh master \
-        python38:python-3.8/ \
-        python310:python-3.10/ \
-        pythonSnowpark:python-snowpark/
-    displayName: Find changes
-    name: findChanges
-
-  - script: docker buildx bake --load $(changedProjects)
-    displayName: Build Docker images
-
-# Push Quay
-  - script: |
-      set -Eeuo pipefail
-      docker login -u="$(QUAY_USERNAME)" -p="$(QUAY_PASSWORD)" quay.io
-      docker tag keboola/docker-custom-python:python-3.8 quay.io/keboola/$(imageRepository_base):python-3.8-$(imageTag)
-      docker push quay.io/keboola/$(imageRepository_base):python-3.8-$(imageTag)
-    displayName: Push 3.8 image to quay.io
-    condition: and(eq(variables['isTagBuild'], 'true'), eq(variables['changedProjects_python38'], 1))
-
-  - script: |
-      set -Eeuo pipefail
-      docker login -u="$(QUAY_USERNAME)" -p="$(QUAY_PASSWORD)" quay.io
-      docker tag keboola/docker-custom-python:python-3.10 quay.io/keboola/$(imageRepository_base):python-3.10-$(imageTag)
-      docker tag keboola/docker-custom-python:python-3.10 quay.io/keboola/$(imageRepository_base):latest
-      docker push quay.io/keboola/$(imageRepository_base):python-3.10-$(imageTag)
-      docker push quay.io/keboola/$(imageRepository_base):latest
-    displayName: Push 3.10 image to quay.io
-    condition: and(eq(variables['isTagBuild'], 'true'), eq(variables['changedProjects_python310'], 1))
-
-# Push ACR
-  - task: Docker@2
-    displayName: Login to ACR
-    inputs:
-      command: login
-      containerRegistry: $(azureContainerRegistryConnection)
-    condition: eq(variables['isTagBuild'], 'true')
-
-  - script: |
-      set -Eeuo pipefail
-      docker tag keboola/docker-custom-python:python-3.8 $(azureContainerRegistry)/$(imageRepository_base):python-3.8-$(imageTag)
-      docker push $(azureContainerRegistry)/$(imageRepository_base):python-3.8-$(imageTag)
-    displayName: Push 3.8 image to ACR
-    condition: and(eq(variables['isTagBuild'], 'true'), eq(variables['changedProjects_python38'], 1))
-
-  - script: |
-      set -Eeuo pipefail
-      docker tag keboola/docker-custom-python:python-3.10 $(azureContainerRegistry)/$(imageRepository_base):python-3.10-$(imageTag)
-      docker push $(azureContainerRegistry)/$(imageRepository_base):python-3.10-$(imageTag)
-
-      docker tag keboola/docker-custom-python:python-3.10 $(azureContainerRegistry)/$(imageRepository_databricks):$(imageTag)
-      docker push $(azureContainerRegistry)/$(imageRepository_databricks):$(imageTag)
-    displayName: Push 3.10 images to ACR
-    condition: and(eq(variables['isTagBuild'], 'true'), eq(variables['changedProjects_python310'], 1))
-
-  - script: |
-      set -Eeuo pipefail
-      docker tag keboola/docker-custom-python-snowpark $(azureContainerRegistry)/$(imageRepository_snowpark):$(imageTag)
-      docker push -a $(azureContainerRegistry)/$(imageRepository_snowpark)
-    displayName: Push python-snowpark image to ACR
-    condition: and(eq(variables['isTagBuild'], 'true'), eq(variables['changedProjects_pythonSnowpark'], 1))
-
-# Publish the latest tag info
-  - script: printf "%s" "$(imageTag)" > base-python-artifact
-    condition: eq(variables['isTagBuild'], 'true')
-    displayName: Create artifact
-
-  - task: PublishPipelineArtifact@1
-    inputs:
-      targetPath: 'base-python-artifact'
-      artifact: 'keboola.docker-custom-python.latest-build'
-    condition: eq(variables['isTagBuild'], 'true')
-    displayName: 'Publish Tag Artifact'
diff --git a/bin/ci-find-changes.sh b/bin/ci-find-changes.sh
index 225446b..2196a9c 100755
--- a/bin/ci-find-changes.sh
+++ b/bin/ci-find-changes.sh
@@ -10,6 +10,12 @@ fi
 TARGET_BRANCH=$1
 ALL_CHANGES=
 
+set_output() {
+  local var_name=$1
+  local value=$2
+  echo "${var_name}=${value}" >> "$GITHUB_OUTPUT"
+}
+
 for PROJECT in ${@:2}; do
   PROJECT_CONFIG=(${PROJECT//:/ })
   PROJECT_VAR_NAME=${PROJECT_CONFIG[0]}
@@ -20,10 +26,10 @@ for PROJECT in ${@:2}; do
 
   if [[ $PROJECT_CHANGES_COUNT -eq 0 ]]; then
     echo "no changes"
-    echo "##vso[task.setvariable variable=changedProjects_${PROJECT_VAR_NAME}]0"
+    set_output "changedProjects_${PROJECT_VAR_NAME}" "0"
   else
     echo "has changes"
-    echo "##vso[task.setvariable variable=changedProjects_${PROJECT_VAR_NAME}]1"
+    set_output "changedProjects_${PROJECT_VAR_NAME}" "1"
     ALL_CHANGES="${ALL_CHANGES} \"${PROJECT_VAR_NAME}\""
   fi
 done
@@ -34,9 +40,9 @@ if [[ "${ALL_CHANGES}" == "" ]]; then
     PROJECT_CONFIG=(${PROJECT//:/ })
     PROJECT_VAR_NAME=${PROJECT_CONFIG[0]}
 
-    echo "##vso[task.setvariable variable=changedProjects_${PROJECT_VAR_NAME}]1"
+    set_output "changedProjects_${PROJECT_VAR_NAME}" "1"
     ALL_CHANGES="${ALL_CHANGES} \"${PROJECT_VAR_NAME}\""
   done
 fi
 
-echo "##vso[task.setvariable variable=changedProjects]$ALL_CHANGES"
+set_output "changedProjects" "$ALL_CHANGES"
diff --git a/docker-bake.hcl b/docker-bake.hcl
index 25def83..74bb900 100644
--- a/docker-bake.hcl
+++ b/docker-bake.hcl
@@ -1,8 +1,7 @@
 group "default" {
   targets = [
     "python38",
-    "python310",
-    "pythonSnowpark"
+    "python310"
   ]
 }
 
@@ -15,11 +14,3 @@ target "python310" {
   context = "./python-3.10/"
   tags = ["keboola/docker-custom-python:python-3.10"]
 }
-
-target "pythonSnowpark" {
-  context = "./python-snowpark/"
-  tags = ["keboola/docker-custom-python-snowpark"]
-  contexts = {
-    python = "target:python38"
-  }
-}
diff --git a/python-3.10/Dockerfile b/python-3.10/Dockerfile
index eb7ad51..25b639f 100644
--- a/python-3.10/Dockerfile
+++ b/python-3.10/Dockerfile
@@ -1,9 +1,12 @@
 FROM python:3.10.10-buster
-ENV PYTHONIOENCODING utf-8
+ENV PYTHONIOENCODING=utf-8
 
 WORKDIR /home
 
-RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
+RUN sed -i 's/deb.debian.org/archive.debian.org/g' /etc/apt/sources.list \
+    && sed -i 's|security.debian.org|archive.debian.org|g' /etc/apt/sources.list \
+    && sed -i '/stretch-updates/d' /etc/apt/sources.list \
+    && curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
     && curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list \
     && apt-get update && ACCEPT_EULA=Y apt-get install -y --no-install-recommends \
         libgeos-c1v5 \
@@ -20,14 +23,14 @@ RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
         graphviz \
     && rm -rf /var/lib/apt/lists/*
 
-ENV PATH $PATH:/opt/mssql-tools/bin
+ENV PATH="$PATH:/opt/mssql-tools/bin"
 
-ENV VIRTUAL_ENV /home/default
+ENV VIRTUAL_ENV=/home/default
 
 # From https://jdk.java.net/13/, stolen from https://github.com/docker-library/openjdk/blob/master/8/jdk/Dockerfile#L22
-ENV JAVA_HOME /usr/local/openjdk
+ENV JAVA_HOME=/usr/local/openjdk
 
-ENV PATH $JAVA_HOME/bin:$PATH
+ENV PATH="$JAVA_HOME/bin:$PATH"
 
 RUN wget https://download.java.net/java/GA/jdk13.0.2/d4173c853231432d94f001e99d882ca7/8/GPL/openjdk-13.0.2_linux-x64_bin.tar.gz \
     && mkdir $JAVA_HOME \
@@ -66,7 +69,7 @@ RUN mkdir $VIRTUAL_ENV \
         lineage-bundle \
         logger-bundle \
         matplotlib \
-        mlflow \
+        'mlflow<2.11' \
         nltk \
         nose \
         numba \
@@ -95,7 +98,7 @@ RUN mkdir $VIRTUAL_ENV \
         scipy \
         seaborn \
         simpleeval \
-        snowflake-connector-python[pandas] \
+        'snowflake-connector-python[pandas]<3.7' \
         sqlalchemy\
         statsmodels \
         sympy \
@@ -111,6 +114,7 @@ RUN mkdir $VIRTUAL_ENV \
     && pip3 install --no-cache-dir --upgrade --force-reinstall \
         git+https://github.com/keboola/sapi-python-client.git@0.4.0 \
         keboola.component \
+        'cffi<2.0.0' \
         charset-normalizer\<3 \
         cryptography\<41 \
         pytz\<2023 \
@@ -121,7 +125,7 @@ RUN mkdir $VIRTUAL_ENV \
     && chmod a+rwx -R $VIRTUAL_ENV
 
 # Import matplotlib the first time to build the font cache.
-ENV XDG_CACHE_HOME /tmp/
+ENV XDG_CACHE_HOME=/tmp/
 
 RUN . $VIRTUAL_ENV/bin/activate \
     && MPLBACKEND=Agg python -c "import matplotlib.pyplot" \
diff --git a/python-3.8/Dockerfile b/python-3.8/Dockerfile
index 9b6e390..b0533c9 100644
--- a/python-3.8/Dockerfile
+++ b/python-3.8/Dockerfile
@@ -1,21 +1,14 @@
-FROM python:3.8.8
-ENV PYTHONIOENCODING utf-8
+FROM python:3.8-bullseye
+ENV PYTHONIOENCODING=utf-8
 
 WORKDIR /home
 
 RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
-    && curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list \
+    && curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list \
     && apt-get update && ACCEPT_EULA=Y apt-get install -y --no-install-recommends \
         libgeos-c1v5 \
-        python-numpy \
-        python-scipy \
-        python-matplotlib \
-        ipython \
         msodbcsql17 \
         mssql-tools \
-        python-pandas \
-        python-sympy \
-        python-nose \
         g++ \
         libsasl2-dev \
         libatlas-base-dev \
@@ -28,13 +21,13 @@ RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
         graphviz \
     && rm -rf /var/lib/apt/lists/*
 
-ENV PATH $PATH:/opt/mssql-tools/bin
+ENV PATH="$PATH:/opt/mssql-tools/bin"
 
 ENV VIRTUAL_ENV=/home/default
 
 # From https://jdk.java.net/13/, stolen from https://github.com/docker-library/openjdk/blob/master/8/jdk/Dockerfile#L22
-ENV JAVA_HOME /usr/local/openjdk
-ENV PATH $JAVA_HOME/bin:$PATH
+ENV JAVA_HOME=/usr/local/openjdk
+ENV PATH="$JAVA_HOME/bin:$PATH"
 RUN wget https://download.java.net/java/GA/jdk13.0.2/d4173c853231432d94f001e99d882ca7/8/GPL/openjdk-13.0.2_linux-x64_bin.tar.gz \
     && mkdir $JAVA_HOME \
     && tar xv --file openjdk-13*_bin.tar.gz --directory "$JAVA_HOME" --no-same-owner --strip-components 1 \
@@ -128,7 +121,7 @@ RUN . $VIRTUAL_ENV/bin/activate \
     && chmod a+rwx -R /home/default
 
 # Import matplotlib the first time to build the font cache.
-ENV XDG_CACHE_HOME /tmp/
+ENV XDG_CACHE_HOME=/tmp/
 
 RUN . $VIRTUAL_ENV/bin/activate \
     && MPLBACKEND=Agg python -c "import matplotlib.pyplot" \
diff --git a/python-snowpark/Dockerfile b/python-snowpark/Dockerfile
deleted file mode 100644
index 117c790..0000000
--- a/python-snowpark/Dockerfile
+++ /dev/null
@@ -1,9 +0,0 @@
-# syntax=docker/dockerfile:1
-FROM python
-
-RUN python3 -m venv $VIRTUAL_ENV \
-    && . $VIRTUAL_ENV/bin/activate \
-    && pip3 install --no-cache-dir snowflake-snowpark-python \
-    && chown :users -R /home/default \
-    && chmod a+rwx -R /home/default \
-    && pip3 check