From 5a0a39001b36e249c43e8991932c6bfd67f3b8d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20K=C3=BCnzel?= <simonk@fsmpi.rwth-aachen.de>
Date: Fri, 28 Feb 2025 04:17:01 +0100
Subject: [PATCH] fixup! Add package caching for CI

---
 README.md                                  | 34 ++++++++++++++++++++++
 api/Dockerfile                             |  7 +++--
 api/docker_start.sh                        | 10 +++----
 common_py/Dockerfile                       | 28 ++++++++++++++----
 generate_ci_pipeline.py                    |  8 ++++-
 job_controller/Dockerfile                  |  7 +++--
 job_controller/jobs/ffmpeg_base/Dockerfile |  4 ++-
 7 files changed, 81 insertions(+), 17 deletions(-)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b8e5c2c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+
+# A few technical notes
+
+## Dockerfiles
+
+### Venv
+
+The base `common_py` image uses a venv, so you need to use `.venv/bin/python` and `pip --python .venv/bin/python`.
+
+### Caching
+
+There are two types of caching used: One for local building and one in the CI with kaniko.
+
+#### Local Building
+
+The `PIP_CACHE_DIR` and `APT_CACHE_DIR` variables are empty, so pip and apt use their default cache locations. The
+`--mount=type=cache` option of the `RUN` instruction mounts these cache locations, so the cache can be reused between
+different builds. Since the locations are only mounted, the files written there during the installation command are not
+included in the final image.
+
+Additionally, Docker provides a configuration file for apt that automatically cleans the cache (so that it does not
+stay in the final image). We don't want that here, so we need to remove the `/etc/apt/apt.conf.d/docker-clean` file.
+
+#### CI Building
+
+The `PIP_CACHE_DIR` and `APT_CACHE_DIR` variables are set by the CI to locations (`.cache/pip`, `.cache/apt`) inside
+the project dir. The `cache` option of GitLab CI makes these locations persistent between different job runs. They
+need to be inside the project dir since GitLab cannot cache locations outside of it. When kaniko executes the Dockerfile,
+it passes the cache locations as build arguments. Pip directly reads the `PIP_CACHE_DIR` env var (note that the `ARG`
+instruction makes a build argument available as an environment variable to the `RUN` instructions). For apt, we
+need to put the location into a config file in `/etc/apt/apt.conf.d/`. The `--ignore-path` option of kaniko ensures
+that the cache is not included in the final image.
+
+Also see https://github.com/GoogleContainerTools/kaniko/issues/969#issuecomment-2160910028.
\ No newline at end of file
diff --git a/api/Dockerfile b/api/Dockerfile
index 7272314..1f4ccd8 100755
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -3,15 +3,16 @@ ARG ENV_TYPE
 ARG GIT_COMMIT_SHA
 FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_common_py:${GIT_COMMIT_SHA}
 
-# used by pip as env var
+# READ THE NOTE on caching in the README before changing this/for more info!
 ARG PIP_CACHE_DIR=
 
+
 ENV VIDEOAG_API_GIT_COMMIT_HASH $GIT_COMMIT_SHA
 
-RUN ls -la /builds/videoag/backend/.cache/pip || true
 COPY extra_requirements.txt ./
+# READ THE NOTE on caching in the README before changing this/for more info!
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install -r extra_requirements.txt
+    pip --python .venv/bin/python install -r extra_requirements.txt
 
 COPY docker_start.sh ./
 COPY .pylintrc ./
diff --git a/api/docker_start.sh b/api/docker_start.sh
index 09f181c..c7f9113 100755
--- a/api/docker_start.sh
+++ b/api/docker_start.sh
@@ -10,14 +10,14 @@ if [ $# = 1 ] && [ $1 = "-test" ]; then
   cd src
   uname -a
   export VIDEOAG_TEST_CONFIG_OVERRIDE="../config/test_config_override.py"
-  python3 -V
-  python3 -m coverage run --data-file "../coverage/.data" run_tests.py ||
+  ../.venv/bin/python -V
+  ../.venv/bin/python -m coverage run --data-file "../coverage/.data" run_tests.py ||
     { echo "Test failed!"; exit 1; }
-  python3 -m coverage report --data-file "../coverage/.data" --include "./*" ||
+  ../.venv/bin/python -m coverage report --data-file "../coverage/.data" --include "./*" ||
     { echo "Coverage report stdout failed"; exit 1; }
-  python3 -m coverage report --data-file "../coverage/.data" -m --include "./*" > ../coverage/report.txt ||
+  ../.venv/bin/python -m coverage report --data-file "../coverage/.data" -m --include "./*" > ../coverage/report.txt ||
     { echo "Coverage report report.txt failed"; exit 1; }
-  python3 -m coverage html -d "../coverage/html/" --data-file "../coverage/.data" --include "./*" ||
+  ../.venv/bin/python -m coverage html -d "../coverage/html/" --data-file "../coverage/.data" --include "./*" ||
     { echo "Coverage report html failed"; exit 1; }
 else
   echo "Running uWSGI"
diff --git a/common_py/Dockerfile b/common_py/Dockerfile
index e901fc3..ce34bf8 100755
--- a/common_py/Dockerfile
+++ b/common_py/Dockerfile
@@ -1,16 +1,34 @@
-FROM python:3.12
+FROM python:3.13-slim AS base
+
+# READ THE NOTE on caching in the README before changing this/for more info!
+ARG PIP_CACHE_DIR=
+ARG APT_CACHE_DIR=
+RUN rm -f /etc/apt/apt.conf.d/docker-clean
+RUN if [ -n "$APT_CACHE_DIR" ]; then echo "Dir::Cache::Archives \"$APT_CACHE_DIR\";" >> /etc/apt/apt.conf.d/ci_caching; fi
 
-ARG PIP_CACHE_DIR
 
 WORKDIR /code
 
 RUN mkdir -p /code
 WORKDIR /code
 
-RUN ls -la /builds/videoag/backend/.cache/pip || true
+# Install and build requirements in a separate build stage to reduce the final image size
+FROM base AS builder
+
+# Packages needed to build psycopg with pip
+# READ THE NOTE on caching in the README before changing this/for more info!
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update && apt-get --no-install-recommends install -y libpq-dev gcc python3-dev
+
 COPY requirements.txt /code
+RUN python -m venv --without-pip .venv/
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install -r requirements.txt
+    pip --python /code/.venv/bin/python install -r requirements.txt
 
-COPY src/videoag_common /code/src/videoag_common
+FROM base AS final
+
+# Copy venv created in builder image with the dependencies
+COPY --from=builder /code/.venv /code/.venv
 
+COPY src/videoag_common /code/src/videoag_common
diff --git a/generate_ci_pipeline.py b/generate_ci_pipeline.py
index c123dd8..0a4a93b 100644
--- a/generate_ci_pipeline.py
+++ b/generate_ci_pipeline.py
@@ -93,7 +93,8 @@ echo "{\\"auths\\":{\\"$CI_REGISTRY\\":{\\"username\\":\\"$CI_REGISTRY_USER\\",\
             f"--build-arg=GIT_COMMIT_SHA={self.context.commit_sha}",
             f"--build-arg=ENV_TYPE={self.context.env_type()}",
             
-            # See https://github.com/GoogleContainerTools/kaniko/issues/969#issuecomment-2160910028
+            # READ THE NOTE on caching in the README before changing this/for more info!
+            f"--ignore-path=$CI_PROJECT_DIR/.cache",
             f"--build-arg=PIP_CACHE_DIR=$CI_PROJECT_DIR/.cache/pip",
             f"--build-arg=APT_CACHE_DIR=$CI_PROJECT_DIR/.cache/apt",
             f"--cache=true",
@@ -111,6 +112,7 @@ echo "{\\"auths\\":{\\"$CI_REGISTRY\\":{\\"username\\":\\"$CI_REGISTRY_USER\\",\
         name: gcr.io/kaniko-project/executor:v1.23.2-debug
         entrypoint: [""]
     cache:
+        # READ THE NOTE on caching in the README before changing this/for more info!
         key: videoag-cache-{self.context.env_type()}
         paths:
             - .cache/pip
@@ -183,6 +185,10 @@ run-api-tests:
 
 def gen_pipeline(context: BuildContext) -> str:
     pipeline = """
+####################################################################
+##### AUTOMATICALLY GENERATED PIPELINE. DO NOT CHANGE MANUALLY! ####
+####################################################################
+
 stages:
     - build-and-test
     - deploy
diff --git a/job_controller/Dockerfile b/job_controller/Dockerfile
index 86768f2..4a42b99 100644
--- a/job_controller/Dockerfile
+++ b/job_controller/Dockerfile
@@ -3,8 +3,10 @@ ARG ENV_TYPE
 ARG GIT_COMMIT_SHA
 FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_common_py:${GIT_COMMIT_SHA}
 
-# used by pip as env var
+# READ THE NOTE on caching in the README before changing this/for more info!
 ARG PIP_CACHE_DIR=
+
+
 # Empty by default
 ARG GIT_COMMIT_TAG=
 
@@ -12,8 +14,9 @@ ENV VIDEOAG_JOB_CONTROLLER_GIT_COMMIT_HASH $GIT_COMMIT_SHA
 ENV VIDEOAG_JOB_CONTROLLER_GIT_COMMIT_TAG $GIT_COMMIT_TAG
 
 COPY extra_requirements.txt ./
+# READ THE NOTE on caching in the README before changing this/for more info!
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r extra_requirements.txt
+    pip --python .venv/bin/python install -r extra_requirements.txt
 
 COPY jobs ./jobs/
 
diff --git a/job_controller/jobs/ffmpeg_base/Dockerfile b/job_controller/jobs/ffmpeg_base/Dockerfile
index b1ea20a..afc4bd6 100644
--- a/job_controller/jobs/ffmpeg_base/Dockerfile
+++ b/job_controller/jobs/ffmpeg_base/Dockerfile
@@ -5,8 +5,10 @@ ARG ENV_TYPE
 ARG GIT_COMMIT_SHA
 FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_job_base:${GIT_COMMIT_SHA}
 
+# READ THE NOTE on caching in the README before changing this/for more info!
 ARG APT_CACHE_DIR=
-RUN if [[ ! -z "$APT_CACHE_DIR" ]]; then echo "Dir::Cache::Archives $APT_CACHE_DIR" >> /etc/apt/apt.conf.d/ci_caching; fi
+RUN rm -f /etc/apt/apt.conf.d/docker-clean
+RUN if [ -n "$APT_CACHE_DIR" ]; then echo "Dir::Cache::Archives \"$APT_CACHE_DIR\";" >> /etc/apt/apt.conf.d/ci_caching; fi
 
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
     --mount=type=cache,target=/var/lib/apt,sharing=locked \
-- 
GitLab