From 5a0a39001b36e249c43e8991932c6bfd67f3b8d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20K=C3=BCnzel?= <simonk@fsmpi.rwth-aachen.de> Date: Fri, 28 Feb 2025 04:17:01 +0100 Subject: [PATCH] fixup! Add package caching for CI --- README.md | 34 ++++++++++++++++++++++ api/Dockerfile | 7 +++-- api/docker_start.sh | 10 +++---- common_py/Dockerfile | 28 ++++++++++++++---- generate_ci_pipeline.py | 8 ++++- job_controller/Dockerfile | 7 +++-- job_controller/jobs/ffmpeg_base/Dockerfile | 4 ++- 7 files changed, 81 insertions(+), 17 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..b8e5c2c --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ + +# A few technical notes + +## Dockerfiles + +### Venv + +The base common py image uses a venv so you need to use `.venv/bin/python` and `pip --python .venv/bin/python` + +### Caching + +There are two types of caching used: One for local building and one in the CI with kaniko. + +#### Local Building + +The `PIP_CACHE_DIR` and `APT_CACHE_DIR` variables are empty, so pip and apt use their default cache locations. However, the +`--mount=type=cache` option before the RUN command mounts these cache locations, so the cache can be reused between +different builds. Since the locations are mounted, the files written there during the installation command are not +put into the final image. + +Additionally, Docker provides a script for apt to automatically clean the cache (so it does not stay in the final image). +However, we don't want that, and so we need to remove the `/etc/apt/apt.conf.d/docker-clean` file. + +#### CI Building + +The `PIP_CACHE_DIR` and `APT_CACHE_DIR` variables are set by the CI to a location (`.cache/pip`, `.cache/apt`) inside +the project dir. The cache option of the GitLab CI means these locations are persistent between different job runs. They +need to be inside the project dir since GitLab cannot cache locations outside. 
When kaniko now executes the Dockerfile, +the cache locations are passed in as build arguments. Pip directly reads the `PIP_CACHE_DIR` env var (note +that the ARG instruction in a Dockerfile exposes the build argument as an environment variable to RUN commands during the build). For apt we +need to put the location into a config file in `/etc/apt/apt.conf.d/`. The `--ignore-path` option of kaniko ensures that the cache is not +included in the final image. + +Also see https://github.com/GoogleContainerTools/kaniko/issues/969#issuecomment-2160910028. \ No newline at end of file diff --git a/api/Dockerfile b/api/Dockerfile index 7272314..1f4ccd8 100755 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -3,15 +3,16 @@ ARG ENV_TYPE ARG GIT_COMMIT_SHA FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_common_py:${GIT_COMMIT_SHA} -# used by pip as env var +# READ THE NOTE on caching in the README before changing this/for more info! ARG PIP_CACHE_DIR= + ENV VIDEOAG_API_GIT_COMMIT_HASH $GIT_COMMIT_SHA -RUN ls -la /builds/videoag/backend/.cache/pip || true COPY extra_requirements.txt ./ +# READ THE NOTE on caching in the README before changing this/for more info! 
RUN --mount=type=cache,target=/root/.cache/pip \ - pip3 install -r extra_requirements.txt + pip --python .venv/bin/python install -r extra_requirements.txt COPY docker_start.sh ./ COPY .pylintrc ./ diff --git a/api/docker_start.sh b/api/docker_start.sh index 09f181c..c7f9113 100755 --- a/api/docker_start.sh +++ b/api/docker_start.sh @@ -10,14 +10,14 @@ if [ $# = 1 ] && [ $1 = "-test" ]; then cd src uname -a export VIDEOAG_TEST_CONFIG_OVERRIDE="../config/test_config_override.py" - python3 -V - python3 -m coverage run --data-file "../coverage/.data" run_tests.py || + ../.venv/bin/python -V + ../.venv/bin/python -m coverage run --data-file "../coverage/.data" run_tests.py || { echo "Test failed!"; exit 1; } - python3 -m coverage report --data-file "../coverage/.data" --include "./*" || + ../.venv/bin/python -m coverage report --data-file "../coverage/.data" --include "./*" || { echo "Coverage report stdout failed"; exit 1; } - python3 -m coverage report --data-file "../coverage/.data" -m --include "./*" > ../coverage/report.txt || + ../.venv/bin/python -m coverage report --data-file "../coverage/.data" -m --include "./*" > ../coverage/report.txt || { echo "Coverage report report.txt failed"; exit 1; } - python3 -m coverage html -d "../coverage/html/" --data-file "../coverage/.data" --include "./*" || + ../.venv/bin/python -m coverage html -d "../coverage/html/" --data-file "../coverage/.data" --include "./*" || { echo "Coverage report html failed"; exit 1; } else echo "Running uWSGI" diff --git a/common_py/Dockerfile b/common_py/Dockerfile index e901fc3..ce34bf8 100755 --- a/common_py/Dockerfile +++ b/common_py/Dockerfile @@ -1,16 +1,34 @@ -FROM python:3.12 +FROM python:3.13-slim AS base + +# READ THE NOTE on caching in the README before changing this/for more info! +ARG PIP_CACHE_DIR= +ARG APT_CACHE_DIR= +RUN rm -f /etc/apt/apt.conf.d/docker-clean +RUN if ! 
[ -z "$APT_CACHE_DIR" ]; then echo "Dir::Cache::Archives $APT_CACHE_DIR" >> /etc/apt/apt.conf.d/ci_caching; fi -ARG PIP_CACHE_DIR WORKDIR /code RUN mkdir -p /code WORKDIR /code -RUN ls -la /builds/videoag/backend/.cache/pip || true +# Install and build requirements in a different image to reduce final image size +FROM base AS builder + +# Packages needed to build psycopg with pip +# READ THE NOTE on caching in the README before changing this/for more info! +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + apt-get update && apt-get --no-install-recommends install -y libpq-dev gcc python3-dev + COPY requirements.txt /code +RUN python -m venv --without-pip .venv/ RUN --mount=type=cache,target=/root/.cache/pip \ - pip3 install -r requirements.txt + pip --python /code/.venv/bin/python install -r requirements.txt -COPY src/videoag_common /code/src/videoag_common +FROM base AS final + +# Copy venv created in builder image with the dependencies +COPY --from=builder /code/.venv /code/.venv +COPY src/videoag_common /code/src/videoag_common diff --git a/generate_ci_pipeline.py b/generate_ci_pipeline.py index c123dd8..0a4a93b 100644 --- a/generate_ci_pipeline.py +++ b/generate_ci_pipeline.py @@ -93,7 +93,8 @@ echo "{\\"auths\\":{\\"$CI_REGISTRY\\":{\\"username\\":\\"$CI_REGISTRY_USER\\",\ f"--build-arg=GIT_COMMIT_SHA={self.context.commit_sha}", f"--build-arg=ENV_TYPE={self.context.env_type()}", - # See https://github.com/GoogleContainerTools/kaniko/issues/969#issuecomment-2160910028 + # READ THE NOTE on caching in the README before changing this/for more info! 
+ f"--ignore-path=$CI_PROJECT_DIR/.cache", f"--build-arg=PIP_CACHE_DIR=$CI_PROJECT_DIR/.cache/pip", f"--build-arg=APT_CACHE_DIR=$CI_PROJECT_DIR/.cache/apt", f"--cache=true", @@ -111,6 +112,7 @@ echo "{\\"auths\\":{\\"$CI_REGISTRY\\":{\\"username\\":\\"$CI_REGISTRY_USER\\",\ name: gcr.io/kaniko-project/executor:v1.23.2-debug entrypoint: [""] cache: + # READ THE NOTE on caching in the README before changing this/for more info! key: videoag-cache-{self.context.env_type()} paths: - .cache/pip @@ -183,6 +185,10 @@ run-api-tests: def gen_pipeline(context: BuildContext) -> str: pipeline = """ +#################################################################### +##### AUTOMATICALLY GENERATED PIPELINE. DO NOT CHANGE MANUALLY! #### +#################################################################### + stages: - build-and-test - deploy diff --git a/job_controller/Dockerfile b/job_controller/Dockerfile index 86768f2..4a42b99 100644 --- a/job_controller/Dockerfile +++ b/job_controller/Dockerfile @@ -3,8 +3,10 @@ ARG ENV_TYPE ARG GIT_COMMIT_SHA FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_common_py:${GIT_COMMIT_SHA} -# used by pip as env var +# READ THE NOTE on caching in the README before changing this/for more info! ARG PIP_CACHE_DIR= + + # Empty by default ARG GIT_COMMIT_TAG= @@ -12,8 +14,9 @@ ENV VIDEOAG_JOB_CONTROLLER_GIT_COMMIT_HASH $GIT_COMMIT_SHA ENV VIDEOAG_JOB_CONTROLLER_GIT_COMMIT_TAG $GIT_COMMIT_TAG COPY extra_requirements.txt ./ +# READ THE NOTE on caching in the README before changing this/for more info! 
RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r extra_requirements.txt + pip --python .venv/bin/python install -r extra_requirements.txt COPY jobs ./jobs/ diff --git a/job_controller/jobs/ffmpeg_base/Dockerfile b/job_controller/jobs/ffmpeg_base/Dockerfile index b1ea20a..afc4bd6 100644 --- a/job_controller/jobs/ffmpeg_base/Dockerfile +++ b/job_controller/jobs/ffmpeg_base/Dockerfile @@ -5,8 +5,10 @@ ARG ENV_TYPE ARG GIT_COMMIT_SHA FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_job_base:${GIT_COMMIT_SHA} +# READ THE NOTE on caching in the README before changing this/for more info! ARG APT_CACHE_DIR= -RUN if [[ ! -z "$APT_CACHE_DIR" ]]; then echo "Dir::Cache::Archives $APT_CACHE_DIR" >> /etc/apt/apt.conf.d/ci_caching; fi +RUN rm -f /etc/apt/apt.conf.d/docker-clean +RUN if ! [ -z "$APT_CACHE_DIR" ]; then echo "Dir::Cache::Archives $APT_CACHE_DIR" >> /etc/apt/apt.conf.d/ci_caching; fi RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ --mount=type=cache,target=/var/lib/apt,sharing=locked \ -- GitLab