diff --git a/.gitignore b/.gitignore
index 2c08f23d80501b32f623d9e00c3565e80ef953fd..0dbe1eac2b522bfeca9acc62197551b8d0280490 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ __pycache__
 
 *.sqlite
 child-pipeline.yml
+.dockerfiles
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e3a982d745ef25a4f7921dc96e4ad4a5d6914ad7..f75c1abcca9bcc033d6508af29138080f95fb04c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,11 +6,18 @@ generate-pipeline:
   stage: build
   image: python:3.13-slim
   script:
-    - python generate_ci_pipeline.py
+    - >-
+      python build_pipeline_generator.py
+      --ci-pipeline-dest child-pipeline.yml
+      --commit-sha "$CI_COMMIT_SHA"
+      --commit-tag "$CI_COMMIT_TAG"
+      api job_controller
+      $(find job_controller/jobs/ -mindepth 1 -maxdepth 1)
   artifacts:
     expire_in: 1 week
     paths:
       - child-pipeline.yml
+      - .dockerfiles
 
 run-pipeline:
   stage: run
diff --git a/api/Dockerfile b/api/Dockerfile
deleted file mode 100755
index 27e380f435defb64ee329f0b97c2ae68750b17f9..0000000000000000000000000000000000000000
--- a/api/Dockerfile
+++ /dev/null
@@ -1,52 +0,0 @@
-# Can be "development" or "production"
-ARG ENV_TYPE
-ARG GIT_COMMIT_SHA
-FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_common_py:${GIT_COMMIT_SHA} AS base
-
-# READ THE NOTE on caching in the README before changing this/for more info!
-ARG PIP_CACHE_DIR=
-ARG APT_CACHE_DIR=
-RUN rm -f /etc/apt/apt.conf.d/docker-clean
-RUN if ! [ -z "$APT_CACHE_DIR" ]; then echo "Dir::Cache::Archives '$APT_CACHE_DIR';" > /etc/apt/apt.conf.d/ci_caching; fi
-
-
-ENV VIDEOAG_API_GIT_COMMIT_HASH $GIT_COMMIT_SHA
-
-
-# Install and build requirements in different image too reduce final image size
-FROM base AS builder
-
-# READ THE NOTE on caching in the README before changing this/for more info!
-COPY extra_requirements.txt ./
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    --mount=type=cache,target=/var/lib/apt,sharing=locked \
-    if ! pip --python .venv/bin/python install -r extra_requirements.txt; \
-    then echo "Pip failed (packages weren't cached) but you can ignore the error above. We will install the build dependencies and try again" \
-    # Packages needed to build lxml with pip
-    && apt-get update && apt-get --no-install-recommends install -y gcc python3-dev libxml2-dev libxslt-dev \
-    && pip --python .venv/bin/python install -r extra_requirements.txt; \
-    fi
-
-FROM base AS final
-
-COPY --from=builder /code/.venv /code/.venv
-
-COPY docker_start.sh ./
-COPY .pylintrc ./
-COPY tests/ ./tests/
-COPY config/api_example_config.py ./config/
-COPY config/test_config_override.py ./config/
-COPY config/uwsgi_example.ini ./config/
-COPY config/db_test_data.sql ./config/
-
-# The source has a symlink file at src/videoag_common
-# The actual files are already in the image at src/videoag_common
-# So we move the actual files temporarily, copy the src directory, remove the symlink and move the actual files back
-# In the future, COPY --exclude src/videoag_common might work (but right now it doesn't, some "failed to compute cache key")
-RUN mv src/videoag_common src/.temp
-COPY src/ ./src/
-RUN rm src/videoag_common
-RUN mv src/.temp src/videoag_common
-
-ENTRYPOINT ["/code/docker_start.sh"]
\ No newline at end of file
diff --git a/api/build_config.py b/api/build_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bb44c8ba5054bb8a3e7a24dbef363beaa0079fa
--- /dev/null
+++ b/api/build_config.py
@@ -0,0 +1,42 @@
+TARGET_IMAGE_NAME = "api"
+BUILD_DEPENDENCIES = ["../common_py/"]
+
+PIP_REQUIREMENTS_FILE = "requirements.txt"
+APT_RUNTIME_DEPENDENCIES = [
+    "libxml2",  # For uwsgi
+]
+APT_BUILD_DEPENDENCIES = [
+    "gcc", "python3-dev", "libxslt-dev", "libxml2-dev"
+]
+DOCKERFILE_EXTRA = """
+COPY $MODULE_DIR/docker_start.sh ./
+COPY $MODULE_DIR/.pylintrc ./
+COPY $MODULE_DIR/src/ ./src/
+COPY $MODULE_DIR/tests/ ./tests/
+COPY $MODULE_DIR/config/api_example_config.py ./config/
+COPY $MODULE_DIR/config/test_config_override.py ./config/
+COPY $MODULE_DIR/config/uwsgi_example.ini ./config/
+COPY $MODULE_DIR/config/db_test_data.sql ./config/
+
+ENTRYPOINT ["/code/docker_start.sh"]
+"""
+
+# stage, needs and image are added automatically
+CI_TEST_JOB_TEMPLATE = """\
+timeout: 30m
+variables:
+  VIDEOAG_CONFIG: /code/config/api_example_config.py
+command: -test
+artifacts:
+  paths:
+    - /code/coverage/report.txt
+    - /code/coverage/html/*
+services:
+  - name: postgres:17
+    # So we can have the same host for local and CI testing
+    alias: host.docker.internal
+    variables:
+      POSTGRES_DB: videoag
+      POSTGRES_USER: videoag
+      POSTGRES_PASSWORD: videoag
+"""
\ No newline at end of file
diff --git a/api/config/api_example_config.py b/api/config/api_example_config.py
index b30711d18db37122e08ad14a4b5af8204fb95c5c..b96f67c7659d998c82a6b6ff998548a04a18c7f5 100644
--- a/api/config/api_example_config.py
+++ b/api/config/api_example_config.py
@@ -65,7 +65,7 @@ DATABASE = {
     "engine": "postgres",
     "postgres": {
         "host": "host.docker.internal",
-        "port": 9343,
+        "port": 5432,
         "user": "videoag",
         "password": "videoag",
         "database": "videoag",
diff --git a/api/config/test_config_override.py b/api/config/test_config_override.py
index 5ed452dc296bdc453ffc4f05c6ea6af0bad15661..26a5616f82272ea22b0ad9fa9c9d69c87e273bb4 100644
--- a/api/config/test_config_override.py
+++ b/api/config/test_config_override.py
@@ -8,11 +8,11 @@ DB_DATA = "../config/db_test_data.sql"
 
 DATABASE = {
     "postgres": {
-        "host": "ci-database",
+        "host": "host.docker.internal",
         "port": 5432,
-        "user": "videoagtest",
-        "password": "LetMeTest...",
-        "database": "videoagtest",
+        "user": "videoag",
+        "password": "videoag",
+        "database": "videoag",
         "auto_migration": True
     },
     "log_all_statements": False
diff --git a/api/docker_start.sh b/api/docker_start.sh
index c7f91133b9e8b125b90274a64f15f5994cabe6a2..82c82dad924330cd31a46af5b2c38db8bce40444 100755
--- a/api/docker_start.sh
+++ b/api/docker_start.sh
@@ -10,14 +10,14 @@ if [ $# = 1 ] && [ $1 = "-test" ]; then
     cd src
     uname -a
     export VIDEOAG_TEST_CONFIG_OVERRIDE="../config/test_config_override.py"
-    ../.venv/bin/python -V
-    ../.venv/bin/python -m coverage run --data-file "../coverage/.data" run_tests.py ||
+    python -V
+    python -m coverage run --data-file "../coverage/.data" run_tests.py ||
         { echo "Test failed!"; exit 1; }
-    ../.venv/bin/python -m coverage report --data-file "../coverage/.data" --include "./*" ||
+    python -m coverage report --data-file "../coverage/.data" --include "./*" ||
         { echo "Coverage report stdout failed"; exit 1; }
-    ../.venv/bin/python -m coverage report --data-file "../coverage/.data" -m --include "./*" > ../coverage/report.txt ||
+    python -m coverage report --data-file "../coverage/.data" -m --include "./*" > ../coverage/report.txt ||
        { echo "Coverage report report.txt failed"; exit 1; }
-    ../.venv/bin/python -m coverage html -d "../coverage/html/" --data-file "../coverage/.data" --include "./*" ||
+    python -m coverage html -d "../coverage/html/" --data-file "../coverage/.data" --include "./*" ||
        { echo "Coverage report html failed"; exit 1; }
 else
     echo "Running uWSGI"
diff --git a/api/extra_requirements.txt b/api/requirements.txt
similarity index 79%
rename from api/extra_requirements.txt
rename to api/requirements.txt
index e874a7a5efad7a3e50ef9bb62ac6c4045a64077a..17cf5b58396b3829313d3f63a29fb65b75a01756 100644
--- a/api/extra_requirements.txt
+++ b/api/requirements.txt
@@ -1,4 +1,5 @@
 # The Api Dependencies (Versions picked in January 2025)
+# Note that this does NOT include the common_py dependencies!
 
 # required
 flask==3.1.0
diff --git a/api/src/app.py b/api/src/app.py
index 24e0368961b151eec6100ad143be20fb2dc9c12e..2101716457e72d4576b9067d160bcb0b20eae652 100644
--- a/api/src/app.py
+++ b/api/src/app.py
@@ -30,3 +30,6 @@ if "SECRET_KEY" not in app.config:
 # Import routes AFTER initialization
 # noinspection PyUnresolvedReferences
 import api.routes
+
+# Flush any output buffered during startup
+print(flush=True)
diff --git a/api/src/videoag_common b/api/src/videoag_common
deleted file mode 120000
index 6eaec4c319fd0e4873498aa6cf73f5e339c3c8f7..0000000000000000000000000000000000000000
--- a/api/src/videoag_common
+++ /dev/null
@@ -1 +0,0 @@
-../../common_py/src/videoag_common
\ No newline at end of file
diff --git a/api/tests/api_test.py b/api/tests/api_test.py
index 7235ea669586d3f7363f8c2364e65d84bdc34046..699c07abadec14f58c95c21543546d1cc581392d 100644
--- a/api/tests/api_test.py
+++ b/api/tests/api_test.py
@@ -29,6 +29,7 @@ class ApiTest(DatabaseTest):
                    f"{pretty_json_or_string(self._last_request[1])}\n"
                    f"Response:\n"
                    f"{pretty_json_or_string(self._last_request[2])}")
+        print(flush=True, end="")
         return res
 
     def moderator_login(self) -> str:
diff --git a/build_pipeline_generator.py b/build_pipeline_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..8735cf73a313f094909b4792e91826f9c557d112
--- /dev/null
+++ b/build_pipeline_generator.py
@@ -0,0 +1,320 @@
+import re
+from pathlib import Path
+from argparse import ArgumentParser
+
+
+class ModuleBuildConfig:
+
+    def __init__(self, context: "BuildContext", module_dir: Path):
+        super().__init__()
+
+        self.context = context
+        self.module_dir = module_dir.resolve()
+        self.config_file = self.module_dir.joinpath("build_config.py")
+        self.name = str(self.module_dir.relative_to(context.build_dir))
+
+        globals = {}
+        exec(self.config_file.read_text(), globals)
+
+        self.target_image_name = globals.pop("TARGET_IMAGE_NAME", None)
+        if self.target_image_name is not None and not isinstance(self.target_image_name, str):
+            raise TypeError("TARGET_IMAGE_NAME must be a str (or absent)")
+
+        context.add_module(self)
+
+        self.dependencies: list[ModuleBuildConfig] = []
+        for dependency in globals.pop("BUILD_DEPENDENCIES", []):
+            if not isinstance(dependency, str):
+                raise TypeError("BUILD_DEPENDENCIES must be list of str")
+            dependency_name = str(module_dir.joinpath(Path(dependency)).resolve().relative_to(context.build_dir))
+            self.dependencies.append(context.get_or_load_module(dependency_name))
+
+        self.pip_req_file = None
+        pip_req_file_name = globals.pop("PIP_REQUIREMENTS_FILE", None)
+        if pip_req_file_name is not None:
+            assert isinstance(pip_req_file_name, str)
+            self.pip_req_file = self.module_dir.joinpath(Path(pip_req_file_name))
+            if not self.pip_req_file.is_file():
+                raise ValueError(f"Cannot find pip requirements file {self.pip_req_file}")
+
+        self.apt_runtime_dependencies = globals.pop("APT_RUNTIME_DEPENDENCIES", [])
+        if any(not isinstance(r, str) for r in self.apt_runtime_dependencies):
+            raise TypeError("APT_RUNTIME_DEPENDENCIES must be list of str")
+
+        self.apt_build_dependencies = globals.pop("APT_BUILD_DEPENDENCIES", [])
+        if any(not isinstance(r, str) for r in self.apt_build_dependencies):
+            raise TypeError("APT_BUILD_DEPENDENCIES must be list of str")
+
+        self.dockerfile_extra = globals.pop("DOCKERFILE_EXTRA", "")
+        if not isinstance(self.dockerfile_extra, str):
+            raise TypeError("DOCKERFILE_EXTRA must be str (or absent)")
+        self.dockerfile_extra = self.dockerfile_extra.replace(
+            "$MODULE_DIR",
+            str(self.module_dir.relative_to(context.build_dir))
+        )
+
+        self.ci_test_job_template: str | None = globals.pop("CI_TEST_JOB_TEMPLATE", None)
+        if self.ci_test_job_template is not None and not isinstance(self.ci_test_job_template, str):
+            raise TypeError("CI_TEST_JOB_TEMPLATE must be str (or absent)")
+
+        for g in globals.keys():
+            assert isinstance(g, str)
+            if g.isupper():
+                raise ValueError(f"Unknown key {g} in config file")
+
+    def check_cyclic_dependency(self, dependents_stack: list[str]):
+        if self.name in dependents_stack:
+            raise ValueError(f"Dependency cycle involving {self.name} detected")
+        dependents_stack = dependents_stack + [self.name]
+        for dependency in self.dependencies:
+            dependency.check_cyclic_dependency(dependents_stack)
+
+    def collect_docker_dependencies(
+            self,
+            pip_requirement_files: list[Path],
+            apt_runtime_dependencies: list[str],
+            apt_build_dependencies: list[str],
+            dockerfile_extras: list[str]
+    ):
+        for dep in self.dependencies:
+            dep.collect_docker_dependencies(
+                pip_requirement_files,
+                apt_runtime_dependencies,
+                apt_build_dependencies,
+                dockerfile_extras
+            )
+
+        if self.pip_req_file is not None:
+            pip_requirement_files.append(self.pip_req_file)
+
+        apt_runtime_dependencies.extend(self.apt_runtime_dependencies)
+        apt_build_dependencies.extend(self.apt_build_dependencies)
+        dockerfile_extras.append(self.dockerfile_extra)
+
+    def generate_dockerfile(self):
+        pip_requirement_files: list[Path] = []
+        apt_runtime_dependencies: list[str] = []
+        apt_build_dependencies: list[str] = []
+        dockerfile_extras: list[str] = []
+        self.collect_docker_dependencies(
+            pip_requirement_files,
+            apt_runtime_dependencies,
+            apt_build_dependencies,
+            dockerfile_extras
+        )
+        # Remove duplicates and ensure the same order every time
+        apt_runtime_dependencies = sorted(set(apt_runtime_dependencies))
+        apt_build_dependencies = sorted(set(apt_build_dependencies))
+
+        res = f"""\
+#####################################################################
+### WARNING: THIS FILE WAS AUTOMATICALLY GENERATED. DO NOT EDIT ! ###
+#####################################################################
+FROM python:3.13-slim AS base
+
+RUN mkdir -p /code
+WORKDIR /code
+
+ENV PIP_CACHE_DIR=/tmp/pip-cache
+RUN apt-get update && apt-get --no-install-recommends install -y {' '.join(apt_runtime_dependencies)}
+"""
+        for i, path in enumerate(pip_requirement_files):
+            res += f"COPY {str(path.relative_to(self.context.build_dir))} /tmp/req-{i}.txt\n"
+
+        pip_requirement_list_arg = " ".join(f"-r /tmp/req-{i}.txt" for i in range(len(pip_requirement_files)))
+
+        res += f"""
+FROM base AS builder
+
+# This step builds (and installs) the requirements and also puts the build result into the pip cache
+RUN apt-get update && apt-get --no-install-recommends install -y {' '.join(apt_build_dependencies)}
+RUN pip install {pip_requirement_list_arg}
+
+FROM base AS final
+
+# Here we copy the pip cache with the built packages and install them again. Pip will use the cache and won't need the
+# apt build dependencies. This saves a lot of space, compared to leaving the build dependencies in the final image
+# (reduces the final image size by about half)
+COPY --from=builder /tmp/pip-cache /tmp/pip-cache
+RUN pip install {pip_requirement_list_arg}
+"""
+        for docker in dockerfile_extras:
+            res += "\n" + docker + "\n"
+
+        return res
+
+    def generate_ci_jobs(self):
+        self.context.ensure_in_ci()
+        if self.target_image_name is None:
+            raise ValueError("This module has no target image name and therefore cannot be built")
+        return (self._generate_ci_build_job()
+                + "\n" + self._generate_ci_test_job()
+                + "\n" + self._generate_ci_deploy_job())
+
+    def output_dockerfile_path(self) -> Path:
+        return self.context.build_dir.joinpath(".dockerfiles").joinpath(self.target_image_name)
+
+    def image_full_name(self):
+        return f"{self.context.env_type()}/{self.target_image_name}"
+
+    def ci_build_job_name(self):
+        return f"build-{self.target_image_name}"
+
+    @staticmethod
+    def _get_auth_echo():
+        return """\
+echo "{\\"auths\\":{\\"$CI_REGISTRY\\":{\\"username\\":\\"$CI_REGISTRY_USER\\",\\"password\\":\\"$CI_REGISTRY_PASSWORD\\"}}}" > /kaniko/.docker/config.json\
+"""
+
+    def _generate_ci_build_job(self):
+        kaniko_args = [
+            f"--context=git://git.fsmpi.rwth-aachen.de/videoag/backend.git#{self.context.commit_sha}",
+            f"--dockerfile={str(self.output_dockerfile_path().relative_to(self.context.build_dir))}",
+            f"--git recurse-submodules=true",
+            # Image tags use ':'; '#' is only valid for the git context ref above
+            f"--destination=$CI_REGISTRY_IMAGE/{self.image_full_name()}:{self.context.commit_sha}",
+            f"--build-arg=GIT_COMMIT_SHA={self.context.commit_sha}",
+            f"--cache=true",
+        ]
+
+        if self.context.commit_tag is not None:
+            kaniko_args.append(f"--build-arg=GIT_COMMIT_TAG={self.context.commit_tag}")
+
+        return f"""
+{self.ci_build_job_name()}:
+  stage: build-and-test
+  timeout: 1h
+  needs:
+    - pipeline: $PARENT_PIPELINE_ID
+      job: generate-pipeline
+  image:
+    name: gcr.io/kaniko-project/executor:v1.23.2-debug
+    entrypoint: [""]
+  script:
+    - {self._get_auth_echo()}
+    - echo {self.context.commit_sha}
+    - >-
+      /kaniko/executor
+      {"\n      ".join(kaniko_args)}
+"""
+
+    def _generate_ci_test_job(self):
+        if self.ci_test_job_template is None:
+            return ""
+        assert isinstance(self.ci_test_job_template, str)
+        res = f"""
+test-{self.target_image_name}:
+  stage: build-and-test
+  needs: [{self.ci_build_job_name()}]
+  image:
+    name: $CI_REGISTRY_IMAGE/{self.image_full_name()}:{self.context.commit_sha}
+"""
+        res += "  " + "\n  ".join(self.ci_test_job_template.splitlines()) + "\n"
+        return res
+
+    def _generate_ci_deploy_job(self):
+        destination_args = [
+            f"--destination=$CI_REGISTRY_IMAGE/{self.image_full_name()}:latest"
+        ]
+        if self.context.is_production:
+            destination_args.append(f"--destination=$CI_REGISTRY_IMAGE/{self.image_full_name()}:{self.context.commit_tag}")
+
+        return f"""
+deploy-{self.target_image_name}:
+  stage: deploy
+  timeout: 1h
+  image:
+    name: gcr.io/kaniko-project/executor:v1.23.2-debug
+    entrypoint: [""]
+  script:
+    - {self._get_auth_echo()}
+    - mkdir /workdir
+    - echo "FROM $CI_REGISTRY_IMAGE/{self.image_full_name()}:{self.context.commit_sha}" > /workdir/Dockerfile
+    - echo {self.context.commit_sha}
+    - >-
+      /kaniko/executor
+      --context=dir:///workdir
+      {"\n      ".join(destination_args)}
+"""
+
+
+class BuildContext:
+
+    def __init__(self, build_dir: Path, commit_sha: str | None, commit_tag: str | None):
+        super().__init__()
+        self.build_dir = build_dir
+        self.modules: dict[str, ModuleBuildConfig] = {}
+        self._target_image_names: set[str] = set()
+        self.commit_sha = commit_sha or None  # Turn an empty string into None
+        self.commit_tag = commit_tag or None
+        self.is_production = commit_tag is not None and re.fullmatch("v.*", commit_tag) is not None
+
+    def env_type(self) -> str:
+        return "production" if self.is_production else "development"
+
+    def ensure_in_ci(self):
+        if self.commit_sha is None:
+            raise Exception("Not in GitLab CI. No commit sha given")
+
+    def add_module(self, module: ModuleBuildConfig):
+        if module.target_image_name is not None:
+            if module.target_image_name in self._target_image_names:
+                raise ValueError(f"Duplicate target image name {module.target_image_name}")
+            self._target_image_names.add(module.target_image_name)
+        self.modules[module.name] = module
+
+    def get_or_load_module(self, name: str):
+        if name in self.modules:
+            return self.modules[name]
+        module_dir = self.build_dir.joinpath(name)
+        try:
+            return ModuleBuildConfig(self, module_dir)
+        except Exception as e:
+            raise Exception(f"Exception while loading module {module_dir}") from e
+
+    def generate_ci_pipeline(self):
+        self.ensure_in_ci()
+
+        pipeline = """
+####################################################################
+##### AUTOMATICALLY GENERATED PIPELINE. DO NOT CHANGE MANUALLY! ####
+####################################################################
+
+stages:
+  - build-and-test
+  - deploy
+"""
+        for module in self.modules.values():
+            if module.target_image_name is not None:
+                pipeline += module.generate_ci_jobs()
+
+        return pipeline
+
+
+def main():
+    parser = ArgumentParser()
+    parser.add_argument("--build-dir", type=Path, default=Path("."))
+    parser.add_argument("--commit-sha", type=str, required=False)
+    parser.add_argument("--commit-tag", type=str, required=False)
+    parser.add_argument("--ci-pipeline-dest", type=Path, required=False)
+    parser.add_argument("modules", nargs="+", type=Path)
+
+    args = parser.parse_args()
+
+    context = BuildContext(args.build_dir.resolve(), args.commit_sha, args.commit_tag)
+    for module in args.modules:
+        context.get_or_load_module(str(module.resolve().relative_to(context.build_dir)))
+
+    # Fail early on dependency cycles, before generating any output
+    for module in context.modules.values():
+        module.check_cyclic_dependency([])
+
+    for module in context.modules.values():
+        if module.target_image_name is None:
+            continue
+        module.output_dockerfile_path().parent.mkdir(parents=True, exist_ok=True)
+        module.output_dockerfile_path().write_text(module.generate_dockerfile())
+
+    if args.ci_pipeline_dest is not None:
+        args.ci_pipeline_dest.parent.mkdir(parents=True, exist_ok=True)
+        args.ci_pipeline_dest.write_text(context.generate_ci_pipeline())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/common_py/Dockerfile b/common_py/Dockerfile
deleted file mode 100755
index e1ed8c49350c2daae21962ebfe90d4a3c9d6f8e6..0000000000000000000000000000000000000000
--- a/common_py/Dockerfile
+++ /dev/null
@@ -1,38 +0,0 @@
-FROM python:3.13-slim AS base
-
-# READ THE NOTE on caching in the README before changing this/for more info!
-ARG PIP_CACHE_DIR=
-ARG APT_CACHE_DIR=
-RUN rm -f /etc/apt/apt.conf.d/docker-clean
-RUN if ! [ -z "$APT_CACHE_DIR" ]; then echo "Dir::Cache::Archives '$APT_CACHE_DIR';" > /etc/apt/apt.conf.d/ci_caching; fi
-
-
-WORKDIR /code
-
-RUN mkdir -p /code
-WORKDIR /code
-
-
-# Install and build requirements in different image too reduce final image size
-FROM base AS builder
-
-RUN python -m venv --without-pip .venv/
-
-# READ THE NOTE on caching in the README before changing this/for more info!
-COPY requirements.txt /code
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    --mount=type=cache,target=/var/lib/apt,sharing=locked \
-    if ! pip --python .venv/bin/python install -r requirements.txt; \
-    then echo "Pip failed (packages weren't cached) but you can ignore the error above. We will install the build dependencies and try again" \
-    # Packages needed to build psycopg with pip
-    && apt-get update && apt-get --no-install-recommends install -y gcc python3-dev libpq-dev \
-    && pip --python .venv/bin/python install -r requirements.txt; \
-    fi
-
-FROM base AS final
-
-# Copy venv created in builder image with the dependencies
-COPY --from=builder /code/.venv /code/.venv
-
-COPY src/videoag_common /code/src/videoag_common
diff --git a/common_py/build_config.py b/common_py/build_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..36eafd4c8c85ae3d8efa169f4494cfb5402891fb
--- /dev/null
+++ b/common_py/build_config.py
@@ -0,0 +1,10 @@
+PIP_REQUIREMENTS_FILE = "requirements.txt"
+APT_RUNTIME_DEPENDENCIES = [
+    "libpq-dev",  # For psycopg
+]
+APT_BUILD_DEPENDENCIES = [
+    "gcc", "python3-dev",
+]
+DOCKERFILE_EXTRA = """
+COPY $MODULE_DIR/src ./src/
+"""
\ No newline at end of file
diff --git a/common_py/src/videoag_common/miscellaneous/util.py b/common_py/src/videoag_common/miscellaneous/util.py
index 54c50b108834638290c821c75ec48239388b3ab3..d1df743676fd5575a6131dd54da99dd809866d89 100644
--- a/common_py/src/videoag_common/miscellaneous/util.py
+++ b/common_py/src/videoag_common/miscellaneous/util.py
@@ -102,7 +102,7 @@ def hash_json_sha256(data: JsonTypes) -> str:
 
 def load_config_file(path: Path):
     config_module = ModuleType("config")
-    config_module.__file__ = path.name
+    config_module.__file__ = str(path.absolute())
     try:
         exec(compile(path.read_text(), path.name, "exec"), config_module.__dict__)
     except Exception as e:
diff --git a/api/docker-compose.yaml b/docker-compose.yaml
similarity index 54%
rename from api/docker-compose.yaml
rename to docker-compose.yaml
index 072035d495718d9c2ce1b2449ca97317db51f1df..b792ee86933ca4a4693884d837d8db39209e8c15 100644
--- a/api/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,10 +1,10 @@
 services:
-  videoag_api:
-    build: .
+  api:
+    build:
+      context: .
+      dockerfile: .dockerfiles/api
     ports:
       - 5000:5000
-    volumes:
-      - ./:/code
     extra_hosts:
       host.docker.internal: host-gateway
     environment:
@@ -12,22 +12,39 @@ services:
       - VIDEOAG_API_LIVE_CONFIG=../config/live_config.json
       - VIDEOAG_UWSGI_CONFIG=/code/config/uwsgi_example.ini
     depends_on:
-      database:
+      db:
         condition: service_healthy
+
+  api-test:
+    profiles: ["test"]
+    build:
+      context: .
+      dockerfile: .dockerfiles/api
+    extra_hosts:
+      host.docker.internal: host-gateway
+    environment:
+      - VIDEOAG_CONFIG=../config/api_example_config.py
+    volumes:
+      - ./api/coverage/:/code/coverage
+    command: -test
+    depends_on:
+      db:
+        condition: service_healthy
 
-  database:
+
+  db:
     image: postgres:16-alpine
     environment:
       - POSTGRES_USER=videoag
       - POSTGRES_PASSWORD=videoag
     ports:
-      - "9343:5432"
+      - "5432:5432"
     volumes:
-      - .data/:/var/lib/postgresql/data
+      - db-data:/var/lib/postgresql/data
     healthcheck:
       test: ["CMD-SHELL", "pg_isready -U videoag"]
       interval: 2s
       timeout: 5s
       retries: 5
+
 volumes:
-  database:
-    driver: local
+  db-data:
diff --git a/generate_ci_pipeline.py b/generate_ci_pipeline.py
deleted file mode 100644
index 0a4a93b13fea628765c2100f43aaa9e5e3a14f9e..0000000000000000000000000000000000000000
--- a/generate_ci_pipeline.py
+++ /dev/null
@@ -1,250 +0,0 @@
-import os
-import re
-import traceback
-from dataclasses import dataclass
-from pathlib import Path
-
-
-# This file dynamically generates a GitLab CI pipeline. This is needed since the GitLab CI is not quite flexible enough
-# to handle the different "development" and "production" builds and to automatically build all the jobs.
-# For development (everything that is not a tag pipeline whose tag starts with v) the images are put into the registry
-# with the "development" prefix. For production the images are put into the registry with the "production" prefix and
-# that prefix is write-protected, so only maintainers (or pipelines triggered by them) can push new images to production.
-# Kubernetes directly uses those images from the registry.
-
-
-class BuildContext:
-
-    def __init__(self, commit_sha: str, commit_tag: str or None):
-        super().__init__()
-        self.commit_sha = commit_sha
-        self.commit_tag = commit_tag
-        self.is_production = commit_tag is not None and re.fullmatch("v.*", commit_tag) is not None
-        self.targets: dict[str, "ImageTarget"] = {}
-
-    def env_type(self) -> str:
-        return "production" if self.is_production else "development"
-
-    def add_image_target(self,
-                         image_name: str,
-                         dependency_targets: list["ImageTarget"],
-                         context_sub_path: str,
-                         only_intermediate: bool
-                         ) -> "ImageTarget":
-        target = ImageTarget(
-            self,
-            image_name,
-            dependency_targets,
-            context_sub_path,
-            only_intermediate
-        )
-        self.targets[target.image_name] = target
-        return target
-
-
-@dataclass
-class ImageTarget:
-    context: BuildContext
-    image_name: str
-    dependency_targets: list["ImageTarget" or str]
-    context_sub_path: str
-    only_intermediate: bool
-
-    def full_name(self):
-        return f"{self.context.env_type()}_{self.image_name}"
-
-    def versioned_full_name(self):
-        return f"{self.full_name()}:{self.context.commit_sha}"
-
-    def build_job_name(self):
-        return f"build-{self.image_name}"
-
-    def deploy_job_name(self):
-        return f"deploy-{self.image_name}"
-
-    def _validate(self):
-        for i in range(len(self.dependency_targets)):
-            target = self.dependency_targets[i]
-            if isinstance(target, ImageTarget):
-                if target not in self.context.targets.values():
-                    raise ValueError(f"Unknown target {target.image_name} (object not in context)")
-            else:
-                assert isinstance(target, str)
-                if target not in self.context.targets:
-                    raise ValueError(f"Unknown target {target}")
-                self.dependency_targets[i] = self.context.targets[target]
-
-    def gen_jobs(self):
-        self._validate()
-        return self._gen_build_job() + self._gen_deploy_job()
-
-    @staticmethod
-    def _get_auth_echo():
-        return """\
-echo "{\\"auths\\":{\\"$CI_REGISTRY\\":{\\"username\\":\\"$CI_REGISTRY_USER\\",\\"password\\":\\"$CI_REGISTRY_PASSWORD\\"}}}" > /kaniko/.docker/config.json\
-"""
-
-    def _gen_build_job(self):
-        kaniko_args = [
-            f"--context=git://git.fsmpi.rwth-aachen.de/videoag/backend.git#{self.context.commit_sha}",
-            f"--context-sub-path={self.context_sub_path}",
-            f"--git recurse-submodules=true",
-            f"--destination=$CI_REGISTRY_IMAGE/{self.versioned_full_name()}",
-            f"--build-arg=GIT_COMMIT_SHA={self.context.commit_sha}",
-            f"--build-arg=ENV_TYPE={self.context.env_type()}",
-
-            # READ THE NOTE on caching in the README before changing this/for more info!
- f"--ignore-path=$CI_PROJECT_DIR/.cache", - f"--build-arg=PIP_CACHE_DIR=$CI_PROJECT_DIR/.cache/pip", - f"--build-arg=APT_CACHE_DIR=$CI_PROJECT_DIR/.cache/apt", - f"--cache=true", - ] - - if self.context.commit_tag is not None: - kaniko_args.append(f"--build-arg=GIT_COMMIT_TAG={self.context.commit_tag}") - - return f""" -{self.build_job_name()}: - stage: build-and-test - needs: [{",".join(t.build_job_name() for t in self.dependency_targets)}] - timeout: 1h - image: - name: gcr.io/kaniko-project/executor:v1.23.2-debug - entrypoint: [""] - cache: - # READ THE NOTE on caching in the README before changing this/for more info! - key: videoag-cache-{self.context.env_type()} - paths: - - .cache/pip - - .cache/apt - script: - - {self._get_auth_echo()} - - echo {self.context.commit_sha} - - >- - /kaniko/executor - {"\n ".join(kaniko_args)} -""" - - def _gen_deploy_job(self): - if self.only_intermediate: - return "" - - destination_args = [ - f"--destination=$CI_REGISTRY_IMAGE/{self.full_name()}:latest" - ] - if self.context.is_production: - destination_args.append(f"--destination=$CI_REGISTRY_IMAGE/{self.full_name()}:{self.context.commit_tag}") - - return f""" -{self.deploy_job_name()}: - stage: deploy - timeout: 1h - image: - name: gcr.io/kaniko-project/executor:v1.23.2-debug - entrypoint: [""] - script: - - {self._get_auth_echo()} - - mkdir /workdir - - echo "FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/{self.versioned_full_name()} > /workdir/Dockerfile" - - echo {self.context.commit_sha} - - >- - /kaniko/executor - --context=dir:///workdir - {"\n ".join(destination_args)} -""" - - -def gen_test_api(context: BuildContext) -> str: - return f""" -run-api-tests: - stage: build-and-test - needs: [build-api] - timeout: 30m - variables: - VIDEOAG_CONFIG: /code/config/api_example_config.py - image: - name: registry.git.fsmpi.rwth-aachen.de/videoag/backend/{context.env_type()}_api:{context.commit_sha} - entrypoint: [""] - script: - - cd /code - - /code/docker_start.sh -test - artifacts: - paths: - - /code/coverage/report.txt - - /code/coverage/html/* - services: - - name: postgres:17 - alias: ci-database - variables: - POSTGRES_DB: videoagtest - POSTGRES_USER: videoagtest - POSTGRES_PASSWORD: LetMeTest... - -""" - - -def gen_pipeline(context: BuildContext) -> str: - pipeline = """ -#################################################################### -##### AUTOMATICALLY GENERATED PIPELINE. DO NOT CHANGE MANUALLY! 
#### -#################################################################### - -stages: - - build-and-test - - deploy -""" - - for target in context.targets.values(): - pipeline += target.gen_jobs() - - pipeline += gen_test_api(context) - - return pipeline - - -def main(): - commit_sha = os.environ["CI_COMMIT_SHA"] - if not isinstance(commit_sha, str) or commit_sha == "": - raise ValueError("Empty or invalid commit sha") - - commit_tag = os.environ.get("CI_COMMIT_TAG", None) - if commit_tag == "": - commit_tag = None - if commit_tag is not None and not isinstance(commit_tag, str): - raise ValueError("Invalid commit tag") - - context = BuildContext(commit_sha, commit_tag) - - common_py = context.add_image_target("common_py", [], "common_py/", True) - context.add_image_target("api", [common_py], "api/", False) - context.add_image_target("job_controller", [common_py], "job_controller/", False) - - for job_dir in Path("job_controller/jobs").iterdir(): - assert isinstance(job_dir, Path) - job_name = job_dir.name - - job_dir = Path("job_controller/jobs").joinpath(job_name) - dockerfile = job_dir.joinpath("Dockerfile") - pattern = "FROM registry\\.git\\.fsmpi\\.rwth-aachen\\.de\\/videoag\\/backend/\\$\\{ENV_TYPE}_([a-zA-Z0-9-_]+):\\$\\{GIT_COMMIT_SHA}" - matches = re.findall(pattern, dockerfile.read_text()) - if len(matches) != 1: - raise Exception(f"{dockerfile}: Unable to determine base image for pipeline dependencies. Cannot find" - f"special FROM instruction (Or found multiple) (See other job's images)") - base_image_name = matches[0] - - context.add_image_target( - f"job_{job_name}", - [base_image_name], - f"job_controller/jobs/{job_dir.name}", - not job_dir.joinpath("metadata.json").exists() - ) - - Path("child-pipeline.yml").write_text(gen_pipeline(context)) - - -if __name__ == "__main__": - try: - main() - except Exception as e: - traceback.print_exception(e) - exit(-1) diff --git a/generate_dockerfiles.sh b/generate_dockerfiles.sh new file mode 100755 index 0000000000000000000000000000000000000000..49dd60c6eeb117125fc1f322a7bb5c5692985b58 --- /dev/null +++ b/generate_dockerfiles.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python build_pipeline_generator.py api job_controller $(find job_controller/jobs/ -mindepth 1 -maxdepth 1) diff --git a/job_controller/Dockerfile b/job_controller/Dockerfile deleted file mode 100644 index 4a42b99be24802bf6c079de8e83abbf4c8a01d96..0000000000000000000000000000000000000000 --- a/job_controller/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -# Can be "development" or "production" -ARG ENV_TYPE -ARG GIT_COMMIT_SHA -FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_common_py:${GIT_COMMIT_SHA} - -# READ THE NOTE on caching in the README before changing this/for more info! -ARG PIP_CACHE_DIR= - - -# Empty by default -ARG GIT_COMMIT_TAG= - -ENV VIDEOAG_JOB_CONTROLLER_GIT_COMMIT_HASH $GIT_COMMIT_SHA -ENV VIDEOAG_JOB_CONTROLLER_GIT_COMMIT_TAG $GIT_COMMIT_TAG - -COPY extra_requirements.txt ./ -# READ THE NOTE on caching in the README before changing this/for more info! 
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip --python .venv/bin/python install -r extra_requirements.txt
-
-COPY jobs ./jobs/
-
-# The source has a symlink file at src/videoag_common
-# The actual files are already in the image at src/videoag_common
-# So we move the actual files temporarily, copy the src directory, remove the symlink and move the actual files back
-# In the future, COPY --exclude src/videoag_common might work (but right now it doesn't, some "failed to compute cache key")
-RUN mv src/videoag_common src/.temp
-COPY src/ ./src/
-RUN rm src/videoag_common
-RUN mv src/.temp src/videoag_common
-
-
-WORKDIR src
-
-CMD ["python", "run.py"]
diff --git a/job_controller/build_config.py b/job_controller/build_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..83718b17c46a7039c44595fe4039ee4481ab12b6
--- /dev/null
+++ b/job_controller/build_config.py
@@ -0,0 +1,12 @@
+TARGET_IMAGE_NAME = "job_controller"
+BUILD_DEPENDENCIES = ["../common_py/"]
+
+PIP_REQUIREMENTS_FILE = "requirements.txt"
+DOCKERFILE_EXTRA = """
+COPY $MODULE_DIR/src ./src/
+COPY $MODULE_DIR/jobs/ ./jobs/
+
+WORKDIR src
+
+CMD ["python", "run.py"]
+"""
\ No newline at end of file
diff --git a/job_controller/extra_requirements.txt b/job_controller/extra_requirements.txt
deleted file mode 100644
index 1f048bb3fc1b068454acc8d9088349fa4a12a851..0000000000000000000000000000000000000000
--- a/job_controller/extra_requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-requests
-kubernetes==27.2.0
diff --git a/job_controller/jobs/base/Dockerfile b/job_controller/jobs/base/Dockerfile
deleted file mode 100644
index 739fd186e059c630e993921f47d993c851878ad4..0000000000000000000000000000000000000000
--- a/job_controller/jobs/base/Dockerfile
+++ /dev/null
@@ -1,10 +0,0 @@
-# Can be "development" or "production"
-ARG ENV_TYPE
-ARG GIT_COMMIT_SHA
-FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_common_py:${GIT_COMMIT_SHA}
-
-COPY . src/
-
-WORKDIR src/
-
-ENTRYPOINT ["python", "run.py"]
diff --git a/job_controller/jobs/base/build_config.py b/job_controller/jobs/base/build_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef232b9719f00b8cfa9bd4bc7d129f22bf2ff11d
--- /dev/null
+++ b/job_controller/jobs/base/build_config.py
@@ -0,0 +1,7 @@
+BUILD_DEPENDENCIES = ["../../../common_py/"]
+
+DOCKERFILE_EXTRA = """
+COPY $MODULE_DIR/ ./src/
+
+CMD ["python", "run.py"]
+"""
\ No newline at end of file
diff --git a/job_controller/jobs/ffmpeg_base/Dockerfile b/job_controller/jobs/ffmpeg_base/Dockerfile
deleted file mode 100644
index 6006611a019cf812111e4d736df6e258ad7be827..0000000000000000000000000000000000000000
--- a/job_controller/jobs/ffmpeg_base/Dockerfile
+++ /dev/null
@@ -1,16 +0,0 @@
-# We have this image to avoid installing ffmpeg multiple times
-
-# Can be "development" or "production"
-ARG ENV_TYPE
-ARG GIT_COMMIT_SHA
-FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_job_base:${GIT_COMMIT_SHA}
-
-# READ THE NOTE on caching in the README before changing this/for more info!
-ARG APT_CACHE_DIR=
-RUN rm -f /etc/apt/apt.conf.d/docker-clean
-RUN if ! [ -z "$APT_CACHE_DIR" ]; then echo "Dir::Cache::Archives '$APT_CACHE_DIR';" > /etc/apt/apt.conf.d/ci_caching; fi
-
-RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    --mount=type=cache,target=/var/lib/apt,sharing=locked \
-    apt-get update && apt-get --no-install-recommends install -y ffmpeg
-
diff --git a/job_controller/jobs/media_process_scheduler/Dockerfile b/job_controller/jobs/media_process_scheduler/Dockerfile
deleted file mode 100644
index 62c1502bf773250f8201fc3d16b6b52140549072..0000000000000000000000000000000000000000
--- a/job_controller/jobs/media_process_scheduler/Dockerfile
+++ /dev/null
@@ -1,6 +0,0 @@
-# Can be "development" or "production"
-ARG ENV_TYPE
-ARG GIT_COMMIT_SHA
-FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_job_base:${GIT_COMMIT_SHA}
-
-COPY . .
diff --git a/job_controller/jobs/media_process_scheduler/build_config.py b/job_controller/jobs/media_process_scheduler/build_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..e73ed5960bc7ac739b51ee252e3f074f3a11b9aa
--- /dev/null
+++ b/job_controller/jobs/media_process_scheduler/build_config.py
@@ -0,0 +1,6 @@
+TARGET_IMAGE_NAME = "job_media_process_scheduler"
+BUILD_DEPENDENCIES = ["../base/"]
+
+DOCKERFILE_EXTRA = """
+COPY $MODULE_DIR/job.py ./src/
+"""
\ No newline at end of file
diff --git a/job_controller/jobs/sample_thumbnail/Dockerfile b/job_controller/jobs/sample_thumbnail/Dockerfile
deleted file mode 100644
index 4a7248d245c12cf539aec8288fb1f1024a259aaa..0000000000000000000000000000000000000000
--- a/job_controller/jobs/sample_thumbnail/Dockerfile
+++ /dev/null
@@ -1,6 +0,0 @@
-# Can be "development" or "production"
-ARG ENV_TYPE
-ARG GIT_COMMIT_SHA
-FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_job_ffmpeg_base:${GIT_COMMIT_SHA}
-
-COPY . /app/src
diff --git a/job_controller/jobs/sample_thumbnail/build_config.py b/job_controller/jobs/sample_thumbnail/build_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3d83b260044ce6b52d23831f34b16ee09fdde5d
--- /dev/null
+++ b/job_controller/jobs/sample_thumbnail/build_config.py
@@ -0,0 +1,7 @@
+TARGET_IMAGE_NAME = "job_sample_thumbnail"
+BUILD_DEPENDENCIES = ["../base/"]
+
+APT_RUNTIME_DEPENDENCIES = ["ffmpeg"]
+DOCKERFILE_EXTRA = """
+COPY $MODULE_DIR/job.py ./src/
+"""
\ No newline at end of file
diff --git a/job_controller/jobs/source_file_sorter/Dockerfile b/job_controller/jobs/source_file_sorter/Dockerfile
deleted file mode 100644
index 4a7248d245c12cf539aec8288fb1f1024a259aaa..0000000000000000000000000000000000000000
--- a/job_controller/jobs/source_file_sorter/Dockerfile
+++ /dev/null
@@ -1,6 +0,0 @@
-# Can be "development" or "production"
-ARG ENV_TYPE
-ARG GIT_COMMIT_SHA
-FROM registry.git.fsmpi.rwth-aachen.de/videoag/backend/${ENV_TYPE}_job_ffmpeg_base:${GIT_COMMIT_SHA}
-
-COPY . /app/src
diff --git a/job_controller/jobs/source_file_sorter/build_config.py b/job_controller/jobs/source_file_sorter/build_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..14c9d521d6af7af32b7f00486841e7e0eca0658b
--- /dev/null
+++ b/job_controller/jobs/source_file_sorter/build_config.py
@@ -0,0 +1,7 @@
+TARGET_IMAGE_NAME = "job_source_file_sorter"
+BUILD_DEPENDENCIES = ["../base/"]
+
+APT_RUNTIME_DEPENDENCIES = ["ffmpeg"]
+DOCKERFILE_EXTRA = """
+COPY $MODULE_DIR/job.py ./src/
+"""
\ No newline at end of file
diff --git a/job_controller/requirements.txt b/job_controller/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a60cc87931a5ef4d8c171ce479f9ab8c876b9291
--- /dev/null
+++ b/job_controller/requirements.txt
@@ -0,0 +1,5 @@
+# The Job Controller Dependencies (Versions picked in February 2025)
+# Note that this does NOT include the common_py dependencies!
+
+requests==2.32.3
+kubernetes==32.0.1
diff --git a/job_controller/src/videoag_common b/job_controller/src/videoag_common
deleted file mode 120000
index 6eaec4c319fd0e4873498aa6cf73f5e339c3c8f7..0000000000000000000000000000000000000000
--- a/job_controller/src/videoag_common
+++ /dev/null
@@ -1 +0,0 @@
-../../common_py/src/videoag_common
\ No newline at end of file
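
Note: with this change, adding a new buildable module only takes a build_config.py plus a rerun of the generator; no hand-written Dockerfile is needed. A minimal sketch of such a config follows — the module job_controller/jobs/job_example/ and its job.py are hypothetical, purely for illustration of the keys build_pipeline_generator.py reads:

    # job_controller/jobs/job_example/build_config.py (hypothetical example)
    TARGET_IMAGE_NAME = "job_example"      # omit for base modules that produce no image of their own
    BUILD_DEPENDENCIES = ["../base/"]      # resolved relative to this module's directory
    APT_RUNTIME_DEPENDENCIES = ["ffmpeg"]  # merged, de-duplicated and sorted across all modules
    DOCKERFILE_EXTRA = """
    COPY $MODULE_DIR/job.py ./src/
    """

Running ./generate_dockerfiles.sh (or the generate-pipeline CI job, which picks the module up via find job_controller/jobs/) then writes .dockerfiles/job_example and adds build-job_example and deploy-job_example jobs to the child pipeline.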