From 743df080a14f362e70908271b99c533a5ee4f184 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20K=C3=BCnzel?= <simonk@fsmpi.rwth-aachen.de>
Date: Wed, 23 Apr 2025 21:03:13 +0200
Subject: [PATCH] Ensure DB returns datetime with timezone set and fix more
 issues with timezones

---
 api/src/api/routes/site.py                    |  4 +--
 .../api_object/fields/basic_fields.py         |  4 +--
 .../src/videoag_common/database/__init__.py   |  3 +++
 .../videoag_common/database/drift_detector.py |  5 ++++
 .../videoag_common/database/utc_timestamp.py  | 27 +++++++++++++++++++
 .../src/videoag_common/miscellaneous/util.py  | 13 ++++++---
 .../src/videoag_common/objects/changelog.py   |  2 +-
 .../src/videoag_common/objects/course.py      |  4 +--
 common_py/src/videoag_common/objects/job.py   |  6 ++---
 .../src/videoag_common/objects/medium.py      |  4 +--
 common_py/src/videoag_common/objects/site.py  |  4 +--
 common_py/src/videoag_common/objects/stats.py |  2 +-
 common_py/src/videoag_common/objects/user.py  |  2 +-
 job_controller/jobs/source_file_sorter/job.py |  4 +--
 .../executor_api/job_executor_api.py          |  6 ++++-
 .../executor_api/kubernetes_api.py            |  8 ++++--
 16 files changed, 74 insertions(+), 24 deletions(-)
 create mode 100644 common_py/src/videoag_common/database/utc_timestamp.py

diff --git a/api/src/api/routes/site.py b/api/src/api/routes/site.py
index 3ac5171..928da6c 100644
--- a/api/src/api/routes/site.py
+++ b/api/src/api/routes/site.py
@@ -69,8 +69,8 @@ def api_route_is_running():
 
 
 def _db_execute_get_homepage(session: SessionDb, is_mod: bool):
-    upcoming_start: date = date.today()
-    upcoming_end: date = upcoming_start + timedelta(days=7)
+    upcoming_start: datetime = get_standard_datetime_now().replace(hour=0, minute=0, second=0, microsecond=0)
+    upcoming_end: datetime = upcoming_start + timedelta(days=7)
 
     upcoming_lectures = session.scalars(
         Lecture.select({
diff --git a/common_py/src/videoag_common/api_object/fields/basic_fields.py b/common_py/src/videoag_common/api_object/fields/basic_fields.py
index 675d33d..6df057b 100644
--- a/common_py/src/videoag_common/api_object/fields/basic_fields.py
+++ b/common_py/src/videoag_common/api_object/fields/basic_fields.py
@@ -169,8 +169,8 @@ class ApiDatetimeField[_O: "ApiObject"](ApiSimpleColumnField[_O]):
     def post_init_check(self, context: FieldContext):
         super().post_init_check(context)
         
-        if not isinstance(self._column.type, sql.types.DateTime):  # pragma: no cover
-            raise TypeError(f"SQL type for datetime field must be datetime, but is '{self._column.type}'")
+        if not isinstance(self._column.type, UTCTimestamp):  # pragma: no cover
+            raise TypeError(f"SQL type for datetime field must be UTCTimestamp (our custom type), but is '{self._column.type}'")
     
     @property
     def may_be_none_allowed_for_config_fields(self) -> bool:
diff --git a/common_py/src/videoag_common/database/__init__.py b/common_py/src/videoag_common/database/__init__.py
index a97d498..a65007e 100644
--- a/common_py/src/videoag_common/database/__init__.py
+++ b/common_py/src/videoag_common/database/__init__.py
@@ -19,3 +19,6 @@ from .database import (
     TransactionConflictError,
     Database,
 )
+from .utc_timestamp import (
+    UTCTimestamp
+)
diff --git a/common_py/src/videoag_common/database/drift_detector.py b/common_py/src/videoag_common/database/drift_detector.py
index 8d5becc..72963a9 100644
--- a/common_py/src/videoag_common/database/drift_detector.py
+++ b/common_py/src/videoag_common/database/drift_detector.py
@@ -10,6 +10,8 @@ from sqlalchemy.sql.base import _NoneName, ReadOnlyColumnCollection
 from sqlalchemy.sql.schema import ColumnCollectionConstraint, ForeignKey
 from sqlalchemy.dialects import postgresql as postgresql
 
+from .utc_timestamp import UTCTimestamp
+
 
 #
 # This file attempts its best to detect any difference between the schema in the python files and the actual database.
@@ -264,6 +266,9 @@ def _check_types_equal(actual: types.TypeEngine, schema: types.TypeEngine) -> bo
     if type(schema) is types.TIMESTAMP:
         return isinstance(actual, types.TIMESTAMP)
     
+    if type(schema) is UTCTimestamp:
+        return isinstance(actual, types.TIMESTAMP)
+    
     if isinstance(schema, types.Enum):
         if not isinstance(actual, types.Enum):
             return False
diff --git a/common_py/src/videoag_common/database/utc_timestamp.py b/common_py/src/videoag_common/database/utc_timestamp.py
new file mode 100644
index 0000000..da05bf1
--- /dev/null
+++ b/common_py/src/videoag_common/database/utc_timestamp.py
@@ -0,0 +1,27 @@
+import datetime
+# noinspection PyPep8Naming
+from datetime import datetime as Datetime
+import sqlalchemy as sql
+
+
+class UTCTimestamp(sql.TypeDecorator[Datetime]):
+    impl = sql.TIMESTAMP()
+    cache_ok = True
+
+    @property
+    def python_type(self) -> type[Datetime]:
+        return Datetime
+
+    def process_bind_param(self, value: Datetime | None, dialect: sql.Dialect) -> Datetime | None:
+        if value is None:
+            return None
+        if not value.tzinfo:
+            raise ValueError(f"Missing timezone in datetime: {value}")
+        return value.astimezone(datetime.timezone.utc)
+
+    def process_result_value(self, value: Datetime | None, dialect: sql.Dialect) -> Datetime | None:
+        if value is None:
+            return None
+        if value.tzinfo is None:
+            value = value.replace(tzinfo=datetime.UTC)
+        return value
diff --git a/common_py/src/videoag_common/miscellaneous/util.py b/common_py/src/videoag_common/miscellaneous/util.py
index 4ab8d61..8cb2a82 100644
--- a/common_py/src/videoag_common/miscellaneous/util.py
+++ b/common_py/src/videoag_common/miscellaneous/util.py
@@ -35,10 +35,17 @@ def pad_string(val: str, padding: str, length: int):
     return (padding * math.ceil( (length - len(val)) / len(padding) )) + val
 
 
-# Python doesn't have a formatter for milliseconds
-# Don't just remove last three numbers with microseconds. See here: https://stackoverflow.com/a/35643540
 def format_standard_datetime(dt: Datetime):
-    return dt.astimezone(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%S.") + pad_string(str(dt.microsecond // 1000), "0", 3) + "Z"
+    def zero_pad(val: int, length: int):
+        return pad_string(str(val), "0", length)
+    
+    # Python's strftime can't handle milliseconds and is unreliably regarding the length (e.g. sometimes microseconds or
+    # years are not padded to full length) https://stackoverflow.com/a/35643540
+    # So we just format it ourselves
+    dt = dt.astimezone(datetime.UTC)
+    return (f"{zero_pad(dt.year, 4)}-{zero_pad(dt.month, 2)}-{zero_pad(dt.day, 2)}"
+            f"T{zero_pad(dt.hour, 2)}:{zero_pad(dt.minute, 2)}:{zero_pad(dt.second, 2)}"
+            f".{zero_pad(dt.microsecond // 1000, 3)}Z")
 
 
 def parse_standard_datetime(val: str):
diff --git a/common_py/src/videoag_common/objects/changelog.py b/common_py/src/videoag_common/objects/changelog.py
index 2eb3e33..8b44281 100644
--- a/common_py/src/videoag_common/objects/changelog.py
+++ b/common_py/src/videoag_common/objects/changelog.py
@@ -23,7 +23,7 @@ class ChangelogEntry(ApiObject, Base):
     }
     
     change_time: Mapped[datetime] = api_mapped(
-        mapped_column(TIMESTAMP(), nullable=False, index=True, server_default=sql.text("CURRENT_TIMESTAMP")),
+        mapped_column(UTCTimestamp(), nullable=False, index=True, server_default=sql.text("CURRENT_TIMESTAMP")),
         ApiDatetimeField(
             include_in_data=True
         )
diff --git a/common_py/src/videoag_common/objects/course.py b/common_py/src/videoag_common/objects/course.py
index 82ae8c9..72a3af0 100644
--- a/common_py/src/videoag_common/objects/course.py
+++ b/common_py/src/videoag_common/objects/course.py
@@ -108,7 +108,7 @@ class Lecture(DeletableApiObject, VisibilityApiObject, ApiViewPermissionsObject,
         )
     )
     time: Mapped[datetime] = api_mapped(
-        mapped_column(TIMESTAMP(), nullable=False, index=True),
+        mapped_column(UTCTimestamp(), nullable=False, index=True),
         ApiDatetimeField(
             include_in_config=True, config_directly_modifiable=True,
             include_in_data=True
@@ -154,7 +154,7 @@ class Lecture(DeletableApiObject, VisibilityApiObject, ApiViewPermissionsObject,
         )
     )
     publish_time: Mapped[datetime] = api_mapped(
-        mapped_column(TIMESTAMP(), nullable=True, index=True),
+        mapped_column(UTCTimestamp(), nullable=True, index=True),
         ApiDatetimeField(
             include_in_data=True, data_only_mod=True
         )
diff --git a/common_py/src/videoag_common/objects/job.py b/common_py/src/videoag_common/objects/job.py
index 80d60d5..89e77cd 100644
--- a/common_py/src/videoag_common/objects/job.py
+++ b/common_py/src/videoag_common/objects/job.py
@@ -73,19 +73,19 @@ class Job(ApiObject, Base):
         )
     )
     creation_time: Mapped[datetime] = api_mapped(
-        mapped_column(sql.DateTime(), nullable=False, server_default=sql.text("CURRENT_TIMESTAMP")),
+        mapped_column(UTCTimestamp(), nullable=False, server_default=sql.text("CURRENT_TIMESTAMP")),
         ApiDatetimeField(
             include_in_data=True
         )
     )
     run_start_time: Mapped[datetime] = api_mapped(
-        mapped_column(sql.DateTime(), nullable=True),
+        mapped_column(UTCTimestamp(), nullable=True),
         ApiDatetimeField(
             include_in_data=True
         )
     )
     run_end_time: Mapped[datetime] = api_mapped(
-        mapped_column(sql.DateTime(), nullable=True),
+        mapped_column(UTCTimestamp(), nullable=True),
         ApiDatetimeField(
             include_in_data=True
         )
diff --git a/common_py/src/videoag_common/objects/medium.py b/common_py/src/videoag_common/objects/medium.py
index 52a539c..c8de5dc 100644
--- a/common_py/src/videoag_common/objects/medium.py
+++ b/common_py/src/videoag_common/objects/medium.py
@@ -74,7 +74,7 @@ class SorterFile(DeletableApiObject, Base):
         )
     )
     file_modification_time: Mapped[datetime] = api_mapped(
-        mapped_column(sql.DateTime, nullable=False),
+        mapped_column(UTCTimestamp, nullable=False),
         ApiDatetimeField(
             include_in_data=True
         )
@@ -101,7 +101,7 @@ class SorterFile(DeletableApiObject, Base):
         )
     )
     update_time: Mapped[datetime] = api_mapped(
-        mapped_column(sql.DateTime, nullable=False),
+        mapped_column(UTCTimestamp, nullable=False),
         ApiDatetimeField(
             include_in_data=True
         )
diff --git a/common_py/src/videoag_common/objects/site.py b/common_py/src/videoag_common/objects/site.py
index 0d0e420..39d6538 100644
--- a/common_py/src/videoag_common/objects/site.py
+++ b/common_py/src/videoag_common/objects/site.py
@@ -49,14 +49,14 @@ class Announcement(DeletableApiObject, VisibilityApiObject, Base):
         )
     )
     publish_time: Mapped[datetime] = api_mapped(
-        mapped_column(TIMESTAMP(), nullable=True),
+        mapped_column(UTCTimestamp(), nullable=True),
         ApiDatetimeField(
             include_in_config=True,
             include_in_data=True, data_only_mod=True
         )
     )
     expiration_time: Mapped[datetime] = api_mapped(
-        mapped_column(TIMESTAMP(), nullable=True),
+        mapped_column(UTCTimestamp(), nullable=True),
         ApiDatetimeField(
             include_in_config=True,
             include_in_data=True, data_only_mod=True
diff --git a/common_py/src/videoag_common/objects/stats.py b/common_py/src/videoag_common/objects/stats.py
index 566fbfe..7488074 100755
--- a/common_py/src/videoag_common/objects/stats.py
+++ b/common_py/src/videoag_common/objects/stats.py
@@ -12,7 +12,7 @@ from videoag_common.miscellaneous import JsonSerializableEnum
 class PublishMediumWatchLogEntry(Base):
     
     watch_id: Mapped[str] = mapped_column(String(length=64, collation=STRING_COLLATION), nullable=False, primary_key=True)
-    timestamp: Mapped[datetime] = mapped_column(TIMESTAMP(), nullable=False, primary_key=True,
+    timestamp: Mapped[datetime] = mapped_column(UTCTimestamp(), nullable=False, primary_key=True,
                                                 server_default=sql.text("CURRENT_TIMESTAMP"))
     # No foreign key to improve performance. Values are validated when aggregating
     publish_medium_id: Mapped[int] = mapped_column(nullable=False)
diff --git a/common_py/src/videoag_common/objects/user.py b/common_py/src/videoag_common/objects/user.py
index 53b2ec0..3aec3bf 100644
--- a/common_py/src/videoag_common/objects/user.py
+++ b/common_py/src/videoag_common/objects/user.py
@@ -36,7 +36,7 @@ class User(ApiObject, Base):
         )
     )
     
-    last_login: Mapped[datetime] = mapped_column(TIMESTAMP(), nullable=True)
+    last_login: Mapped[datetime] = mapped_column(UTCTimestamp(), nullable=True)
     
     enable_mail_notifications: Mapped[bool] = mapped_column(nullable=False, default=True)
     notify_new_video: Mapped[bool] = mapped_column(nullable=False, default=True)
diff --git a/job_controller/jobs/source_file_sorter/job.py b/job_controller/jobs/source_file_sorter/job.py
index 0cdc7e0..f4b221e 100644
--- a/job_controller/jobs/source_file_sorter/job.py
+++ b/job_controller/jobs/source_file_sorter/job.py
@@ -2,7 +2,7 @@ import hashlib
 import logging
 import shutil
 import time
-from datetime import date, datetime
+from datetime import date, datetime, UTC
 from pathlib import Path
 
 import videoag_common
@@ -40,7 +40,7 @@ def _check_file(
         to_sort_file_db_paths: list[str],
         file: Path
 ):
-    file_mod_time = datetime.fromtimestamp(file.lstat().st_mtime)
+    file_mod_time = datetime.fromtimestamp(file.lstat().st_mtime, tz=UTC)
     seconds_since_modification = time.time() - file.lstat().st_mtime
     
     error_message = None
diff --git a/job_controller/src/job_controller/executor_api/job_executor_api.py b/job_controller/src/job_controller/executor_api/job_executor_api.py
index 96d83c3..370a5c1 100644
--- a/job_controller/src/job_controller/executor_api/job_executor_api.py
+++ b/job_controller/src/job_controller/executor_api/job_executor_api.py
@@ -17,7 +17,9 @@ class JobExecutionInfo(ABC):
     @abstractmethod
     def get_start_time(self) -> datetime or None:
         """
-        Returns the time when the job execution started or None if the execution has not started yet
+        Returns the time when the job execution started or None if the execution has not started yet.
+        
+        Always with UTC timezone
         """
         pass
     
@@ -25,6 +27,8 @@ class JobExecutionInfo(ABC):
     def get_finish_time(self) -> datetime or None:
         """
         Only call this if is_success() did not return None. May be None only if the job failed
+        
+        Always with UTC timezone
         """
         pass
 
diff --git a/job_controller/src/job_controller/executor_api/kubernetes_api.py b/job_controller/src/job_controller/executor_api/kubernetes_api.py
index 1e62f01..dd8b7ec 100644
--- a/job_controller/src/job_controller/executor_api/kubernetes_api.py
+++ b/job_controller/src/job_controller/executor_api/kubernetes_api.py
@@ -1,4 +1,4 @@
-from datetime import datetime
+from datetime import datetime, UTC
 
 import kubernetes as k8s
 from kubernetes.client import ApiException
@@ -54,11 +54,15 @@ class K8sJobInfo(JobExecutionInfo):
     def get_start_time(self) -> datetime or None:
         start_time = self._k8s_job.status.start_time
         assert start_time is None or isinstance(start_time, datetime)
+        if start_time is not None:
+            start_time = start_time.astimezone(UTC)
         return start_time
     
-    def get_finish_time(self) -> datetime:
+    def get_finish_time(self) -> datetime | None:
         finish_time = self._k8s_job.status.completion_time
         assert finish_time is None or isinstance(finish_time, datetime)
+        if finish_time is not None:
+            finish_time = finish_time.astimezone(UTC)
         return finish_time
 
 
-- 
GitLab