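Fix some bugs in stats aggregator: clamp the last segment to the medium duration, give new stat rows their default values (flush after add, default 0 for the daily view count) before incrementing them, and stop peeking past the end of the watch entries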

f04193d5 · Simon Künzel · 3ab187c2
Commit f04193d5 authored 3 months ago by Simon Künzel
--- a/common_py/src/videoag_common/objects/stats.py
+++ b/common_py/src/videoag_common/objects/stats.py
@@ -67,7 +67,7 @@ class LectureDailyWatchStats(Base):
    lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=False, primary_key=True)
    date: Mapped[Date] = mapped_column(nullable=False, primary_key=True)
-    view_count: Mapped[int] = mapped_column(nullable=False)
+    view_count: Mapped[int] = mapped_column(nullable=False, default=0)
 # General stats for a lecture

--- a/job_controller/debug_job_dev.py
+++ b/job_controller/debug_job_dev.py
@@ -41,8 +41,13 @@ _ffmpeg_filter_graph_target = {
    "input_data": {"ordered_nodes_and_data": [{"data": {"file_path": "permanent_media/course-2.07ws-buk/lecture-1.071019/source-1008.07ws-buk.071019.mp4"}, "node": {"type": "input_file", "streams": [{"graph_id": "video", "stream_id": 0}], "medium_file_target_id": "video"}}, {"data": {}, "node": {"type": "video_slide", "slide": {"rows": [{"text": "Hello some reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly long text here", "text_color": "#ffffff", "height_percent": 25, "vertical_alignment": "bottom", "horizontal_alignment": "left"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "left"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "center"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "right"}], "padding_percent": 5, "background_color": "#ff0000"}, "position": "beginning", "input_ids": ["video"], "output_ids": ["video_with_intro"], "duration_sec": 3, "fade_duration_sec": 1}}, {"data": {"file_path": "permanent_media/course-2.07ws-buk/lecture-1.071019/target-1016.processed_video.mp4"}, "node": {"type": "output_file", "stream_graph_ids": ["video_with_intro"], "medium_file_target_id": "processed_video"}}]}
 }
+_view_stats_aggregator = {
+    "type": "view_stats_aggregator",
+    "input_data": {}
+}
 # Configure what job to run
-TO_RUN = _media_process_scheduler
+TO_RUN = _view_stats_aggregator
 JOB_ID = 42
 CLEAN_JOB_DIR = True

--- a/job_controller/jobs/view_stats_aggregator/job.py
+++ b/job_controller/jobs/view_stats_aggregator/job.py
@@ -160,7 +160,8 @@ def _add_seconds_array_to_database(
    for segment_index in range(math.ceil(medium_duration_sec / segment_duration_sec)):
        segment_start_timestamp_sec = segment_index * segment_duration_sec
        # Exclusive
-        segment_end_timestamp_sec = segment_start_timestamp_sec + segment_duration_sec
+        segment_end_timestamp_sec = min(segment_start_timestamp_sec + segment_duration_sec, medium_duration_sec)
+        actual_segment_duration_sec = segment_end_timestamp_sec - segment_start_timestamp_sec
        # Calculate how often the segment was watched. We count it as being watched if at least half of the seconds
        # were watched.
@@ -172,7 +173,7 @@ def _add_seconds_array_to_database(
            for i in range(segment_start_timestamp_sec, segment_end_timestamp_sec):
                if view_count_array[i] > segment_watch_count:
                    viable_seconds += 1
-            if viable_seconds < segment_duration_sec / 2:
+            if viable_seconds < actual_segment_duration_sec / 2:
                break
            segment_watch_count += 1
@@ -191,6 +192,7 @@ def _add_seconds_array_to_database(
                segment_index=segment_index,
            )
            session.add(segment)
+            session.flush([segment])  # Ensure default values are present
        assert segment_watch_count > 0
        segment.unique_watch_count += 1
@@ -213,6 +215,7 @@ def _store_client_stats_to_database(session: SessionDb, publish_medium_id: int,
            value=value
        )
        session.add(stat)
+        session.flush([stat])  # Ensure default values are present
    stat.client_count += 1
@@ -260,6 +263,7 @@ def _add_daily_stats(session: SessionDb, lecture_id: int, watch_date: Date):
            date=watch_date
        )
        session.add(stat)
+        session.flush([stat])  # Ensure default values are present
    stat.view_count += 1
@@ -275,6 +279,7 @@ def _update_and_get_generic_stats(session: SessionDb, lecture_id: int, total_wat
            segment_duration_sec=_DEFAULT_SEGMENT_DURATION_SECONDS,
        )
        session.add(stats)
+        session.flush([stats])  # Ensure default values are present
    stats.average_watch_speed = (
            ((stats.average_watch_speed * stats.view_count) + average_watch_speed)
@@ -333,7 +338,7 @@ def _trans_process_watch(session: SessionDb, watch_id: str):
        if current_medium_id not in publish_media_by_id:
            logger.info(f"Ignoring watcher '{watch_id}' who managed to watch a non-existent lecture {current_medium_id}")
            return
-        while entry_iter.peek().publish_medium_id == current_medium_id:
+        while entry_iter.peek(None) is not None and entry_iter.peek().publish_medium_id == current_medium_id:
            medium_entries.append(next(entry_iter))
        medium = publish_media_by_id[current_medium_id]