Skip to content
Snippets Groups Projects
Commit f04193d5 authored by Simon Künzel
Browse files

Fix some bugs in stats aggregator

parent 3ab187c2
No related branches found
No related tags found
No related merge requests found
Pipeline #7528 passed
Pipeline: backend

#7529

    ...@@ -67,7 +67,7 @@ class LectureDailyWatchStats(Base): ...@@ -67,7 +67,7 @@ class LectureDailyWatchStats(Base):
    lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=False, primary_key=True) lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=False, primary_key=True)
    date: Mapped[Date] = mapped_column(nullable=False, primary_key=True) date: Mapped[Date] = mapped_column(nullable=False, primary_key=True)
    view_count: Mapped[int] = mapped_column(nullable=False) view_count: Mapped[int] = mapped_column(nullable=False, default=0)
    # General stats for a lecture # General stats for a lecture
    ......
    ...@@ -41,8 +41,13 @@ _ffmpeg_filter_graph_target = { ...@@ -41,8 +41,13 @@ _ffmpeg_filter_graph_target = {
    "input_data": {"ordered_nodes_and_data": [{"data": {"file_path": "permanent_media/course-2.07ws-buk/lecture-1.071019/source-1008.07ws-buk.071019.mp4"}, "node": {"type": "input_file", "streams": [{"graph_id": "video", "stream_id": 0}], "medium_file_target_id": "video"}}, {"data": {}, "node": {"type": "video_slide", "slide": {"rows": [{"text": "Hello some reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly long text here", "text_color": "#ffffff", "height_percent": 25, "vertical_alignment": "bottom", "horizontal_alignment": "left"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "left"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "center"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "right"}], "padding_percent": 5, "background_color": "#ff0000"}, "position": "beginning", "input_ids": ["video"], "output_ids": ["video_with_intro"], "duration_sec": 3, "fade_duration_sec": 1}}, {"data": {"file_path": "permanent_media/course-2.07ws-buk/lecture-1.071019/target-1016.processed_video.mp4"}, "node": {"type": "output_file", "stream_graph_ids": ["video_with_intro"], "medium_file_target_id": "processed_video"}}]} "input_data": {"ordered_nodes_and_data": [{"data": {"file_path": "permanent_media/course-2.07ws-buk/lecture-1.071019/source-1008.07ws-buk.071019.mp4"}, "node": {"type": "input_file", "streams": [{"graph_id": "video", "stream_id": 0}], "medium_file_target_id": "video"}}, {"data": {}, "node": {"type": "video_slide", "slide": {"rows": [{"text": "Hello some reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly reeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaallly long text here", "text_color": "#ffffff", "height_percent": 25, 
"vertical_alignment": "bottom", "horizontal_alignment": "left"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "left"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "center"}, {"text": "Hello!", "text_color": "#00ff00", "height_percent": 25, "vertical_alignment": "middle", "horizontal_alignment": "right"}], "padding_percent": 5, "background_color": "#ff0000"}, "position": "beginning", "input_ids": ["video"], "output_ids": ["video_with_intro"], "duration_sec": 3, "fade_duration_sec": 1}}, {"data": {"file_path": "permanent_media/course-2.07ws-buk/lecture-1.071019/target-1016.processed_video.mp4"}, "node": {"type": "output_file", "stream_graph_ids": ["video_with_intro"], "medium_file_target_id": "processed_video"}}]}
    } }
    _view_stats_aggregator = {
    "type": "view_stats_aggregator",
    "input_data": {}
    }
    # Configure what job to run # Configure what job to run
    TO_RUN = _media_process_scheduler TO_RUN = _view_stats_aggregator
    JOB_ID = 42 JOB_ID = 42
    CLEAN_JOB_DIR = True CLEAN_JOB_DIR = True
    ......
    ...@@ -160,7 +160,8 @@ def _add_seconds_array_to_database( ...@@ -160,7 +160,8 @@ def _add_seconds_array_to_database(
    for segment_index in range(math.ceil(medium_duration_sec / segment_duration_sec)): for segment_index in range(math.ceil(medium_duration_sec / segment_duration_sec)):
    segment_start_timestamp_sec = segment_index * segment_duration_sec segment_start_timestamp_sec = segment_index * segment_duration_sec
    # Exclusive # Exclusive
    segment_end_timestamp_sec = segment_start_timestamp_sec + segment_duration_sec segment_end_timestamp_sec = min(segment_start_timestamp_sec + segment_duration_sec, medium_duration_sec)
    actual_segment_duration_sec = segment_end_timestamp_sec - segment_start_timestamp_sec
    # Calculate how often the segment was watched. We count it as being watched if at least half of the seconds # Calculate how often the segment was watched. We count it as being watched if at least half of the seconds
    # were watched. # were watched.
    ...@@ -172,7 +173,7 @@ def _add_seconds_array_to_database( ...@@ -172,7 +173,7 @@ def _add_seconds_array_to_database(
    for i in range(segment_start_timestamp_sec, segment_end_timestamp_sec): for i in range(segment_start_timestamp_sec, segment_end_timestamp_sec):
    if view_count_array[i] > segment_watch_count: if view_count_array[i] > segment_watch_count:
    viable_seconds += 1 viable_seconds += 1
    if viable_seconds < segment_duration_sec / 2: if viable_seconds < actual_segment_duration_sec / 2:
    break break
    segment_watch_count += 1 segment_watch_count += 1
    ...@@ -191,6 +192,7 @@ def _add_seconds_array_to_database( ...@@ -191,6 +192,7 @@ def _add_seconds_array_to_database(
    segment_index=segment_index, segment_index=segment_index,
    ) )
    session.add(segment) session.add(segment)
    session.flush([segment]) # Ensure default values are present
    assert segment_watch_count > 0 assert segment_watch_count > 0
    segment.unique_watch_count += 1 segment.unique_watch_count += 1
    ...@@ -213,6 +215,7 @@ def _store_client_stats_to_database(session: SessionDb, publish_medium_id: int, ...@@ -213,6 +215,7 @@ def _store_client_stats_to_database(session: SessionDb, publish_medium_id: int,
    value=value value=value
    ) )
    session.add(stat) session.add(stat)
    session.flush([stat]) # Ensure default values are present
    stat.client_count += 1 stat.client_count += 1
    ...@@ -260,6 +263,7 @@ def _add_daily_stats(session: SessionDb, lecture_id: int, watch_date: Date): ...@@ -260,6 +263,7 @@ def _add_daily_stats(session: SessionDb, lecture_id: int, watch_date: Date):
    date=watch_date date=watch_date
    ) )
    session.add(stat) session.add(stat)
    session.flush([stat]) # Ensure default values are present
    stat.view_count += 1 stat.view_count += 1
    ...@@ -275,6 +279,7 @@ def _update_and_get_generic_stats(session: SessionDb, lecture_id: int, total_wat ...@@ -275,6 +279,7 @@ def _update_and_get_generic_stats(session: SessionDb, lecture_id: int, total_wat
    segment_duration_sec=_DEFAULT_SEGMENT_DURATION_SECONDS, segment_duration_sec=_DEFAULT_SEGMENT_DURATION_SECONDS,
    ) )
    session.add(stats) session.add(stats)
    session.flush([stats]) # Ensure default values are present
    stats.average_watch_speed = ( stats.average_watch_speed = (
    ((stats.average_watch_speed * stats.view_count) + average_watch_speed) ((stats.average_watch_speed * stats.view_count) + average_watch_speed)
    ...@@ -333,7 +338,7 @@ def _trans_process_watch(session: SessionDb, watch_id: str): ...@@ -333,7 +338,7 @@ def _trans_process_watch(session: SessionDb, watch_id: str):
    if current_medium_id not in publish_media_by_id: if current_medium_id not in publish_media_by_id:
    logger.info(f"Ignoring watcher '{watch_id}' who managed to watch a non-existent lecture {current_medium_id}") logger.info(f"Ignoring watcher '{watch_id}' who managed to watch a non-existent lecture {current_medium_id}")
    return return
    while entry_iter.peek().publish_medium_id == current_medium_id: while entry_iter.peek(None) is not None and entry_iter.peek().publish_medium_id == current_medium_id:
    medium_entries.append(next(entry_iter)) medium_entries.append(next(entry_iter))
    medium = publish_media_by_id[current_medium_id] medium = publish_media_by_id[current_medium_id]
    ......
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Please register or sign in to comment