diff --git a/api/api_specification.json b/api/api_specification.json
index 90509bf4fd3f41f9463dd740dacbb391b100ab08..f84f7866836c0cf8fb667cbecf78e991d5c81860 100644
--- a/api/api_specification.json
+++ b/api/api_specification.json
@@ -2145,6 +2145,24 @@
         "only_mod": false,
         "optional": false,
         "type": "segmented_stats"
+      },
+      "total_watched_seconds": {
+        "config_directly_modifiable": false,
+        "id": "total_watched_seconds",
+        "notes": "",
+        "object_variant": null,
+        "only_mod": false,
+        "optional": false,
+        "type": "int"
+      },
+      "view_count": {
+        "config_directly_modifiable": false,
+        "id": "view_count",
+        "notes": "",
+        "object_variant": null,
+        "only_mod": false,
+        "optional": false,
+        "type": "int"
+      }
     }
   }
 },
diff --git a/api/api_specification.md b/api/api_specification.md
index eaa2b9fd0243009ba14f1b6271024209b175f215..324a7f26b87831d5a52dec6eb93ddb48c53e0d6a 100644
--- a/api/api_specification.md
+++ b/api/api_specification.md
@@ -1,4 +1,4 @@
-# Specification of the Web API for the Video-AG Website (v0.84).
+# Specification of the Web API for the Video-AG Website (v0.85).
 
 ## Introduction
 
@@ -1634,6 +1634,16 @@ This may only be used by a moderator.
     <td>segmented_stats</td>
     <td>segmented_stats</td>
     <td>Contains view count statistics for segments of a medium. The medium is split into equal length segments, and each segment has a view count</td>
   </tr>
+  <tr>
+    <td>total_watched_seconds</td>
+    <td>int</td>
+    <td></td>
+  </tr>
+  <tr>
+    <td>view_count</td>
+    <td>int</td>
+    <td></td>
+  </tr>
   </tbody>
 </table>
@@ -3218,6 +3228,11 @@ Possible `error_code`:
 
 ## Changelog
 
+### v0.85
+
+* Updated `GET /stats/publish_medium/{publish_medium_id}`
+  * Added fields `total_watched_seconds` and `view_count`
+
 ### v0.84
 
 * Updated `medium_metadata`
diff --git a/api/api_specification_template.md b/api/api_specification_template.md
index 21874e01f3d561f1eaaba6f80e2535539d1d8d1d..b177b67f8c4d72daf289a96b30b65ff2085f0628 100644
--- a/api/api_specification_template.md
+++ b/api/api_specification_template.md
@@ -1,4 +1,4 @@
-# Specification of the Web API for the Video-AG Website (v0.84).
+# Specification of the Web API for the Video-AG Website (v0.85).
 
 ## Introduction
 
@@ -139,6 +139,11 @@ Possible `error_code`:
 
 ## Changelog
 
+### v0.85
+
+* Updated `GET /stats/publish_medium/{publish_medium_id}`
+  * Added fields `total_watched_seconds` and `view_count`
+
 ### v0.84
 
 * Updated `medium_metadata`
diff --git a/api/src/api/routes/stats.py b/api/src/api/routes/stats.py
index 96063d819c43e9aad9f37a86a5bc9efaa19ee67a..d469cb4b35be9988c52b05ca895d97018e9907f8 100755
--- a/api/src/api/routes/stats.py
+++ b/api/src/api/routes/stats.py
@@ -113,6 +113,8 @@ def api_route_watch_log_publish_medium(course_handle: str, publish_medium_id: in
                       " a few hours)",
     response_objects={
         "": [
+            ("view_count", "int"),
+            ("total_watched_seconds", "int"),
             ("segmented_stats", "segmented_stats", "Contains view count statistics for segments of a medium. The"
                                                    " medium is split into equal length segments, and each segment"
                                                    " has a view count"),
@@ -133,6 +135,7 @@ def api_route_get_publish_medium_stats(publish_medium_id: int):
 
     def _trans(session: SessionDb) -> tuple[
         PublishMedium,
+        PublishMediumWatchStats,
         Sequence[PublishMediumSegmentWatchStats],
         Sequence[PublishMediumWatchClientStats],
         LectureWatchStats,
@@ -146,6 +149,10 @@ def api_route_get_publish_medium_stats(publish_medium_id: int):
         )
         if medium is None:
             raise ApiClientException(ERROR_UNKNOWN_OBJECT)
+        generic_medium_stats = session.scalar(
+            sql.select(PublishMediumWatchStats)
+            .where(PublishMediumWatchStats.publish_medium_id == medium.id)
+        )
         segments = session.scalars(
             sql.select(PublishMediumSegmentWatchStats)
             .where(PublishMediumSegmentWatchStats.publish_medium_id == publish_medium_id)
@@ -161,9 +168,9 @@ def api_route_get_publish_medium_stats(publish_medium_id: int):
             .where(LectureWatchStats.lecture_id == medium.lecture_id)
         )
         session.expunge_all()
-        return medium, segments, client_stats, generic_lecture_stats
+        return medium, generic_medium_stats, segments, client_stats, generic_lecture_stats
 
-    medium, segments, client_stats, generic_lecture_stats = database.execute_read_transaction(_trans)
+    medium, generic_medium_stats, segments, client_stats, generic_lecture_stats = database.execute_read_transaction(_trans)
     metadata = medium.medium_metadata
     if not isinstance(metadata, DurationMetadata):
         raise ApiClientException(ERROR_BAD_REQUEST("Medium is not a medium with watch stats (doesn't have a duration)"))
@@ -171,7 +178,11 @@
 
     segment_total_views_json = []
     segment_unique_views_json = []
-    segment_duration_sec = generic_lecture_stats.segment_duration_sec
+    segment_duration_sec = None
+    if len(segments) > 0:
+        if generic_lecture_stats is None:
+            raise Exception("Some segments exist but no generic lecture stats exist (which store segment duration)")
+        segment_duration_sec = generic_lecture_stats.segment_duration_sec
 
     segments_iter = more_itertools.peekable(segments)
     for i in range(math.ceil(duration_sec / segment_duration_sec)):
@@ -196,6 +207,8 @@
         client_stats_json[stat.type][stat.value] = stat.client_count
 
     return {
+        "view_count": generic_medium_stats.view_count if generic_medium_stats else 0,
+        "total_watched_seconds": generic_medium_stats.total_watched_seconds if generic_medium_stats else 0,
         "segmented_stats": {
             "segment_duration_sec": segment_duration_sec,
             "segment_total_view_counts": segment_total_views_json,
diff --git a/common_py/src/videoag_common/objects/__init__.py b/common_py/src/videoag_common/objects/__init__.py
index 367d3491b9c25050f00e27cd37ec67d5d7b60929..bf490df46f6ee8cac172ecf53df46688a2f06f84 100644
--- a/common_py/src/videoag_common/objects/__init__.py
+++ b/common_py/src/videoag_common/objects/__init__.py
@@ -50,6 +50,7 @@ from .stats import (
     PublishMediumSegmentWatchStats,
     ClientStatType,
     PublishMediumWatchClientStats,
+    PublishMediumWatchStats,
     LectureDailyWatchStats,
     LectureWatchStats,
 )
diff --git a/common_py/src/videoag_common/objects/stats.py b/common_py/src/videoag_common/objects/stats.py
index 7488074f4fac2c35ca17da3a49ca212873492171..b38d16cf8f2f6489c8a2ba193d7d2ba55288591b 100755
--- a/common_py/src/videoag_common/objects/stats.py
+++ b/common_py/src/videoag_common/objects/stats.py
@@ -62,6 +62,17 @@ class PublishMediumWatchClientStats(Base):
     client_count: Mapped[int] = mapped_column(nullable=False, default=0)
 
 
+# General stats for a publish medium
+# Note that these might not sum up for the lecture values. One can watch half the lecture with one medium and the other
+# half with another medium.
+class PublishMediumWatchStats(Base):
+
+    publish_medium_id: Mapped[int] = mapped_column(ForeignKey("publish_medium.id"), nullable=False, primary_key=True)
+
+    view_count: Mapped[int] = mapped_column(nullable=False, default=0)
+    total_watched_seconds: Mapped[int] = mapped_column(sql.BigInteger(), nullable=False, default=0)
+
+
 # Counts the views per day for a lecture
 class LectureDailyWatchStats(Base):
     lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=False, primary_key=True)
diff --git a/job_controller/jobs/view_stats_aggregator/job.py b/job_controller/jobs/view_stats_aggregator/job.py
index 6519745cbb451bcb7f786c31bc96b6ae8e2ef7ee..084b448d8e6b3fad7108248e1395127493967e1a 100644
--- a/job_controller/jobs/view_stats_aggregator/job.py
+++ b/job_controller/jobs/view_stats_aggregator/job.py
@@ -137,9 +137,6 @@ def _aggregate_entries(
             # have music videos (as far as I know))
             view_count_array[i] += 1
 
-    if total_watched_seconds < _MIN_WATCH_DURATION_SECONDS:
-        return None
-
     return view_count_array, total_watched_seconds, total_realtime_seconds
 
 
@@ -199,7 +196,7 @@ def _add_seconds_array_to_database(
         segment.total_watch_count += segment_watch_count
 
 
-def _store_client_stats_to_database(session: SessionDb, publish_medium_id: int, type: ClientStatType, value: str):
+def _store_medium_client_stats_to_database(session: SessionDb, publish_medium_id: int, type: ClientStatType, value: str):
     stat = session.scalar(
         sql.select(PublishMediumWatchClientStats)
         .where(
@@ -220,7 +217,7 @@ def _store_medium_client_stats_to_database(session: SessionDb, publish_medium_id: int,
     stat.client_count += 1
 
 
-def _add_user_agent_client_stats(session: SessionDb, publish_medium_id: int, user_agent: str):
+def _add_medium_user_agent_client_stats(session: SessionDb, publish_medium_id: int, user_agent: str):
     ua_result = ua_parser.parse(user_agent)
 
     # For anonymity reasons we only include values from a set of common values, and all other values are stored as 'Other'
@@ -229,7 +226,7 @@ def _add_user_agent_client_stats(session: SessionDb, publish_medium_id: int, use
     os = ua_result.os.family
     if os not in _OS_FAMILIES:
         os = "Other"
-    _store_client_stats_to_database(session, publish_medium_id, ClientStatType.OPERATING_SYSTEM, os)
+    _store_medium_client_stats_to_database(session, publish_medium_id, ClientStatType.OPERATING_SYSTEM, os)
 
     if ua_result.user_agent:
         # Not sure why PyCharm thinks .user_agent is a str
@@ -237,21 +234,39 @@ def _add_user_agent_client_stats(session: SessionDb, publish_medium_id: int, use
         browser = ua_result.user_agent.family
         if browser not in _BROWSER_FAMILIES:
             browser = "Other"
-        _store_client_stats_to_database(session, publish_medium_id, ClientStatType.BROWSER, browser)
+        _store_medium_client_stats_to_database(session, publish_medium_id, ClientStatType.BROWSER, browser)
 
     if ua_result.device:
         device_family = ua_result.device.family
         if device_family not in _DEVICE_FAMILIES:
            device_family = "Other"
-        _store_client_stats_to_database(session, publish_medium_id, ClientStatType.DEVICE_FAMILY, device_family)
+        _store_medium_client_stats_to_database(session, publish_medium_id, ClientStatType.DEVICE_FAMILY, device_family)
 
         device_brand = ua_result.device.brand
         if device_brand not in _DEVICE_BRANDS:
             device_brand = "Other"
-        _store_client_stats_to_database(session, publish_medium_id, ClientStatType.DEVICE_BRAND, device_brand)
+        _store_medium_client_stats_to_database(session, publish_medium_id, ClientStatType.DEVICE_BRAND, device_brand)
+
+
+def _update_medium_generic_stats(session: SessionDb, publish_medium_id: int, total_watched_seconds: int):
+    stats = session.scalar(
+        sql.select(PublishMediumWatchStats)
+        .where(PublishMediumWatchStats.publish_medium_id == publish_medium_id)
+    )
+    if stats is None:
+        stats = PublishMediumWatchStats(
+            publish_medium_id=publish_medium_id,
+        )
+        session.add(stats)
+        session.flush([stats])  # Ensure default values are present
+
+    stats.view_count += 1
+    stats.total_watched_seconds += total_watched_seconds
+
+    return stats
 
 
-def _add_daily_stats(session: SessionDb, lecture_id: int, watch_date: Date):
+def _add_lecture_daily_stats(session: SessionDb, lecture_id: int, watch_date: Date):
     stat = session.scalar(
         sql.select(LectureDailyWatchStats)
         .where(LectureDailyWatchStats.lecture_id == lecture_id,
@@ -268,7 +283,7 @@
     stat.view_count += 1
 
 
-def _update_and_get_generic_stats(session: SessionDb, lecture_id: int, total_watched_seconds: int, average_watch_speed: float) -> LectureWatchStats:
+def _update_and_get_lecture_generic_stats(session: SessionDb, lecture_id: int, total_watched_seconds: int, average_watch_speed: float) -> LectureWatchStats:
     stats = session.scalar(
         sql.select(LectureWatchStats)
         .where(LectureWatchStats.lecture_id == lecture_id)
@@ -328,7 +343,8 @@ def _trans_process_watch(session: SessionDb, watch_id: str):
     }
     medium_duration_sec_by_id = {}
     view_count_arrays_by_medium_id = {}
-    total_watched_seconds = 0
+    total_watched_seconds_by_medium_id = {}
+    total_watched_seconds: float = 0
     total_realtime_seconds = 0
 
     entry_iter = more_itertools.peekable(entries)
@@ -355,13 +371,17 @@ def _trans_process_watch(session: SessionDb, watch_id: str):
             return  # Client should be ignored
         array, medium_watched_seconds, medium_realtime_seconds = res
         view_count_arrays_by_medium_id[current_medium_id] = array
+        total_watched_seconds_by_medium_id[current_medium_id] = medium_watched_seconds
         total_watched_seconds += medium_watched_seconds
         total_realtime_seconds += medium_realtime_seconds
 
 
+    if total_watched_seconds < _MIN_WATCH_DURATION_SECONDS:
+        return
+
     # Client won't be ignored. Now we can add data to the database (A return does NOT cause a rollback! The watch entries
     # always need to be deleted)
-    generic_stats = _update_and_get_generic_stats(session, lecture.id, total_watched_seconds, total_watched_seconds / total_realtime_seconds)
+    generic_stats = _update_and_get_lecture_generic_stats(session, lecture.id, round(total_watched_seconds), total_watched_seconds / total_realtime_seconds)
     for medium_id, array in view_count_arrays_by_medium_id.items():
         _add_seconds_array_to_database(
             session,
@@ -370,8 +390,12 @@ def _trans_process_watch(session: SessionDb, watch_id: str):
             medium_duration_sec_by_id[medium_id],
             array
         )
-        _add_user_agent_client_stats(session, medium_id, watcher.user_agent)
-    _add_daily_stats(session, lecture.id, entries[0].timestamp.date())  # Use start date
+        _add_medium_user_agent_client_stats(session, medium_id, watcher.user_agent)
+        medium_watched_seconds = total_watched_seconds_by_medium_id[medium_id]
+        if medium_watched_seconds >= _MIN_WATCH_DURATION_SECONDS:
+            _update_medium_generic_stats(session, medium_id, round(medium_watched_seconds))
+
+    _add_lecture_daily_stats(session, lecture.id, entries[0].timestamp.date())  # Use start date
 
 
 def execute(database, own_job_id, input_data: CJsonObject):