From 573bf1a5cf1447757ebcf8789f00d69f930e9ffd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20K=C3=BCnzel?= <simonk@fsmpi.rwth-aachen.de>
Date: Thu, 1 May 2025 17:24:52 +0200
Subject: [PATCH] Add view stats migration script

---
 api/migrate_stats.sql | 109 ++++++++++++++++++++++++++++++++++++++++++
 api/migration.sql     |  13 ++---
 2 files changed, 116 insertions(+), 6 deletions(-)
 create mode 100644 api/migrate_stats.sql

diff --git a/api/migrate_stats.sql b/api/migrate_stats.sql
new file mode 100644
index 0000000..55bb005
--- /dev/null
+++ b/api/migrate_stats.sql
@@ -0,0 +1,109 @@
+DELETE FROM lecture_daily_watch_stats;
+DELETE FROM lecture_watch_stats;
+DELETE FROM unknown_video;
+
+CREATE OR REPLACE PROCEDURE temp_count_lecture_view(lec_id int, watch_timestamp timestamp) AS $$
+    BEGIN
+        INSERT INTO lecture_watch_stats (lecture_id, segment_duration_sec, view_count, total_watched_seconds, average_watch_speed)
+        VALUES (lec_id, 5, 1, 0, 1)
+        ON CONFLICT (lecture_id)
+        DO UPDATE SET view_count = lecture_watch_stats.view_count + 1
+        ;
+
+        INSERT INTO lecture_daily_watch_stats (lecture_id, "date", view_count)
+        VALUES (lec_id, watch_timestamp::date, 1)
+        ON CONFLICT (lecture_id, "date")
+        DO UPDATE SET view_count = lecture_daily_watch_stats.view_count + 1
+        ;
+    END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE PROCEDURE temp_count_video_view(vid_id int, watch_timestamp timestamp) AS $$
+    DECLARE
+        current_lecture_id int;
+    BEGIN
+        SELECT lecture_id FROM videos_data WHERE id = vid_id INTO current_lecture_id;
+        IF current_lecture_id IS NULL THEN
+            INSERT INTO unknown_video (video_id, view_count)
+            VALUES (vid_id, 1)
+            ON CONFLICT (video_id)
+            DO UPDATE SET view_count = unknown_video.view_count + 1
+            ;
+        ELSE
+            CALL temp_count_lecture_view(current_lecture_id, watch_timestamp);
+            IF EXISTS(SELECT 1 FROM publish_medium WHERE id = vid_id) THEN
+                INSERT INTO publish_medium_watch_stats (publish_medium_id, view_count, total_watched_seconds)
+                VALUES (vid_id, 1, 0)
+                ON CONFLICT (publish_medium_id)
+                DO UPDATE SET view_count = publish_medium_watch_stats.view_count + 1
+                ;
+            END IF;
+        END IF;
+    END;
+$$ LANGUAGE plpgsql;
+
+DO $$
+    DECLARE
+        total_processing_count int;
+        processed_count int;
+        video_watcher record;
+        log_entry record;
+        last_entry_timestamp timestamp;
+    BEGIN
+        SELECT COUNT(*) FROM (SELECT 1 FROM hlslog GROUP BY id, lecture) INTO total_processing_count;
+        SELECT 0 INTO processed_count;
+
+        FOR video_watcher IN (SELECT id, lecture FROM hlslog GROUP BY id, lecture) LOOP
+            SELECT NULL INTO last_entry_timestamp;
+
+            IF processed_count % 10000 = 0 THEN
+                RAISE NOTICE '(hlslog) Processed %/% (% %%)', processed_count, total_processing_count, (processed_count::double precision / total_processing_count * 100)::int;
+            END IF;
+            SELECT processed_count + 1 INTO processed_count;
+
+            FOR log_entry IN (SELECT * FROM hlslog WHERE id = video_watcher.id AND lecture = video_watcher.lecture ORDER BY "time" ASC) LOOP
+                IF last_entry_timestamp IS NOT NULL AND log_entry."time" > (last_entry_timestamp + interval '6 hours') THEN
+                    CALL temp_count_lecture_view(video_watcher.lecture, last_entry_timestamp);
+                END IF;
+                SELECT log_entry."time" INTO last_entry_timestamp;
+            END LOOP;
+
+            IF last_entry_timestamp IS NOT NULL THEN
+                CALL temp_count_lecture_view(video_watcher.lecture, last_entry_timestamp);
+            END IF;
+        END LOOP;
+    END;
+$$;
+
+DO $$
+    DECLARE
+        total_processing_count int;
+        processed_count int;
+        video_watcher record;
+        log_entry record;
+        last_entry_timestamp timestamp;
+    BEGIN
+        SELECT COUNT(*) FROM (SELECT 1 FROM log GROUP BY id, video) INTO total_processing_count;
+        SELECT 0 INTO processed_count;
+
+        FOR video_watcher IN (SELECT id, video FROM log GROUP BY id, video) LOOP
+            SELECT NULL INTO last_entry_timestamp;
+
+            IF processed_count % 10000 = 0 THEN
+                RAISE NOTICE '(log) Processed %/% (% %%)', processed_count, total_processing_count, (processed_count::double precision / total_processing_count * 100)::int;
+            END IF;
+            SELECT processed_count + 1 INTO processed_count;
+
+            FOR log_entry IN (SELECT * FROM log WHERE id = video_watcher.id AND video = video_watcher.video ORDER BY "time" ASC) LOOP
+                IF last_entry_timestamp IS NOT NULL AND log_entry."time" > (last_entry_timestamp + interval '6 hours') THEN
+                    CALL temp_count_video_view(video_watcher.video, last_entry_timestamp);
+                END IF;
+                SELECT log_entry."time" INTO last_entry_timestamp;
+            END LOOP;
+
+            IF last_entry_timestamp IS NOT NULL THEN
+                CALL temp_count_video_view(video_watcher.video, last_entry_timestamp);
+            END IF;
+        END LOOP;
+    END;
+$$;
\ No newline at end of file
diff --git a/api/migration.sql b/api/migration.sql
index 8427bd1..aae5953 100644
--- a/api/migration.sql
+++ b/api/migration.sql
@@ -1,12 +1,13 @@
 
 UPDATE data.job SET cause_job_id = NULL, cause_user_id = NULL WHERE id >= 0;
 
-DELETE FROM lecture_daily_watch_stats;
-DELETE FROM lecture_watch_stats;
-DELETE FROM publish_medium_segment_watch_stats;
-DELETE FROM publish_medium_watch_client_stats;
-DELETE FROM publish_medium_watch_log_entry;
-DELETE FROM publish_medium_watcher;
+DELETE FROM data.lecture_daily_watch_stats;
+DELETE FROM data.lecture_watch_stats;
+DELETE FROM data.publish_medium_segment_watch_stats;
+DELETE FROM data.publish_medium_watch_stats;
+DELETE FROM data.publish_medium_watch_client_stats;
+DELETE FROM data.publish_medium_watch_log_entry;
+DELETE FROM data.publish_medium_watcher;
 
 DELETE FROM data.featured WHERE id >= 0;
 DELETE FROM data.announcement WHERE id >= 0;
-- 
GitLab