diff --git a/api/api_specification.json b/api/api_specification.json index 5790d4b0d46d09990889a6f4cb3d9b49af25a2f3..90509bf4fd3f41f9463dd740dacbb391b100ab08 100644 --- a/api/api_specification.json +++ b/api/api_specification.json @@ -3740,6 +3740,15 @@ "medium_metadata": { "fields": { "": { + "file_format": { + "config_directly_modifiable": false, + "id": "file_format", + "notes": "", + "object_variant": null, + "only_mod": false, + "optional": false, + "type": "string" + }, "file_size": { "config_directly_modifiable": false, "id": "file_size", diff --git a/api/api_specification.md b/api/api_specification.md index 28e8aeb5d1d82b95ecbb4530e32f834644fb0877..eaa2b9fd0243009ba14f1b6271024209b175f215 100644 --- a/api/api_specification.md +++ b/api/api_specification.md @@ -1,4 +1,4 @@ -# Specification of the Web API for the Video-AG Website (v0.82). +# Specification of the Web API for the Video-AG Website (v0.84). ## Introduction @@ -2710,7 +2710,12 @@ Additionally, the following objects may appear as the type of some field: </thead> <tbody> <tr> - <td rowspan="4">any</td> + <td rowspan="5">any</td> + <td>file_format</td> + <td>string</td> + <td></td> + </tr> + <tr> <td>file_size</td> <td>int</td> <td></td> @@ -3213,6 +3218,11 @@ Possible `error_code`: ## Changelog +### v0.84 + +* Updated `medium_metadata` + * Added field `file_format` + ### v0.83 * Updated `GET /lecture/{lecture_id}/media_process_overview` diff --git a/api/api_specification_template.md b/api/api_specification_template.md index 36b13efed48bba55f952d2c0f709a09929505920..21874e01f3d561f1eaaba6f80e2535539d1d8d1d 100644 --- a/api/api_specification_template.md +++ b/api/api_specification_template.md @@ -1,4 +1,4 @@ -# Specification of the Web API for the Video-AG Website (v0.82). +# Specification of the Web API for the Video-AG Website (v0.84). 
## Introduction @@ -139,6 +139,11 @@ Possible `error_code`: ## Changelog +### v0.84 + +* Updated `medium_metadata` + * Added field `file_format` + ### v0.83 * Updated `GET /lecture/{lecture_id}/media_process_overview` diff --git a/api/config/db_test_data.sql b/api/config/db_test_data.sql index c3c942cd766565fd786a0546f3b5cb5d4c3f468d..1407fb2a989f7a472365e4e6f753b795ed6e3f7c 100644 --- a/api/config/db_test_data.sql +++ b/api/config/db_test_data.sql @@ -90,17 +90,17 @@ SELECT setval('sorter_file_id_seq', 1000); -- TODO figure out correct/realistic file sizes -INSERT INTO medium_metadata (id,file_id,"type",file_size,duration_sec,vertical_resolution,horizontal_resolution,video_frame_rate_numerator,video_frame_rate_denominator,audio_sample_rate,audio_channel_count) VALUES - (6,1,'thumbnail'::medium_metadata_type,42,NULL,640,640,NULL,NULL,NULL,NULL), - (8,2,'thumbnail'::medium_metadata_type,42,NULL,640,640,NULL,NULL,NULL,NULL), - (17,3,'thumbnail'::medium_metadata_type,42,NULL,640,640,NULL,NULL,NULL,NULL), - (20,4,'thumbnail'::medium_metadata_type,42,NULL,640,640,NULL,NULL,NULL,NULL), - (15,5,'plain_video'::medium_metadata_type,42,5431,1080,1920,25,1,44000,2), - (5,6,'plain_video'::medium_metadata_type,42,5243,720,1280,25,1,44000,2), - (7,7,'plain_video'::medium_metadata_type,42,5420,720,1280,25,1,44000,2), - (19,8,'plain_video'::medium_metadata_type,42,5001,720,1280,25,1,44000,2), - (16,9,'plain_video'::medium_metadata_type,42,5431,720,1280,25,1,44000,2), - (25,10,'plain_video'::medium_metadata_type,42,5431,480,854,25,1,44000,2); +INSERT INTO medium_metadata (id,file_id,"type",file_size,file_format,duration_sec,vertical_resolution,horizontal_resolution,video_frame_rate_numerator,video_frame_rate_denominator,audio_sample_rate,audio_channel_count) VALUES + (6,1,'thumbnail'::medium_metadata_type,42,'jpg',NULL,640,640,NULL,NULL,NULL,NULL), + (8,2,'thumbnail'::medium_metadata_type,42,'jpg',NULL,640,640,NULL,NULL,NULL,NULL), + 
-- Temporary migration helper: returns the extension of the file named by
-- `path`, i.e. the text after the last '.' of the final path component (the
-- part after the last '/').
--
--   'a/b/video.mp4' -> 'mp4'      'archive.tar.gz' -> 'gz'      '.mp4' -> 'mp4'
--
-- The extension is returned exactly as written; case is not normalised.
--
-- Raises an exception when `path` is NULL, when the final path component
-- contains no '.', or when nothing follows its last '.' ('video.', 'dir.d/').
CREATE OR REPLACE FUNCTION temp_get_file_extension(path text) RETURNS text AS $$
    DECLARE
        extension text;
    BEGIN
        -- NULL must be rejected explicitly: comparisons against NULL are never
        -- true, so a NULL path would otherwise skip every check below and be
        -- returned as a NULL "extension".
        IF path IS NULL THEN
            RAISE EXCEPTION 'File path is NULL';
        END IF;

        -- Capture what follows the last '.' of the last path component.
        -- [^./]+ can cross neither a '/' nor another '.', so a dot inside a
        -- directory name is never mistaken for the extension separator, and a
        -- trailing '.' yields no match at all.
        extension := substring(path FROM '\.([^./]+)$');

        -- substring() returns NULL when the pattern does not match.
        IF extension IS NULL THEN
            RAISE EXCEPTION 'File has no extension: %', path;
        END IF;

        RETURN extension;
    END;
$$ LANGUAGE plpgsql IMMUTABLE;
values are all probably wrong, but we can't get them here. Can be updated later by -- a script inspecting the actual files 0, + 'mp4', video.duration, 44000, 2, @@ -536,12 +564,14 @@ DO $$ file_id, type, file_size, + file_format, vertical_resolution, horizontal_resolution ) VALUES ( thumbnail_medium_file_id, 'thumbnail', 0, + 'jpg', 640, 640 ) RETURNING id INTO thumbnail_medium_metadata_id; diff --git a/common_py/src/videoag_common/media_process/basic_targets.py b/common_py/src/videoag_common/media_process/basic_targets.py index 55252a816c6906e54b972f0f110013cbfc98034b..fe103a57dd9739ab4e9e840efcdffce8156c3fbb 100644 --- a/common_py/src/videoag_common/media_process/basic_targets.py +++ b/common_py/src/videoag_common/media_process/basic_targets.py @@ -1,9 +1,11 @@ import re import math +from pathlib import Path from typing import TYPE_CHECKING from videoag_common.database import * from videoag_common.miscellaneous import * +from ..ffmpeg import get_file_extension if TYPE_CHECKING: # Can't actually import due to circular dependency @@ -60,6 +62,10 @@ class SourceFileTargetProducer(SingleOutputTargetProducer["SourceMedium"]): assert isinstance(intermediate, SorterFile) sorter_file: SorterFile = intermediate + + if get_file_extension(Path(sorter_file.file_path)) is None: + raise MediaProcessException(f"Sorter File {sorter_file.file_path} (#{sorter_file.id}) does not have a file" + f" extension") medium_file = output_files_by_id[self.output_id] medium_file.file_path = sorter_file.file_path @@ -92,7 +98,7 @@ class RescaleVideoTargetProducer(SingleInputTargetProducer, SingleOutputTargetPr raise MediaProcessException(f"Input {self.input_id} is not of type {MediumMetadataType.PLAIN_VIDEO}") output_file = output_files_by_id[self.output_id] - output_file.file_path += ".mp4" + output_file.file_path += f".{input_medium.file_format}" return "rescale_video", { "input_file": input_medium.file.file_path, diff --git a/common_py/src/videoag_common/objects/medium.py 
b/common_py/src/videoag_common/objects/medium.py index c8de5dcabd76371d0ba67c9fd9d512dea024c8ff..9fbabc0b36facddb17d73c9556770d135137b46d 100644 --- a/common_py/src/videoag_common/objects/medium.py +++ b/common_py/src/videoag_common/objects/medium.py @@ -313,6 +313,12 @@ class MediumMetadata(ApiObject, Base): include_in_data=True ) ) + file_format: Mapped[str] = api_mapped( + mapped_column(Text(collation=STRING_COLLATION), nullable=False), + ApiStringField( + include_in_data=True + ) + ) file: Mapped["MediumFile"] = relationship( primaryjoin=lambda: MediumMetadata.file_id == MediumFile.id, diff --git a/common_py/src/videoag_common/test/object_data.py b/common_py/src/videoag_common/test/object_data.py index 1d768076ba67cea46620fa12dc9ffe7537f95db8..75ac52015e7f67961c3ff72d19cafb10d79b81bd 100644 --- a/common_py/src/videoag_common/test/object_data.py +++ b/common_py/src/videoag_common/test/object_data.py @@ -183,6 +183,7 @@ TEST_DATA_MEDIUM_METADATA_5 = \ "id": 5, "type": "plain_video", "file_size": 42, + "file_format": "mp4", "vertical_resolution": 720, "horizontal_resolution": 1280, "video_frame_rate_numerator": 25, @@ -196,6 +197,7 @@ TEST_DATA_MEDIUM_METADATA_6 = \ "id": 6, "type": "thumbnail", "file_size": 42, + "file_format": "jpg", "vertical_resolution": 640, "horizontal_resolution": 640, } @@ -206,6 +208,7 @@ TEST_DATA_MEDIUM_METADATA_7 = \ "id": 7, "type": "plain_video", "file_size": 42, + "file_format": "mp4", "vertical_resolution": 720, "horizontal_resolution": 1280, "video_frame_rate_numerator": 25, @@ -219,6 +222,7 @@ TEST_DATA_MEDIUM_METADATA_8 = \ "id": 8, "type": "thumbnail", "file_size": 42, + "file_format": "jpg", "vertical_resolution": 640, "horizontal_resolution": 640, } @@ -229,6 +233,7 @@ TEST_DATA_MEDIUM_METADATA_15 = \ "id": 15, "type": "plain_video", "file_size": 42, + "file_format": "mp4", "vertical_resolution": 1080, "horizontal_resolution": 1920, "video_frame_rate_numerator": 25, @@ -242,6 +247,7 @@ TEST_DATA_MEDIUM_METADATA_16 = \ "id": 
16, "type": "plain_video", "file_size": 42, + "file_format": "mp4", "vertical_resolution": 720, "horizontal_resolution": 1280, "video_frame_rate_numerator": 25, @@ -255,6 +261,7 @@ TEST_DATA_MEDIUM_METADATA_25 = \ "id": 25, "type": "plain_video", "file_size": 42, + "file_format": "mp4", "vertical_resolution": 480, "horizontal_resolution": 854, "video_frame_rate_numerator": 25, @@ -268,6 +275,7 @@ TEST_DATA_MEDIUM_METADATA_17 = \ "id": 17, "type": "thumbnail", "file_size": 42, + "file_format": "jpg", "vertical_resolution": 640, "horizontal_resolution": 640, } @@ -278,6 +286,7 @@ TEST_DATA_MEDIUM_METADATA_19 = \ "id": 19, "type": "plain_video", "file_size": 42, + "file_format": "mp4", "vertical_resolution": 720, "horizontal_resolution": 1280, "video_frame_rate_numerator": 25, @@ -291,6 +300,7 @@ TEST_DATA_MEDIUM_METADATA_20 = \ "id": 20, "type": "thumbnail", "file_size": 42, + "file_format": "jpg", "vertical_resolution": 640, "horizontal_resolution": 640, } diff --git a/job_controller/jobs/media_process_scheduler/job.py b/job_controller/jobs/media_process_scheduler/job.py index a7fa1f8c8e992e7ed6e1cfc7a49793d74a7116d7..012fc4615e44e8d50ad3d24e8ef380761f833681 100644 --- a/job_controller/jobs/media_process_scheduler/job.py +++ b/job_controller/jobs/media_process_scheduler/job.py @@ -218,6 +218,13 @@ class ProcessScheduler: file.medium_metadata = res assert isinstance(file.medium_metadata, MediumMetadata) file.medium_metadata.file_size = probe_context.file_size() + if file.medium_metadata.file_format is None: + extension = get_file_extension(Path(file.file_path)) + if extension is None: + raise ValueError(f"Got no file format from try_create_for_file for {medium_type} and file" + f" {file.file_path} does not have an extension. 
Target Producers need to make sure" + f" that their medium files have an extension.") + file.medium_metadata.file_format = extension self._session.flush() # Ensure metadata id present logger.info(f"Assigned metadata {file.medium_metadata.id} of type {medium_type} to medium file {file.file_path}" f" ({file.process_target_id}, {file.id})")