from abc import abstractmethod
from datetime import datetime
from enum import Enum
from typing import Any
import videoag_common
from videoag_common.database import *
from videoag_common.miscellaneous import *
from videoag_common.api_object import *
from videoag_common.media_process import *
from videoag_common.ffmpeg import *
from .course import Lecture
from .job import Job
_API_BASE_URL = videoag_common.config["API_BASE_URL"]
if _API_BASE_URL.endswith("/"): # pragma: no cover
raise ValueError("API_BASE_URL must NOT end with /")
class SorterFileStatus(Enum):
SORTING_UPCOMING = "sorting_upcoming"
NO_SORT_DUE_TO_RECENT_MODIFICATION = "no_sort_due_to_recent_modification"
NO_SORT_DUE_TO_FILE_FORMAT = "no_sort_due_to_file_format"
ERROR = "error"
SORTED = "sorted"
_SORTER_FILE_STATUS_ENUM = create_enum_type(SorterFileStatus)
class SorterFile(DeletableApiObject, Base):
"""
This table stores information about files in the sorter, which need to be assigned to a lecture.
Once assigned to a lecture and process target, they are moved to the permanent media directory and a corresponding
MediumFile is created.
    SorterFile instances should **only** be deleted when the file was deleted without being sorted. If the file was
    moved and a MediumFile was created, the entry must stay visible: first, so that SourceFileTargetProducer knows
    the sha of the latest source file, and second, for a better user experience (e.g. the sort log).
"""
__api_class__ = ApiObjectClass(
config_allow_creation=False
)
__table_args__ = (
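        # Each constraint below encodes an implication: "NOT a IS NULL OR b IS NULL" reads as
        # "if b is set, then a must be set".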
CheckConstraint(
"file_path NOT ILIKE '/%'",
name="check_file_path_does_not_start_with_slash",
comment="file_path must NOT start with /"
),
CheckConstraint(
"NOT tag IS NULL OR lecture_id IS NULL",
name="check_tag_is_set",
comment="tag must be set if this has a lecture id"
),
CheckConstraint(
"NOT sha256 IS NULL OR lecture_id IS NULL",
name="check_sha256_is_set",
comment="sha256 must be set if this has a lecture id"
),
CheckConstraint(
"NOT lecture_id IS NULL OR designated_medium_file_id IS NULL",
name="check_lecture_id_is_set",
comment="lecture_id must be set if this has a designated_medium_file_id"
),
)
file_path: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=False, index=True), # Can't be unique because of deleted entries
ApiStringField(
include_in_data=True,
data_notes="Path is relative to website data directory. Does NOT start with a /"
)
)
file_modification_time: Mapped[datetime] = api_mapped(
mapped_column(UTCTimestamp, nullable=False),
ApiDatetimeField(
include_in_data=True
)
)
status: Mapped[SorterFileStatus] = api_mapped(
mapped_column(_SORTER_FILE_STATUS_ENUM, nullable=False),
ApiEnumField(
include_in_data=True
)
)
force_immediate_sort: Mapped[bool] = api_mapped(
mapped_column(nullable=False, default=False),
ApiBooleanField(
include_in_config=True, config_directly_modifiable=True,
include_in_data=True,
data_notes="Set this to true to sort the file immediately (once sorter is running) ignoring a recent "
"modification, file format or any previous error"
)
)
sorter_error_message: Mapped[str] = api_mapped(
mapped_column(Text(collation=STRING_COLLATION), nullable=True),
ApiStringField(
include_in_data=True
)
)
update_time: Mapped[datetime] = api_mapped(
mapped_column(UTCTimestamp, nullable=False),
ApiDatetimeField(
include_in_data=True
)
)
lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=True, index=True)
sha256: Mapped[str] = api_mapped(
mapped_column(String(length=64, collation=STRING_COLLATION), nullable=True),
ApiStringField(
include_in_data=True,
data_notes="Only calculated once this is assigned to a lecture"
)
)
tag: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=True),
ApiStringField(
include_in_data=True
)
)
# MediumFile to which this was moved
designated_medium_file_id: Mapped[int] = mapped_column(ForeignKey("medium_file.id"), nullable=True, unique=True)
lecture: Mapped[Lecture] = api_mapped(
relationship(
primaryjoin=lambda: Lecture.id == SorterFile.lecture_id,
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True
)
)
designated_medium_file: Mapped["MediumFile"] = api_mapped(
relationship(
primaryjoin=lambda: MediumFile.id == SorterFile.designated_medium_file_id,
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True
)
)
@hybrid_method
def has_access(self, context: dict[AccessContextKey, Any]):
cond = super().has_access(context)
if not AC_IS_MOD.get(context):
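            # hybrid_method: when invoked on the class (to build a SQL filter) `self` is the class itself,
            # so return a SQL FALSE literal; when invoked on an instance, return a plain bool.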
return sql.False_() if isinstance(self, type) else False
return cond
class MediumFile(DeletableApiObject, Base):
    __api_class__ = ApiObjectClass(
config_allow_creation=False
)
__table_args__ = (
CheckConstraint(
"file_path NOT ILIKE '/%'",
name="check_file_path_does_not_start_with_slash",
comment="file_path must NOT start with /"
),
)
file_path: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=False),
# Can't be unique because of deleted entries
ApiStringField(
include_in_data=True,
data_notes="Path is relative to website data directory. Does NOT start with a /"
)
)
    # Note that the following four columns may NOT be unique in combination, e.g. if one output is generated twice
    # (because another output of the same producer was deleted, etc.)
lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=False, index=True)
process_sha256: Mapped[str] = api_mapped(
mapped_column(String(length=64, collation=STRING_COLLATION), nullable=False),
ApiStringField(
include_in_data=True,
)
)
process_target_id: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=False),
ApiStringField(
include_in_data=True,
)
)
input_data_sha256: Mapped[str] = api_mapped(
mapped_column(String(length=64, collation=STRING_COLLATION), nullable=False),
ApiStringField(
include_in_data=True,
)
)
producer_job_id: Mapped[int] = mapped_column(ForeignKey("job.id"), nullable=True)
to_be_replaced: Mapped[bool] = api_mapped(
mapped_column(nullable=False, default=False),
ApiBooleanField(
include_in_data=True,
)
)
lecture: Mapped[Lecture] = api_mapped(
relationship(
primaryjoin=lambda: Lecture.id == MediumFile.lecture_id,
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True
)
)
producer_job: Mapped[Job] = api_mapped(
relationship(
primaryjoin=lambda: Job.id == MediumFile.producer_job_id,
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True
)
)
medium_metadata: Mapped["MediumMetadata"] = api_mapped(
relationship(
primaryjoin=lambda: MediumMetadata.file_id == MediumFile.id,
back_populates="file",
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True,
data_if=lambda file, args: args.medium_file_include_metadata,
)
)
def get_default_file_path_no_ending(self):
assert self.id is not None and self.process_target_id is not None
return f"{get_permanent_lecture_dir(self.lecture)}/target-{self.id}.{self.process_target_id}"
@hybrid_method
def has_access(self, context: dict[AccessContextKey, Any]):
cond = super().has_access(context)
if not AC_IS_MOD.get(context):
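            # Non-moderators may only access this file if its medium_metadata (and, transitively, the
            # publish medium) is accessible.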
cond &= self.can_access_relationship_remote(MediumFile.medium_metadata, context)
return cond
class MediumMetadataType(Enum):
# Note that this is also the order in which a medium file is probed, and the first to successfully create metadata
# will be the type of that medium file. (See _try_create_metadata_for_file in media_process_scheduler)
PLAIN_VIDEO = "plain_video"
PLAIN_AUDIO = "plain_audio"
THUMBNAIL = "thumbnail"
IMAGE = "image"
_MEDIUM_METADATA_TYPE = create_enum_type(MediumMetadataType)
# Not deletable, since the file itself can already be marked as 'deleted' when it no longer exists
class MediumMetadata(ApiObject, Base):
"""
This table stores information about the **contents** of all media files.
"""
__mapper_args__ = {
"polymorphic_on": "type",
"with_polymorphic": "*" # Always load all attributes for all types
}
    # This isn't pretty. With joined table inheritance it might be a bit nicer, but we would have many more columns
    # (whereas here we can reuse columns for different types)
__table_args__ = (
        CheckConstraint(
            "type NOT IN ('plain_video', 'plain_audio') OR duration_sec IS NOT NULL",
            name="check_duration_sec_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_audio') OR audio_sample_rate IS NOT NULL",
            name="check_audio_sample_rate_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_audio') OR audio_channel_count IS NOT NULL",
            name="check_audio_channel_count_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_video', 'thumbnail', 'image') OR vertical_resolution IS NOT NULL",
            name="check_vertical_resolution_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_video', 'thumbnail', 'image') OR horizontal_resolution IS NOT NULL",
            name="check_horizontal_resolution_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_video') OR video_frame_rate_numerator IS NOT NULL",
            name="check_video_frame_rate_numerator_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_video') OR video_frame_rate_denominator IS NOT NULL",
            name="check_video_frame_rate_denominator_not_null"
        ),
)
__api_class__ = ApiObjectClass(
config_allow_creation=False
)
file_id: Mapped[int] = mapped_column(ForeignKey("medium_file.id"), nullable=False, unique=True)
type: Mapped[MediumMetadataType] = api_mapped(
mapped_column(_MEDIUM_METADATA_TYPE, nullable=False),
ApiEnumField(
include_in_data=True
)
)
file_size: Mapped[int] = api_mapped(
mapped_column(sql.BigInteger(), nullable=False),
ApiIntegerField(
include_in_data=True
)
)
file: Mapped["MediumFile"] = relationship(
primaryjoin=lambda: MediumMetadata.file_id == MediumFile.id,
lazy="raise_on_sql"
)
publish_medium: Mapped["PublishMedium"] = api_mapped(
relationship(
primaryjoin=lambda: PublishMedium.medium_metadata_id == MediumMetadata.id,
back_populates="medium_metadata",
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True,
data_if=lambda meta, args: args.medium_metadata_include_publish_medium,
)
)
@hybrid_method
def has_access(self, context: dict[AccessContextKey, Any]):
cond = super().has_access(context)
if not AC_IS_MOD.get(context):
cond &= self.can_access_relationship_remote(MediumMetadata.publish_medium, context)
return cond
@classmethod
@abstractmethod
    def try_create_for_file(cls, probe_context: MediumProbeContext) -> "MediumMetadata | str | None":
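        # Return convention (inferred from the annotation and the MediumMetadataType note above): the created
        # MediumMetadata on success, an error message (str) on failure, or None if the file is not of this type.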
pass # pragma: no cover
@abstractmethod
def can_include_in_player(self):
pass # pragma: no cover
@abstractmethod
def can_download(self):
pass # pragma: no cover
class PublishMedium(VisibilityApiObject, DeletableApiObject, Base):
__api_class__ = ApiObjectClass(
parent_relationship_config_ids=["lecture"],
config_allow_creation=False
)
    # Yes, this is a bit redundant since medium_metadata.file.lecture_id already stores this. However, after
    # struggling with this for a long time, I decided to add it: without the lecture_id, checking the access rights
    # and the publish_media relationship becomes a lot more complicated.
    # To protect against invalid database states, we add three triggers to make the lecture_id immutable and to
    # check it on insert.
lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=False)
title: Mapped[str] = api_mapped(
mapped_column(Text(collation=STRING_COLLATION), nullable=False, default=""),
ApiStringField(
max_length=256,
include_in_config=True, config_directly_modifiable=True,
include_in_data=True
)
)
    medium_metadata_id: Mapped[int] = mapped_column(ForeignKey("medium_metadata.id"), nullable=False)
lecture: Mapped[Lecture] = relationship(
primaryjoin=lambda: PublishMedium.lecture_id == Lecture.id,
back_populates="publish_media",
lazy="raise_on_sql",
viewonly=True,
)
medium_metadata: Mapped["MediumMetadata"] = api_mapped(
relationship(
primaryjoin=lambda: PublishMedium.medium_metadata_id == MediumMetadata.id,
back_populates="publish_medium",
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True,
)
)
__table_args__ = (
sql.Index(
"check_medium_metadata_unique",
medium_metadata_id,
unique=True,
postgresql_where="NOT deleted"
),
)
@api_include_in_data(
data_notes="URL to the medium's file (Maybe with a redirect)"
)
def url(self) -> str:
return f"{_API_BASE_URL}/course/{self.lecture.course.handle}/resources/medium_file/{self.medium_metadata.file.id}"
@api_include_in_data(
data_notes="URL where the medium can be downloaded. If not present, the medium may not be downloaded",
data_if=lambda medium, args: medium.medium_metadata.can_download() and (args.is_mod or medium.lecture.course.allow_download)
)
def download_url(self) -> str:
return f"{_API_BASE_URL}/course/{self.lecture.course.handle}/resources/medium_file/{self.medium_metadata.file.id}?download=true"
@api_include_in_data(
data_notes="If true, this medium might be shown with the player. Otherwise, the player should ignore it because"
" it is only intended for download, etc."
)
def include_in_player(self) -> bool:
return self.medium_metadata.can_include_in_player()
@hybrid_method
def has_access(self, context: dict[AccessContextKey, Any]):
cond = super().has_access(context)
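        # In addition to the visibility/deleted checks of the superclasses, require access to the lecture itself.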
cond &= self.can_access_relationship_remote(PublishMedium.lecture, context)
return cond
def _create_trigger_to_check_publish_medium_lecture_id():
function = sql.DDL(f"""
CREATE OR REPLACE FUNCTION publish_medium_check_valid_lecture_id()
RETURNS TRIGGER AS $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM {MediumMetadata.__table__.name} AS meta
JOIN {MediumFile.__table__.name} AS file ON meta.file_id = file.id
WHERE meta.id = NEW.medium_metadata_id
AND file.lecture_id = NEW.lecture_id
) THEN
                RAISE EXCEPTION 'Got lecture id %% for publish medium which is inconsistent with the lecture id of the file referenced by metadata %%.', NEW.lecture_id, NEW.medium_metadata_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""")
trigger = sql.DDL(f"""
CREATE OR REPLACE TRIGGER trigger_publish_medium_check_valid_lecture_id BEFORE INSERT ON {PublishMedium.__table__.name}
FOR EACH ROW
EXECUTE FUNCTION publish_medium_check_valid_lecture_id()
""")
sql.event.listen(Base.metadata, "after_create", function)
sql.event.listen(Base.metadata, "after_create", trigger)
_create_trigger_to_check_publish_medium_lecture_id()
create_trigger_to_make_column_immutable(MediumFile.lecture_id)
create_trigger_to_make_column_immutable(PublishMedium.lecture_id)
class MediaProcessTemplate(ApiObject, Base):
name: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=False),
ApiStringField(
max_length=256,
include_in_config=True, config_directly_modifiable=True,
include_in_data=True
)
)
process: Mapped[dict] = api_mapped(
mapped_column(postgresql.JSONB, nullable=False),
ApiMediaProcessField(
include_in_config=True, config_directly_modifiable=True,
include_in_data=True
)
)
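    # Convenience accessor: parses the stored JSONB document into a MediaProcess object on every access
    # (the result is not cached).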
@property
def process_obj(self):
return MediaProcess.from_json(CJsonValue(self.process).as_object())