from abc import abstractmethod
from datetime import datetime
from enum import Enum
from typing import Any
import videoag_common
from videoag_common.database import *
from videoag_common.miscellaneous import *
from videoag_common.api_object import *
from videoag_common.media_process import *
from videoag_common.ffmpeg import *
from .course import Lecture
from .job import Job
_API_BASE_URL = videoag_common.config["API_BASE_URL"]
if _API_BASE_URL.endswith("/"): # pragma: no cover
raise ValueError("API_BASE_URL must NOT end with /")
class SorterFileStatus(Enum):
SORTING_UPCOMING = "sorting_upcoming"
NO_SORT_DUE_TO_RECENT_MODIFICATION = "no_sort_due_to_recent_modification"
NO_SORT_DUE_TO_FILE_FORMAT = "no_sort_due_to_file_format"
ERROR = "error"
SORTED = "sorted"
_SORTER_FILE_STATUS_ENUM = create_enum_type(SorterFileStatus)
class SorterFile(DeletableApiObject, Base):
"""
This table stores information about files in the sorter, which need to be assigned to a lecture.
Once assigned to a lecture and process target, they are moved to the permanent media directory and a corresponding
MediumFile is created.
    SorterFile instances should **only** be deleted when the file was deleted without being sorted. If the file was
    moved and a MediumFile was created, the entry must stay visible: first, so that SourceFileTargetProducer knows
    the sha of the latest source file, and second, for a better user experience (e.g. the sort log).
"""
__api_class__ = ApiObjectClass(
config_allow_creation=False
)
__table_args__ = (
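        # Each constraint below encodes an implication: "NOT a IS NULL OR b IS NULL" reads as
        # "if b is set, then a must be set".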
CheckConstraint(
"file_path NOT ILIKE '/%'",
name="check_file_path_does_not_start_with_slash",
comment="file_path must NOT start with /"
),
CheckConstraint(
"NOT tag IS NULL OR lecture_id IS NULL",
name="check_tag_is_set",
comment="tag must be set if this has a lecture id"
),
CheckConstraint(
"NOT sha256 IS NULL OR lecture_id IS NULL",
name="check_sha256_is_set",
comment="sha256 must be set if this has a lecture id"
),
CheckConstraint(
"NOT lecture_id IS NULL OR designated_medium_file_id IS NULL",
name="check_lecture_id_is_set",
comment="lecture_id must be set if this has a designated_medium_file_id"
),
)
file_path: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=False, index=True), # Can't be unique because of deleted entries
ApiStringField(
include_in_data=True,
data_notes="Path is relative to website data directory. Does NOT start with a /"
)
)
file_modification_time: Mapped[datetime] = api_mapped(
mapped_column(UTCTimestamp, nullable=False),
ApiDatetimeField(
include_in_data=True
)
)
status: Mapped[SorterFileStatus] = api_mapped(
mapped_column(_SORTER_FILE_STATUS_ENUM, nullable=False),
ApiEnumField(
include_in_data=True
)
)
force_immediate_sort: Mapped[bool] = api_mapped(
mapped_column(nullable=False, default=False),
ApiBooleanField(
include_in_config=True, config_directly_modifiable=True,
include_in_data=True,
data_notes="Set this to true to sort the file immediately (once sorter is running) ignoring a recent "
"modification, file format or any previous error"
)
)
sorter_error_message: Mapped[str] = api_mapped(
mapped_column(Text(collation=STRING_COLLATION), nullable=True),
ApiStringField(
include_in_data=True
)
)
update_time: Mapped[datetime] = api_mapped(
mapped_column(UTCTimestamp, nullable=False),
ApiDatetimeField(
include_in_data=True
)
)
lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=True, index=True)
sha256: Mapped[str] = api_mapped(
mapped_column(String(length=64, collation=STRING_COLLATION), nullable=True),
ApiStringField(
include_in_data=True,
data_notes="Only calculated once this is assigned to a lecture"
)
)
tag: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=True),
ApiStringField(
include_in_data=True
)
)
# MediumFile to which this was moved
designated_medium_file_id: Mapped[int] = mapped_column(ForeignKey("medium_file.id"), nullable=True, unique=True)
lecture: Mapped[Lecture] = api_mapped(
relationship(
primaryjoin=lambda: Lecture.id == SorterFile.lecture_id,
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True
)
)
designated_medium_file: Mapped["MediumFile"] = api_mapped(
relationship(
primaryjoin=lambda: MediumFile.id == SorterFile.designated_medium_file_id,
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True
)
)
@hybrid_method
def has_access(self, context: dict[AccessContextKey, Any]):
cond = super().has_access(context)
if not AC_IS_MOD.get(context):
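            # hybrid_method: when invoked on the class (to build a SQL filter) `self` is the class itself,
            # so return a SQL FALSE literal; when invoked on an instance, return a plain bool.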
return sql.False_() if isinstance(self, type) else False
return cond
class MediumFile(DeletableApiObject, Base):
    __api_class__ = ApiObjectClass(
config_allow_creation=False
)
__table_args__ = (
CheckConstraint(
"file_path NOT ILIKE '/%'",
name="check_file_path_does_not_start_with_slash",
comment="file_path must NOT start with /"
),
)
file_path: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=False),
# Can't be unique because of deleted entries
ApiStringField(
include_in_data=True,
data_notes="Path is relative to website data directory. Does NOT start with a /"
)
)
    # Note that the following four columns may NOT be unique in combination, e.g. if one output is generated twice
    # (because another output of the same producer was deleted, etc.)
lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=False, index=True)
process_sha256: Mapped[str] = api_mapped(
mapped_column(String(length=64, collation=STRING_COLLATION), nullable=False),
ApiStringField(
include_in_data=True,
)
)
process_target_id: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=False),
ApiStringField(
include_in_data=True,
)
)
input_data_sha256: Mapped[str] = api_mapped(
mapped_column(String(length=64, collation=STRING_COLLATION), nullable=False),
ApiStringField(
include_in_data=True,
)
)
producer_job_id: Mapped[int] = mapped_column(ForeignKey("job.id"), nullable=True)
to_be_replaced: Mapped[bool] = api_mapped(
mapped_column(nullable=False, default=False),
ApiBooleanField(
include_in_data=True,
)
)
lecture: Mapped[Lecture] = api_mapped(
relationship(
primaryjoin=lambda: Lecture.id == MediumFile.lecture_id,
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True
)
)
producer_job: Mapped[Job] = api_mapped(
relationship(
primaryjoin=lambda: Job.id == MediumFile.producer_job_id,
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True
)
)
medium_metadata: Mapped["MediumMetadata"] = api_mapped(
relationship(
primaryjoin=lambda: MediumMetadata.file_id == MediumFile.id,
back_populates="file",
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True,
data_if=lambda file, args: args.medium_file_include_metadata,
)
)
def get_default_file_path_no_ending(self):
assert self.id is not None and self.process_target_id is not None
return f"{get_permanent_lecture_dir(self.lecture)}/target-{self.id}.{self.process_target_id}"
@hybrid_method
def has_access(self, context: dict[AccessContextKey, Any]):
cond = super().has_access(context)
if not AC_IS_MOD.get(context):
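            # Non-moderators may only access this file if its medium_metadata (and, transitively, the
            # publish medium) is accessible.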
cond &= self.can_access_relationship_remote(MediumFile.medium_metadata, context)
return cond
class MediumMetadataType(Enum):
# Note that this is also the order in which a medium file is probed, and the first to successfully create metadata
# will be the type of that medium file. (See _try_create_metadata_for_file in media_process_scheduler)
PLAIN_VIDEO = "plain_video"
PLAIN_AUDIO = "plain_audio"
THUMBNAIL = "thumbnail"
IMAGE = "image"
_MEDIUM_METADATA_TYPE = create_enum_type(MediumMetadataType)
# Not deletable, since the file itself can already be marked as 'deleted' when it no longer exists
class MediumMetadata(ApiObject, Base):
"""
This table stores information about the **contents** of all media files.
"""
__mapper_args__ = {
"polymorphic_on": "type",
"with_polymorphic": "*" # Always load all attributes for all types
}
    # This isn't pretty. With joined table inheritance it might be a bit nicer, but we would have many more columns
    # (whereas here we can reuse columns for different types)
__table_args__ = (
        CheckConstraint(
            "type NOT IN ('plain_video', 'plain_audio') OR duration_sec IS NOT NULL",
            name="check_duration_sec_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_audio') OR audio_sample_rate IS NOT NULL",
            name="check_audio_sample_rate_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_audio') OR audio_channel_count IS NOT NULL",
            name="check_audio_channel_count_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_video', 'thumbnail', 'image') OR vertical_resolution IS NOT NULL",
            name="check_vertical_resolution_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_video', 'thumbnail', 'image') OR horizontal_resolution IS NOT NULL",
            name="check_horizontal_resolution_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_video') OR video_frame_rate_numerator IS NOT NULL",
            name="check_video_frame_rate_numerator_not_null"
        ),
        CheckConstraint(
            "type NOT IN ('plain_video') OR video_frame_rate_denominator IS NOT NULL",
            name="check_video_frame_rate_denominator_not_null"
        ),
)
__api_class__ = ApiObjectClass(
config_allow_creation=False
)
file_id: Mapped[int] = mapped_column(ForeignKey("medium_file.id"), nullable=False, unique=True)
type: Mapped[MediumMetadataType] = api_mapped(
mapped_column(_MEDIUM_METADATA_TYPE, nullable=False),
ApiEnumField(
include_in_data=True
)
)
file_size: Mapped[int] = api_mapped(
mapped_column(sql.BigInteger(), nullable=False),
ApiIntegerField(
include_in_data=True
)
)
file: Mapped["MediumFile"] = relationship(
primaryjoin=lambda: MediumMetadata.file_id == MediumFile.id,
lazy="raise_on_sql"
)
publish_medium: Mapped["PublishMedium"] = api_mapped(
relationship(
primaryjoin=lambda: PublishMedium.medium_metadata_id == MediumMetadata.id,
back_populates="medium_metadata",
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True, data_foreign_in_context=True,
data_if=lambda meta, args: args.medium_metadata_include_publish_medium,
)
)
@hybrid_method
def has_access(self, context: dict[AccessContextKey, Any]):
cond = super().has_access(context)
if not AC_IS_MOD.get(context):
cond &= self.can_access_relationship_remote(MediumMetadata.publish_medium, context)
return cond
@classmethod
@abstractmethod
    def try_create_for_file(cls, probe_context: MediumProbeContext) -> "MediumMetadata | str | None":
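        # Return convention (inferred from the annotation and the MediumMetadataType note above): the created
        # MediumMetadata on success, an error message (str) on failure, or None if the file is not of this type.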
pass # pragma: no cover
@abstractmethod
def can_include_in_player(self):
pass # pragma: no cover
@abstractmethod
def can_download(self):
pass # pragma: no cover
class PublishMedium(VisibilityApiObject, DeletableApiObject, Base):
__api_class__ = ApiObjectClass(
parent_relationship_config_ids=["lecture"],
config_allow_creation=False
)
    # Yes, this is a bit redundant since medium_metadata.file.lecture_id already stores this. However, after
    # struggling with this for a long time, I decided to add it: without the lecture_id, checking the access rights
    # and the publish_media relationship becomes a lot more complicated.
    # To protect against invalid database states, we add three triggers to make the lecture_id immutable and to
    # check it on insert.
lecture_id: Mapped[int] = mapped_column(ForeignKey("lecture.id"), nullable=False)
title: Mapped[str] = api_mapped(
mapped_column(Text(collation=STRING_COLLATION), nullable=False, default=""),
ApiStringField(
max_length=256,
include_in_config=True, config_directly_modifiable=True,
include_in_data=True
)
)
    medium_metadata_id: Mapped[int] = mapped_column(ForeignKey("medium_metadata.id"), nullable=False)
lecture: Mapped[Lecture] = relationship(
primaryjoin=lambda: PublishMedium.lecture_id == Lecture.id,
back_populates="publish_media",
lazy="raise_on_sql",
viewonly=True,
)
medium_metadata: Mapped["MediumMetadata"] = api_mapped(
relationship(
primaryjoin=lambda: PublishMedium.medium_metadata_id == MediumMetadata.id,
back_populates="publish_medium",
lazy="raise_on_sql"
),
Api2OneRelationshipField(
include_in_data=True,
)
)
__table_args__ = (
sql.Index(
"check_medium_metadata_unique",
medium_metadata_id,
unique=True,
postgresql_where="NOT deleted"
),
)
@api_include_in_data(
data_notes="URL to the medium's file (Maybe with a redirect)"
)
def url(self) -> str:
return f"{_API_BASE_URL}/course/{self.lecture.course.handle}/resources/medium_file/{self.medium_metadata.file.id}"
@api_include_in_data(
data_notes="URL where the medium can be downloaded. If not present, the medium may not be downloaded",
data_if=lambda medium, args: medium.medium_metadata.can_download() and (args.is_mod or medium.lecture.course.allow_download)
)
def download_url(self) -> str:
return f"{_API_BASE_URL}/course/{self.lecture.course.handle}/resources/medium_file/{self.medium_metadata.file.id}?download=true"
@api_include_in_data(
data_notes="If true, this medium might be shown with the player. Otherwise, the player should ignore it because"
" it is only intended for download, etc."
)
def include_in_player(self) -> bool:
return self.medium_metadata.can_include_in_player()
@hybrid_method
def has_access(self, context: dict[AccessContextKey, Any]):
cond = super().has_access(context)
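        # In addition to the visibility/deleted checks of the superclasses, require access to the lecture itself.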
cond &= self.can_access_relationship_remote(PublishMedium.lecture, context)
return cond
def _create_trigger_to_check_publish_medium_lecture_id():
function = sql.DDL(f"""
CREATE OR REPLACE FUNCTION publish_medium_check_valid_lecture_id()
RETURNS TRIGGER AS $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM {MediumMetadata.__table__.name} AS meta
JOIN {MediumFile.__table__.name} AS file ON meta.file_id = file.id
WHERE meta.id = NEW.medium_metadata_id
AND file.lecture_id = NEW.lecture_id
) THEN
                RAISE EXCEPTION 'Got lecture id %% for publish medium which is inconsistent with the lecture id of the file referenced by metadata %%.', NEW.lecture_id, NEW.medium_metadata_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""")
trigger = sql.DDL(f"""
CREATE OR REPLACE TRIGGER trigger_publish_medium_check_valid_lecture_id BEFORE INSERT ON {PublishMedium.__table__.name}
FOR EACH ROW
EXECUTE FUNCTION publish_medium_check_valid_lecture_id()
""")
sql.event.listen(Base.metadata, "after_create", function)
sql.event.listen(Base.metadata, "after_create", trigger)
_create_trigger_to_check_publish_medium_lecture_id()
create_trigger_to_make_column_immutable(MediumFile.lecture_id)
create_trigger_to_make_column_immutable(PublishMedium.lecture_id)
class MediaProcessTemplate(ApiObject, Base):
name: Mapped[str] = api_mapped(
mapped_column(String(collation=STRING_COLLATION), nullable=False),
ApiStringField(
max_length=256,
include_in_config=True, config_directly_modifiable=True,
include_in_data=True
)
)
process: Mapped[dict] = api_mapped(
mapped_column(postgresql.JSONB, nullable=False),
ApiMediaProcessField(
include_in_config=True, config_directly_modifiable=True,
include_in_data=True
)
)
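    # Convenience accessor: parses the stored JSONB document into a MediaProcess object on every access
    # (the result is not cached).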
@property
def process_obj(self):
return MediaProcess.from_json(CJsonValue(self.process).as_object())