Select Git revision
requirements.txt
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
This project manages its dependencies using pip.
Learn more
sorter.py 10.23 KiB
from server import *
import traceback
import os.path
@app.route('/internal/sort/log')
@register_navbar('Sortierlog', icon='sort-by-attributes-alt', group='weitere')
@mod_required
def sort_log():
    """Render the sort log page.

    Hands two query results to the ``sortlog.html`` template: the 50 newest
    ``sortlog`` rows joined with their lecture and course, and every
    ``sorterrorlog`` row, newest first.
    """
    # The SQL text is kept exactly as it was, hence the unindented lines.
    recent_sorts = query('''
SELECT
sortlog.*,
lectures.id as lecture_id,
lectures.title as lecture_title,
lectures.course_id as course_id,
courses.title as course_title
FROM sortlog
JOIN lectures ON lectures.id = sortlog.lecture_id
JOIN courses ON courses.id = lectures.course_id
ORDER BY sortlog.`when` DESC
LIMIT 50
''')
    sort_errors = query('SELECT * FROM sorterrorlog ORDER BY sorterrorlog.`when` DESC')
    return render_template('sortlog.html', sortlog=recent_sorts, sorterrorlog=sort_errors)
def to_ascii(inputstring):
    """Return *inputstring* with lowercase German umlauts and 'ß' transliterated.

    'ä', 'ö', 'ü' and 'ß' become 'ae', 'oe', 'ue' and 'ss'. Every other
    character, including uppercase umlauts, is left untouched.
    """
    transliteration = str.maketrans({'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss'})
    return inputstring.translate(transliteration)
@job_handler('probe', 'remux', 'transcode')
def update_video_metadata(jobid, jobtype, data, state, status):
    """Store the hash, file size and duration reported by a job on its video.

    Nothing happens when the job data has no ``video_id``. For job types other
    than ``remux`` and ``transcode``, a hash already stored for the video must
    equal the reported one before the row is updated.

    Raises:
        Exception: if the stored hash and the reported hash differ.
    """
    if 'video_id' not in data:
        return
    video_id = data['video_id']

    # remux/transcode skip the check (presumably because they produce a new
    # file and therefore a new hash -- confirm against the job definitions).
    skip_hash_check = jobtype in ['remux', 'transcode']
    if not skip_hash_check:
        stored = query('SELECT * FROM videos WHERE id = ?', video_id)[0]
        known_hash = stored['hash']
        if known_hash and known_hash != status['hash']:
            raise Exception('Hash mismatch for video {}'.format(video_id))

    modify(
        'UPDATE videos_data SET hash = ?, file_size = ?, duration = ? WHERE id = ?',
        status['hash'], status['filesize'], status['duration'], video_id)
def schedule_thumbnail(lectureid):
    """Schedule a ``thumbnail`` job for a lecture and return ``schedule_job``'s result.

    The job source is the path of the lecture's first video when ordered by
    descending format ``prio``; the output file name is ``l_<lectureid>.jpg``.
    Indexing the first row fails if the lecture has no videos.
    """
    # The SQL text is kept exactly as it was, hence the unindented lines.
    candidates = query('''
SELECT videos.path
FROM videos
JOIN formats ON (videos.video_format = formats.id)
WHERE videos.lecture_id = ?
ORDER BY formats.prio DESC''', lectureid)
    job_data = {
        'src': candidates[0]['path'],
        'filename': 'l_%i.jpg' % lectureid,
    }
    return schedule_job('thumbnail', job_data)
@app.route('/internal/jobs/add/thumbnail', methods=['GET', 'POST'])
@mod_required
@csrf_protect
@handle_errors('jobs_overview', 'Zu dieser Veranstaltung existieren keine Videos!', 404, IndexError)
def add_thumbnail_job():
    """Schedule a thumbnail job for the lecture named by ``lectureid`` and redirect.

    The redirect goes to the ``ref`` request value when one is given and to
    the jobs overview otherwise.
    """
    lecture_id = int(request.values['lectureid'])
    schedule_thumbnail(lecture_id)
    fallback_url = url_for('jobs_overview')
    target = request.values.get('ref', fallback_url)
    return redirect(target)
# Insert a new row into videos_data for a lecture, notify the moderators about
# it and return the value produced by the INSERT's modify() call.
#
# Parameters:
#   lectureid     id of the lecture the video belongs to
#   dbfilepath    value stored in the `path` column (also used as probe job path)
#   fileformatid  value stored in the `video_format` column
#   hash          value stored in the `hash` column (default "")
#   filesize      value stored in the `file_size` column (default -1)
#   duration      value stored in the `duration` column (default -1)
#   sourceid      value stored in the `source` column; when falsy a sortlog
#                 row is written for the new video
#
# Returns: video_id as returned by modify() -- presumably the id of the
# inserted row; TODO confirm against modify()'s contract.
#
# NOTE(review): the indentation of this block is not visible here, so it is
# unclear which of the statements following `if not sourceid:` belong to that
# branch (the sortlog insert certainly does; the probe job and the thumbnail
# job need to be confirmed against the real file before restructuring).
def insert_video(lectureid, dbfilepath, fileformatid, hash="", filesize=-1, duration=-1, sourceid=None):
# New videos inherit their visibility from the course's `autovisible` flag.
visible = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', lectureid)[0]['autovisible']
# title/comment/internal start out empty, the three timestamps are "now",
# created_by is -1.
video_id = modify('''INSERT INTO videos_data
(lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, duration, source)
VALUES
(?, ?, ?, ?, "", "", "", ?, ?, ?, ?, ?, ?, ?, ?)''',
lectureid, visible, dbfilepath, fileformatid, datetime.now(), datetime.now(), datetime.now(), -1, hash, filesize, duration, sourceid)
if not sourceid:
query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', lectureid, video_id, dbfilepath, datetime.now())
schedule_job('probe', {'path': dbfilepath, 'lecture_id': lectureid, 'video_id': video_id, 'import-chapters': True})
schedule_thumbnail(lectureid)
# Re-read video (joined with its format), lecture and course to hand complete
# rows to the moderator notification.
video = query('SELECT videos.*, "format" AS sep, formats.* FROM videos JOIN formats ON formats.id = videos.video_format WHERE videos.id = ?', video_id)[0]
lecture = query('SELECT * FROM lectures WHERE id = ?', lectureid)[0]
course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
notify_mods('new_video', course['id'], course=course, lecture=lecture, video=video)
return video_id
def split_filename(filename):
    """Split *filename* into chunks at every '-', '_' and ' '."""
    normalized = filename
    # '_' and ' ' count as separators just like '-'
    for separator in ('_', ' '):
        normalized = normalized.replace(separator, '-')
    return normalized.split('-')
def parse_filename(splitFileName):
    """Extract date, time and keywords from the chunks of a split file name.

    File names follow ``<handle>-<sorter>-<format>.mp4``. Every chunk (with
    '.mp4' removed) is classified as:

    * a date, if it has 6 characters and parses as ``YYMMDD``;
    * a time, if it has 4 characters and parses as ``HHMM``;
    * a keyword otherwise (later looked up in title, speaker, comment and the
      comma separated list in internal).

    Args:
        splitFileName: list of file name chunks, e.g. from ``split_filename``.

    Returns:
        A dict with a ``'keywords'`` list and, when found, ``'date'``
        (``datetime.date``) and ``'time'`` (``datetime.time``) entries.
    """
    data = {'keywords': []}
    for fileNameChunk in splitFileName:
        chunk = fileNameChunk.replace('.mp4', '')
        if not chunk:
            # Consecutive separators ('a--b') or a bare '.mp4' produce empty
            # chunks. An empty keyword is a substring of every text and would
            # therefore match any lecture, so it must not be recorded.
            continue
        try:
            if len(chunk) == 6:
                data['date'] = datetime.strptime(chunk, '%y%m%d').date()
            elif len(chunk) == 4:
                data['time'] = datetime.strptime(chunk, '%H%M').time()
            else:
                data['keywords'].append(chunk)
        except ValueError:
            # not a valid date or time after all: treat it as a keyword
            data['keywords'].append(chunk)
    return data
def filter_lectures_by_datetime(lectures, date, time):
    """Return the lectures whose ``time`` entry matches *date* and/or *time*.

    A lecture is kept when it has a truthy ``'time'`` value whose ``.date()``
    equals *date* (if *date* is given) and whose ``.time()`` equals *time* (if
    *time* is given). With neither criterion given the result is empty.
    """
    if not (date or time):
        return []

    def is_match(lecture):
        # lectures without a usable timestamp can never match
        if 'time' not in lecture or not lecture['time']:
            return False
        scheduled = lecture['time']
        if date and scheduled.date() != date:
            return False
        if time and scheduled.time() != time:
            return False
        return True

    return [lecture for lecture in lectures if is_match(lecture)]
def filter_lectures_by_keywords(lectures, keywords):
    """Return the first lecture matching any keyword, as a one-element list.

    The fields ``title``, ``speaker``, ``comment`` and ``internal`` are
    searched in that order; within a field the lectures are tried in order. A
    keyword matches when it equals the field value, or when its lowercased,
    transliterated form is a substring of the lowercased, transliterated value.

    Args:
        lectures: iterable of mappings describing lectures.
        keywords: iterable of keywords taken from the file name.

    Returns:
        ``[lecture]`` for the first match, ``[]`` when nothing matches.
    """
    # Normalise every keyword once. Keywords that normalise to '' are dropped:
    # the empty string is a substring of everything and would match any lecture.
    needles = []
    for keyword in keywords:
        normalized = to_ascii(str(keyword).lower())
        if normalized:
            needles.append((keyword, normalized))

    for field in ['title', 'speaker', 'comment', 'internal']:
        for lecture in lectures:
            if field not in lecture or lecture[field] is None:
                # missing/NULL fields cannot match and must not crash to_ascii
                continue
            value = lecture[field]
            # Lowercase BEFORE transliterating so that uppercase umlauts
            # ('Ü' -> 'ü' -> 'ue') are handled as well.
            haystack = to_ascii(str(value).lower())
            for keyword, normalized in needles:
                # first test for an exact match, then the ASCII substring test
                if keyword == value or normalized in haystack:
                    return [lecture]
    return []
def extract_format_keyword_from_filename(splitFileName):
    """Return the last chunk of the split file name, cut at its first '.', lowercased."""
    last_chunk = splitFileName[-1]
    stem, _, _ = last_chunk.partition('.')
    return stem.lower()
def filter_formats_by_filename(splitFileName):
    """Return the id of the format whose keyword list contains the file's format keyword.

    Formats are tried in descending ``prio`` order. The keyword is the last
    part of the file name without the extension. Returns 0 if no format
    lists it.
    """
    wanted = extract_format_keyword_from_filename(splitFileName)
    for candidate in query('SELECT * FROM formats ORDER BY prio DESC'):
        # keyword lists may be separated by commas and/or spaces
        known_keywords = candidate['keywords'].replace(',', ' ').split(' ')
        if wanted in known_keywords:
            return candidate['id']
    # default format is "unknown", with id 0
    return 0
def sort_file(filename, course=None, lectures=None):
    """Match a file name against the lectures of a course.

    Args:
        filename: name of the video file.
        course: course row; looked up by the handle in the file name if omitted.
        lectures: lecture rows of the course; queried if omitted or empty.

    Returns:
        A tuple ``(matches, fmt)``: the list of matching lectures (ideally
        exactly one) and the format id derived from the file name. When no
        course can be found for the handle the result is ``([], 0)``.
    """
    chunks = split_filename(filename)

    if not course:
        first = chunks[0]
        # a first chunk ending in 'ws'/'ss' means the handle spans two chunks
        # (presumably a semester prefix -- confirm against the handle scheme)
        if first.endswith(('ws', 'ss')):
            handle = '-'.join(chunks[:2])
        else:
            handle = first
        found = query('SELECT * FROM courses WHERE handle = ?', handle)
        if not found:
            return [], 0
        course = found[0]

    if not lectures:
        lectures = query('SELECT * from lectures where course_id = ?', course['id'])

    # parse all data from the file name
    data = parse_filename(chunks)
    keywords = data['keywords']

    # first try to pin the file to a single lecture via date and time
    matches = filter_lectures_by_datetime(lectures, data.get('date'), data.get('time'))

    # without an exact date/time hit the keywords have to decide
    if len(matches) != 1 and len(keywords) > 0:
        # prefer the lectures with the right date/time, else search them all
        pool = matches if len(matches) != 0 else lectures
        matches = filter_lectures_by_keywords(pool, keywords)

    # by now there should be exactly one match
    fmt = filter_formats_by_filename(chunks)
    return matches, fmt
def log_sort_error(course_id, path, matches):
    """Record a file that could not be sorted in ``sorterrorlog_data``.

    Args:
        course_id: id of the course the file was sorted for.
        path: path of the file that could not be matched.
        matches: the (zero or several) candidate lectures; their ids are
            stored as a comma separated string.
    """
    # One timestamp for all three columns, so `when`, `time_updated` and
    # `time_created` of the new row are guaranteed to be identical.
    now = datetime.now()
    matched_ids = ','.join(str(match['id']) for match in matches)
    query('INSERT INTO sorterrorlog_data (course_id, path, matches, `when`, time_updated, time_created) VALUES (?, ?, ?, ?, ?, ?)',
          course_id, path, matched_ids, now, now, now)
def sort_api_token_required(func):
    """Decorator that only runs *func* for requests carrying the sorter API key.

    The key is read from the ``apikey`` request value or, failing that, from
    the ``apikey`` member of a JSON object body. It has to equal the
    ``SORTER_API_KEY`` config entry; otherwise ``('Permission denied', 403)``
    is returned. Requests are always rejected while no key is configured.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    @wraps(func)
    def decorator(*args, **kwargs):
        if 'apikey' in request.values:
            token = request.values['apikey']
        else:
            # silent=True: a missing or malformed JSON body simply means
            # "no token" instead of aborting the request with another error.
            payload = request.get_json(silent=True)
            token = payload.get('apikey') if isinstance(payload, dict) else None

        expected = config.get('SORTER_API_KEY')
        if token is None or expected is None:
            # never let "no key sent" equal "no key configured"
            return 'Permission denied', 403

        if isinstance(token, str) and isinstance(expected, str):
            # constant-time comparison; bytes avoid compare_digest's
            # ASCII-only restriction for str arguments
            authorized = hmac.compare_digest(token.encode('utf-8'), expected.encode('utf-8'))
        else:
            authorized = token == expected

        if not authorized:
            return 'Permission denied', 403
        return func(*args, **kwargs)
    return decorator
@app.route('/internal/sort/encoded/<filename>')
@sort_api_token_required
def sort_encoded(filename):
    """Sort an encoded file and, if the course auto-publishes, schedule publishing.

    Responds with 400 (and logs a sort error under ``kodiert/``) unless the
    file name matches exactly one lecture; otherwise responds ``OK``.
    """
    matches, fmt = sort_file(filename)
    if len(matches) != 1:
        log_sort_error(-1, 'kodiert/' + filename, matches)
        return "Could not match filename", 400

    [lecture] = matches
    course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
    if course['autopublish']:
        job_data = {
            'source': filename,
            'path': '/'.join(['pub', course['handle'], filename]),
            'lecture_id': lecture['id'],
            'format_id': fmt,
        }
        schedule_job('publish_video', job_data)
    return 'OK', 200
@app.route('/internal/sort/autoencode')
@sort_api_token_required
def sort_autoencode():
    """Sort a raw file given as ``path`` request value and schedule a ``probe-raw`` job.

    Responds with 400 (and logs a sort error under ``raw/autoencode/``) unless
    the file name matches exactly one lecture; otherwise responds ``OK``.
    """
    filename = request.values['path']
    path = 'autoencode/' + filename
    matches, fmt = sort_file(filename)
    if len(matches) != 1:
        log_sort_error(-1, 'raw/' + path, matches)
        return "Could not match filename", 400

    [lecture] = matches
    job_data = {'path': path, 'lecture_id': lecture['id'], 'import-chapters': True}
    schedule_job('probe-raw', job_data)
    return 'OK', 200
@job_handler('publish_video')
def handle_published_video(jobid, jobtype, data, state, status):
    """Insert the video produced by a ``publish_video`` job.

    Jobs whose data lacks ``lecture_id`` or ``format_id`` are ignored. Hash,
    file size and duration are taken from the job status.
    """
    required_keys = ('lecture_id', 'format_id')
    if any(key not in data for key in required_keys):
        return
    insert_video(
        data['lecture_id'],
        data['path'],
        data['format_id'],
        hash=status['hash'],
        filesize=status['filesize'],
        duration=status['duration'],
    )
@app.route('/internal/sort/now')
@mod_required
@sched_func(600)
def sort_now():
    """Scan the course directories of every video mount and sort new .mp4 files.

    For each course (inside one BEGIN/COMMIT pair) and each entry of
    ``config['VIDEOMOUNT']``, the directory ``<mountpoint><handle>`` is
    listed. Files that are already known as video or as sort error are
    skipped; every other ``.mp4`` file is either inserted as a video (exactly
    one matching lecture) or logged as a sort error. Errors while handling a
    single file are printed and do not stop the run.

    Returns:
        A redirect to the ``ref`` request value if given, else ``('OK', 200)``.
    """
    courses = query('SELECT * FROM courses')
    for course in courses:
        modify('BEGIN')
        for mountpoint in config['VIDEOMOUNT']:
            existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',course['id'])
            knownerrors = query('SELECT sorterrorlog.path FROM sorterrorlog WHERE sorterrorlog.course_id = ?',course['id'])
            # A set gives O(1) membership tests in the per-file loop below.
            ignorefiles = {os.path.basename(row['path']) for row in existingvideos + knownerrors}
            lectures = query('SELECT * from lectures where course_id = ?',course['id'])
            coursepath = mountpoint['mountpoint']+course['handle']
            try:
                files = os.listdir(coursepath)
            except FileNotFoundError:
                # the course has no directory on this mount
                files = []
            for filename in files:
                try:
                    # if the video is in the table "videos" already (with the correct course), skip it
                    if os.path.basename(filename) in ignorefiles:
                        continue
                    if os.path.splitext(filename)[1] != '.mp4':
                        continue
                    matches, fmt = sort_file(filename, course=course, lectures=lectures)
                    dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename
                    if len(matches) == 1:
                        insert_video(matches[0]['id'], dbfilepath, fmt)
                    else:
                        log_sort_error(course['id'], dbfilepath, matches)
                except Exception:
                    # best effort: one broken file must not abort the whole run
                    traceback.print_exc()
        modify('COMMIT')
    if 'ref' in request.values:
        return redirect(request.values['ref'])
    return 'OK', 200