from server import *
import traceback
import os.path


@app.route('/internal/sort/log')
@register_navbar('Sortierlog', icon='sort-by-attributes-alt', group='weitere')
@mod_required
def sort_log():
    """Render the sort log page.

    Passes the 50 most recent sortlog rows (joined with their lecture and
    course titles) and all sorterrorlog rows to the 'sortlog.html' template.
    """
    return render_template('sortlog.html', sortlog=query('''
            SELECT
                sortlog.*,
                lectures.id as lecture_id,
                lectures.title as lecture_title,
                lectures.course_id as course_id,
                courses.title as course_title
            FROM sortlog
            JOIN lectures ON lectures.id = sortlog.lecture_id
            JOIN courses ON courses.id = lectures.course_id
            ORDER BY sortlog.`when` DESC
            LIMIT 50
        '''), sorterrorlog=query('SELECT * FROM sorterrorlog ORDER BY sorterrorlog.`when` DESC'))


def to_ascii(inputstring):
    """Return inputstring with lowercase German umlauts and 'ß' transliterated.

    Only the lowercase characters 'ä', 'ö', 'ü' and 'ß' are replaced;
    every other character is left untouched.
    """
    asciistring = inputstring
    for char, replacement in (('ä', 'ae'), ('ö', 'oe'), ('ü', 'ue'), ('ß', 'ss')):
        asciistring = asciistring.replace(char, replacement)
    return asciistring


@job_handler('probe', 'remux', 'transcode')
def update_video_metadata(jobid, jobtype, data, state, status):
    """Store hash, file size and duration reported by a finished job.

    Jobs without a 'video_id' in their data are ignored. For job types other
    than 'remux' and 'transcode' the already stored hash (if any) must equal
    the reported one.

    Raises:
        Exception: if the stored hash differs from the reported hash.
    """
    if 'video_id' not in data:
        return
    if jobtype not in ['remux', 'transcode']:
        video = query('SELECT * FROM videos WHERE id = ?', data['video_id'])[0]
        if video['hash'] and video['hash'] != status['hash']:
            raise Exception('Hash mismatch for video {}'.format(data['video_id']))
    modify('UPDATE videos_data SET hash = ?, file_size = ?, duration = ? WHERE id = ?',
           status['hash'], status['filesize'], status['duration'], data['video_id'])


def schedule_thumbnail(lectureid):
    """Schedule a 'thumbnail' job using the lecture's video with the highest format prio.

    Returns whatever schedule_job returns.

    Raises:
        IndexError: if the lecture has no videos.
    """
    videos = query('''
            SELECT videos.path
            FROM videos
            JOIN formats ON (videos.video_format = formats.id)
            WHERE videos.lecture_id = ?
            ORDER BY formats.prio DESC''', lectureid)
    return schedule_job('thumbnail', {'src': videos[0]['path'], 'filename': 'l_%i.jpg'%lectureid})


@app.route('/internal/jobs/add/thumbnail', methods=['GET', 'POST'])
@mod_required
@csrf_protect
@handle_errors('jobs_overview', 'Zu dieser Veranstaltung existieren keine Videos!', 404, IndexError)
def add_thumbnail_job():
    """Schedule a thumbnail job for the lecture given by the 'lectureid' request value.

    Redirects to the 'ref' request value, or to the jobs overview if absent.
    """
    schedule_thumbnail(int(request.values['lectureid']))
    return redirect(request.values.get('ref', url_for('jobs_overview')))


def insert_video(lectureid, dbfilepath, fileformatid, hash="", filesize=-1, duration=-1, sourceid=None):
    """Insert a video row for a lecture and trigger the follow-up actions.

    The video's visibility is taken from the course's 'autovisible' column.
    When no sourceid is given, the insertion is recorded in sortlog and a
    'probe' job is scheduled. A thumbnail job is always scheduled and the
    moderators are notified via notify_mods.

    Returns:
        The value returned by modify for the INSERT, used as the video id.
    """
    visible = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', lectureid)[0]['autovisible']
    video_id = modify('''INSERT INTO videos_data
        (lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, duration, source)
        VALUES
        (?, ?, ?, ?, "", "", "", ?, ?, ?, ?, ?, ?, ?, ?)''',
        lectureid, visible, dbfilepath, fileformatid, datetime.now(), datetime.now(), datetime.now(), -1, hash, filesize, duration, sourceid)
    if not sourceid:
        query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', lectureid, video_id, dbfilepath, datetime.now())
        schedule_job('probe', {'path': dbfilepath, 'lecture_id': lectureid, 'video_id': video_id, 'import-chapters': True})
    schedule_thumbnail(lectureid)
    video = query('SELECT videos.*, "format" AS sep, formats.* FROM videos JOIN formats ON formats.id = videos.video_format WHERE videos.id = ?', video_id)[0]
    lecture = query('SELECT * FROM lectures WHERE id = ?', lectureid)[0]
    course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
    notify_mods('new_video', course['id'], course=course, lecture=lecture, video=video)
    return video_id


def split_filename(filename):
    """Split a file name into chunks at '-', '_' and ' '."""
    # '_' and ' ' are handled like '-'
    return filename.replace('_', '-').replace(' ', '-').split('-')


def parse_filename(splitFileName):
    """Extract date, time and keywords from the chunks of a split file name.

    Every occurrence of '.mp4' is removed from each chunk first. Then:
    - a 6-character chunk that parses as %y%m%d is stored as data['date'],
    - a 4-character chunk that parses as %H%M is stored as data['time'],
    - every other chunk is appended to data['keywords'].

    Returns:
        dict with the key 'keywords' (list) and optionally 'date' and 'time'.
    """
    data = {'keywords': []}
    for fileNameChunk in splitFileName:
        fileNameChunk = fileNameChunk.replace('.mp4', '')
        try:
            if len(fileNameChunk) == 6:
                data['date'] = datetime.strptime(fileNameChunk, '%y%m%d').date()
            elif len(fileNameChunk) == 4:
                data['time'] = datetime.strptime(fileNameChunk, '%H%M').time()
            else:
                data['keywords'].append(fileNameChunk)
        except ValueError:
            # if it is not a valid date or time, handle it as a keyword
            data['keywords'].append(fileNameChunk)
    return data


def filter_lectures_by_datetime(lectures, date, time):
    """Return the lectures whose 'time' matches the given date and/or time.

    Lectures without a truthy 'time' value are skipped. If neither date nor
    time is given, the result is empty.
    """
    matches = []
    if date or time:
        for lecture in lectures:
            if ('time' not in lecture) or (not lecture['time']):
                continue
            if date and (lecture['time'].date() != date):
                continue
            if time and (lecture['time'].time() != time):
                continue
            matches.append(lecture)
    return matches


def filter_lectures_by_keywords(lectures, keywords):
    """Return a one-element list with the first lecture matching any keyword.

    The fields 'title', 'speaker', 'comment' and 'internal' are searched in
    that order, so a match in an earlier field of any lecture wins over a
    match in a later field. Returns an empty list if nothing matches.
    """
    for field in ['title', 'speaker', 'comment', 'internal']:
        for lecture in lectures:
            for keyword in keywords:
                # first test for an exact match, else transliterate and try a substring test
                if (field in lecture) and (
                        (keyword == lecture[field]) or
                        (to_ascii(str(keyword).lower()) in str(to_ascii(lecture[field]).lower()))
                    ):
                    return [lecture]
    return []


def extract_format_keyword_from_filename(splitFileName):
    """Return the lowercased last chunk of the file name up to its first '.'."""
    return splitFileName[-1].split('.', 1)[0].lower()


def filter_formats_by_filename(splitFileName):
    """Return the id of the first format (by descending prio) whose keywords contain the file's format keyword.

    The 'keywords' column is split at ',' and ' '. Returns 0 if no format matches.
    """
    formatstring = extract_format_keyword_from_filename(splitFileName)
    formats = query('SELECT * FROM formats ORDER BY prio DESC')
    for videoformat in formats:
        # we match the last part of the file name without the extension
        if formatstring in videoformat['keywords'].replace(',', ' ').split(' '):
            return videoformat['id']
    # default format is "unknown", with id 0
    return 0


def sort_file(filename, course=None, lectures=None):
    """Match a file name to the lectures of a course and detect its format.

    If no course is given, it is looked up by handle: the first chunk of the
    file name, or the first two chunks joined by '-' when the first chunk
    ends in 'ws' or 'ss'. If no lectures are given, all lectures of the
    course are loaded.

    Returns:
        (matches, fmt): the list of matching lectures (exactly one element on
        an unambiguous match) and the format id. Returns ([], 0) if the
        course cannot be found.
    """
    splitFileName = split_filename(filename)
    if not course:
        handle = splitFileName[0]
        if splitFileName[0].endswith(('ws', 'ss')):
            handle = '-'.join(splitFileName[:2])
        courses = query('SELECT * FROM courses WHERE handle = ?', handle)
        if not courses:
            return [], 0
        course = courses[0]
    if not lectures:
        lectures = query('SELECT * from lectures where course_id = ?', course['id'])
    # parse all data from the file name
    data = parse_filename(splitFileName)
    # try to match the file on a single lecture
    matches = filter_lectures_by_datetime(lectures, data.get('date'), data.get('time'))
    # if we can't match exactly based on date and time, we have to match keywords
    if (len(matches) != 1) and data['keywords']:
        if matches:
            # several lectures share the date/time: only test those candidates
            matches = filter_lectures_by_keywords(matches, data['keywords'])
        else:
            # no date/time candidates: test all lectures of this course
            matches = filter_lectures_by_keywords(lectures, data['keywords'])
    # now we should have found exactly one match
    fmt = filter_formats_by_filename(splitFileName)
    return matches, fmt


def log_sort_error(course_id, path, matches):
    """Record a failed sort attempt with the comma-joined ids of all candidate lectures."""
    matches_id = [str(match['id']) for match in matches]
    query('INSERT INTO sorterrorlog_data (course_id, path, matches, `when`, time_updated, time_created) VALUES (?, ?, ?, ?, ?, ?)',
          course_id, path, ','.join(matches_id), datetime.now(), datetime.now(), datetime.now())


def sort_api_token_required(func):
    """Decorator that only calls func if the request carries the sorter API key.

    The key is read from the 'apikey' request value or, failing that, from
    the 'apikey' entry of the JSON body. It is compared with
    config['SORTER_API_KEY']; the default [None] never equals a token, so
    access is denied when no key is configured.
    """
    @wraps(func)
    def decorator(*args, **kwargs):
        if 'apikey' in request.values:
            token = request.values['apikey']
        else:
            json_body = request.get_json()
            if json_body and ('apikey' in json_body):
                token = json_body['apikey']
            else:
                token = None
        if token != config.get('SORTER_API_KEY', [None]):
            return 'Permission denied', 403
        return func(*args, **kwargs)
    return decorator


# The view takes the file name from the URL, so the rule needs the variable part.
@app.route('/internal/sort/encoded/<filename>')
@sort_api_token_required
def sort_encoded(filename):
    """Match an encoded file to a lecture and schedule its publication.

    Logs a sort error and answers 400 unless exactly one lecture matches.
    A 'publish_video' job is only scheduled if the course has 'autopublish' set.
    """
    matches, fmt = sort_file(filename)
    if len(matches) != 1:
        log_sort_error(-1, 'kodiert/'+filename, matches)
        return "Could not match filename", 400
    lecture = matches[0]
    course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
    if course['autopublish']:
        schedule_job('publish_video', {'source': filename, 'path': 'pub/'+course['handle']+'/'+filename, 'lecture_id': lecture['id'], 'format_id': fmt})
    return 'OK', 200


@app.route('/internal/sort/autoencode')
@sort_api_token_required
def sort_autoencode():
    """Match the file given by the 'path' request value and schedule a 'probe-raw' job.

    Answers with the error response of sort_autoencode_internal if the file
    could not be matched, otherwise with 'OK', 200.
    """
    filename = request.values['path']
    error_response = sort_autoencode_internal(filename)
    if error_response:
        return error_response
    return 'OK', 200


def sort_autoencode_internal(filename):
    """Schedule a 'probe-raw' job for a file below 'autoencode/'.

    Returns:
        ("Could not match filename", 400) after logging a sort error if the
        file does not match exactly one lecture, otherwise None.
    """
    path = 'autoencode/'+filename
    matches, fmt = sort_file(filename)
    if len(matches) != 1:
        log_sort_error(-1, 'raw/'+path, matches)
        return "Could not match filename", 400
    lecture = matches[0]
    schedule_job('probe-raw', {'path': path, 'lecture_id': lecture['id'], 'import-chapters': True})


@job_handler('publish_video')
def handle_published_video(jobid, jobtype, data, state, status):
    """Insert the video of a finished 'publish_video' job.

    Jobs lacking 'lecture_id' or 'format_id' in their data are ignored.
    """
    if 'lecture_id' not in data or 'format_id' not in data:
        return
    insert_video(data['lecture_id'], data['path'], data['format_id'], hash=status['hash'], filesize=status['filesize'], duration=status['duration'])


@app.route('/internal/sort/now')
@mod_required
@sched_func(600)
def sort_now():
    """Scan the course directories of all video mounts and sort new .mp4 files.

    For every course, files already present in videos or sorterrorlog (by
    base name) are skipped. Files matching exactly one lecture are inserted
    via insert_video; all others are recorded with log_sort_error. Errors
    while handling a single file are printed and do not abort the scan.
    """
    courses = query('SELECT * FROM courses')
    for course in courses:
        modify('BEGIN')
        for mountpoint in config['VIDEOMOUNT']:
            existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?', course['id'])
            knownerrors = query('SELECT sorterrorlog.path FROM sorterrorlog WHERE sorterrorlog.course_id = ?', course['id'])
            # a set keeps the per-file membership test cheap
            ignorefiles = {os.path.basename(row['path']) for row in existingvideos + knownerrors}
            lectures = query('SELECT * from lectures where course_id = ?', course['id'])
            coursepath = mountpoint['mountpoint']+course['handle']
            try:
                files = os.listdir(coursepath)
            except FileNotFoundError:
                files = []
            for filename in files:
                try:
                    # if the video is in the table "videos" already (with the correct course), skip it
                    if os.path.basename(filename) in ignorefiles:
                        continue
                    if os.path.splitext(filename)[1] != '.mp4':
                        continue
                    matches, fmt = sort_file(filename, course=course, lectures=lectures)
                    dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename
                    if len(matches) == 1:
                        insert_video(matches[0]['id'], dbfilepath, fmt)
                    else:
                        log_sort_error(course['id'], dbfilepath, matches)
                except Exception:
                    # best effort: one broken file must not stop the whole scan
                    traceback.print_exc()
        modify('COMMIT')
    if 'ref' in request.values:
        return redirect(request.values['ref'])
    return 'OK', 200