From 080cdbc5b7dee6e9f950f8bc4ca93f0e7f51cfe9 Mon Sep 17 00:00:00 2001 From: Andreas <andreasv@fsmpi.rwth-aachen.de> Date: Wed, 5 Oct 2016 04:18:04 +0200 Subject: [PATCH] extended sorter to work with old folder strukture --- config.py.example | 2 +- sorter.py | 191 ++++++++++++++++++++++++---------------------- 2 files changed, 102 insertions(+), 91 deletions(-) diff --git a/config.py.example b/config.py.example index 53d80f9..711c23f 100644 --- a/config.py.example +++ b/config.py.example @@ -1,7 +1,7 @@ # Defaults for development ,do not use in production! DEBUG = False VIDEOPREFIX = 'https://videoag.fsmpi.rwth-aachen.de' -VIDEOMOUNT = 'files/' +VIDEOMOUNT = ['files/protected/','files/pub/','files/vpnonline/'] #SECRET_KEY = 'something random' DB_SCHEMA = 'db_schema.sql' diff --git a/sorter.py b/sorter.py index 5146cb0..fcfeb75 100644 --- a/sorter.py +++ b/sorter.py @@ -19,105 +19,116 @@ def sort_log(): ''')) + + @app.route('/sort/now') -@sched_func(60) @mod_required def sort_now(): + return sort_wraper() + +@sched_func(60) +def sort_auto(): + pass +# return sort_wraper() + +def sort_wraper(): courses = query('SELECT * FROM courses') formats = query('SELECT * FROM formats ORDER BY prio') for c in courses: - existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',c['id']) - lectures = query('SELECT * from lectures where course_id = ?',c['id']) - coursepath = config['VIDEOMOUNT']+c['handle'] - try: - files = os.listdir(coursepath) - except FileNotFoundError: - files = [] - - for f in files: - # if the video is in the table "videos" already, skip it - exists = False - for e in existingvideos: - # vpnonline/08ws-swt/08ws-swt-081118.mp4 - e_filename = e['path'].split('/',2)[1] - if f == e_filename: - exists = True - break - if exists: - continue - - filepath = coursepath + '/' + f - # filenames: <handle>-<sorter>-<format>.mp4 - # sorter musst be found with fuzzy matching. musst be one or more of the following: (inside the loop) - splitfilename = f.replace('_','-').replace(' ','-').split('-') - if not os.path.splitext(f)[1] == '.mp4': - continue - data = {'keywords': []} - # parse the file name and save all data in 'data' - for s in splitfilename: - s = s.replace('.mp4','') - #-<YYMMDD> (date) - #-<HHMM> (time) - #-<keyword> - # Looking for keywords in: id,title,speaker,comment, comma seperated list in internal + for basepath in config['VIDEOMOUNT']: + existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',c['id']) + lectures = query('SELECT * from lectures where course_id = ?',c['id']) + coursepath = basepath+c['handle'] + try: + files = os.listdir(coursepath) + except FileNotFoundError: + files = [] + for f in files: try: - if len(s) == 6: - data['date'] = datetime.strptime(s,'%y%m%d').date() - elif len(s) == 4: - data['time'] = datetime.strptime(s,'%H%M').time() - else: - data['keywords'].append(s) - except ValueError: - data['keywords'].append(s) - # try to match the file on a single lecture - matches = [] - - # first try date and time (if one of them is set) - if 'date' in data: - for l in lectures: - if ((l['time'].date() == data['date']) and not ('time' in data)) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) : - matches.append(l) - # if we can't match based on date and time, we have to match keywords - if ((len(matches) != 1) and (len(data['keywords']) > 0)): - found = False - for field in ['id','title','speaker','comment','internal']: - #only test lectures with the correct date/time - if len(matches) == 0: - matches.extend(lectures) - for l in matches: - for k in data['keywords']: - # first test for exact match, else make it asci and try substring test - if (k == l[field]) or (str(k).lower() in str(l[field]).lower().replace('ä','ae').replace('ü','ue').replace('ö','oe').replace('ß','ss') ): - found = True - matches = [l] + # if the video is in the table "videos" already, skip it + exists = False + for e in existingvideos: + # vpnonline/08ws-swt/08ws-swt-081118.mp4 + e_filename = e['path'].split('/',2)[1] + if f == e_filename: + exists = True + break + if exists: + continue + filepath = coursepath + '/' + f + # filenames: <handle>-<sorter>-<format>.mp4 + # sorter musst be found with fuzzy matching. musst be one or more of the following: (inside the loop) + splitfilename = f.replace('_','-').replace(' ','-').split('-') + if not os.path.splitext(f)[1] == '.mp4': + continue + data = {'keywords': []} + # parse the file name and save all data in 'data' + for s in splitfilename: + s = s.replace('.mp4','') + #-<YYMMDD> (date) + #-<HHMM> (time) + #-<keyword> + # Looking for keywords in: id,title,speaker,comment, comma seperated list in internal + try: + if len(s) == 6: + data['date'] = datetime.strptime(s,'%y%m%d').date() + elif len(s) == 4: + data['time'] = datetime.strptime(s,'%H%M').time() + else: + data['keywords'].append(s) + except ValueError: + data['keywords'].append(s) + # try to match the file on a single lecture + matches = [] + + # first try date and time (if one of them is set) + if 'date' in data: + for l in lectures: + if ((l['time'].date() == data['date']) and not ('time' in data)) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) : + matches.append(l) + # if we can't match based on date and time, we have to match keywords + if ((len(matches) != 1) and (len(data['keywords']) > 0)): + found = False + for field in ['id','title','speaker','comment','internal']: + #only test lectures with the correct date/time + if len(matches) == 0: + matches.extend(lectures) + for l in matches: + for k in data['keywords']: + # first test for exact match, else make it asci and try substring test + if (k == l[field]) or (str(k).lower() in str(l[field]).lower().replace('ä','ae').replace('ü','ue').replace('ö','oe').replace('ß','ss') ): + found = True + matches = [l] + if found: + break + if found: + break if found: break - if found: - break - if found: - break - # now we should have found exactly one match - if len(matches) == 1: - # now match the format - for i in formats: - #we match the last part of the file name without the extension - formatstring = splitfilename[-1].split('.',1)[0].lower() - if formatstring in i['keywords'].replace(',',' ').split(' '): - data['format'] = i['id'] - break - # if we found the format, insert the video - if not 'format' in data: - data['format'] = 0 - modify('BEGIN') - video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),session['user']['givenName'],os.stat(coursepath+'/'+f).st_size) - query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now()) - modify('COMMIT') -# for debuging only -# else: -# d = [] -# for m in matches: -# d.append(m['id']) -# print('failed',{"data":data,"path":f,"results":d}) + # now we should have found exactly one match + if len(matches) == 1: + # now match the format + for i in formats: + #we match the last part of the file name without the extension + formatstring = splitfilename[-1].split('.',1)[0].lower() + if formatstring in i['keywords'].replace(',',' ').split(' '): + data['format'] = i['id'] + break + # if we found the format, insert the video + if not 'format' in data: + data['format'] = 0 + modify('BEGIN') + video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),-1,os.stat(coursepath+'/'+f).st_size) + query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now()) + modify('COMMIT') + # for debuging only + # else: + # d = [] + # for m in matches: + # d.append(m['id']) + # print('failed',{"data":data,"path":f,"results":d}) + except Exception: + pass if 'ref' in request.values: -- GitLab