From 43b3a99e29b34cc768b77e2adb5d6106afd1e01f Mon Sep 17 00:00:00 2001 From: Andreas <andreasv@fsmpi.rwth-aachen.de> Date: Wed, 5 Oct 2016 01:46:18 +0200 Subject: [PATCH] finished sorter, now correctly sorting most of our old date --- sorter.py | 54 +++++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/sorter.py b/sorter.py index 0638dcd..7830402 100644 --- a/sorter.py +++ b/sorter.py @@ -14,7 +14,7 @@ def sort_log(): FROM sortlog JOIN lectures ON lectures.id = sortlog.lecture_id JOIN courses ON courses.id = lectures.course_id - ORDER BY sortlog.`when` + ORDER BY sortlog.`when` DESC ''')) @@ -47,27 +47,25 @@ def sort_now(): filepath = coursepath + '/' + f # filenames: <handle>-<sorter>-<format>.mp4 # sorter musst be found with fuzzy matching. musst be one or more of the following: (inside the loop) - splitfilename = f.replace('_','-').split('-') + splitfilename = f.replace('_','-').replace(' ','-').split('-') if not os.path.splitext(f)[1] == '.mp4': continue data = {'keywords': []} # parse the file name and save all data in 'data' for s in splitfilename: + s = s.replace('.mp4','') #-<YYMMDD> (date) #-<HHMM> (time) #-<keyword> - # Looking for keywords in: id,title,speaker,comment, comma seperated list in internal starting with "tags:" (in this order). first match counts - if len(s) == 6: - try: + # Looking for keywords in: id,title,speaker,comment, comma seperated list in internal + try: + if len(s) == 6: data['date'] = datetime.strptime(s,'%y%m%d').date() - except ValueError: - pass - elif len(s) == 4: - try: + elif len(s) == 4: data['time'] = datetime.strptime(s,'%H%M').time() - except ValueError: - pass - else: + else: + data['keywords'].append(s) + except ValueError: data['keywords'].append(s) # try to match the file on a single lecture matches = [] @@ -75,16 +73,19 @@ def sort_now(): # first try date and time (if one of them is set) if 'date' in data: for l in lectures: - if (l['time'].date() == data['date']) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) : + if ((l['time'].date() == data['date']) and not ('time' in data)) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) : matches.append(l) # if we can't match based on date and time, we have to match keywords if ((len(matches) != 1) and (len(data['keywords']) > 0)): found = False for field in ['id','title','speaker','comment','internal']: #only test lectures with the correct date/time + if len(matches) == 0: + matches.extend(lectures) for l in matches: for k in data['keywords']: - if (k == l[field]) or (k in str(l[field])): + # first test for exact match, else make it asci and try substring test + if (k == l[field]) or (str(k).lower() in str(l[field]).lower().replace('ä','ae').replace('ü','ue').replace('ö','oe').replace('ß','ss') ): found = True matches = [l] if found: @@ -98,20 +99,23 @@ def sort_now(): # now match the format for i in formats: #we match the last part of the file name without the extension - formatstring = splitfilename[-1].split('.',1)[0] - if splitfilename[-1].split('.',1)[0] in i['keywords'].split(','): + formatstring = splitfilename[-1].split('.',1)[0].lower() + if formatstring in i['keywords'].replace(',',' ').split(' '): data['format'] = i['id'] break # if we found the format, insert the video - if 'format' in data: - modify('BEGIN') - video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),session['user']['givenName'],os.stat(coursepath+'/'+f).st_size) - query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now()) - modify('COMMIT') - else: - print('unknown format',formatstring,[data,f]) - else: - print('failed',[data,f]) + if not 'format' in data: + data['format'] = 0 + modify('BEGIN') + video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),session['user']['givenName'],os.stat(coursepath+'/'+f).st_size) + query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now()) + modify('COMMIT') +# for debuging only +# else: +# d = [] +# for m in matches: +# d.append(m['id']) +# print('failed',{"data":data,"path":f,"results":d}) if 'ref' in request.values: -- GitLab