Skip to content
Snippets Groups Projects
Commit 43b3a99e authored by Andreas Valder's avatar Andreas Valder
Browse files

finished sorter, now correctly sorting most of our old data

parent 077703e9
Branches
No related tags found
No related merge requests found
...@@ -14,7 +14,7 @@ def sort_log(): ...@@ -14,7 +14,7 @@ def sort_log():
FROM sortlog FROM sortlog
JOIN lectures ON lectures.id = sortlog.lecture_id JOIN lectures ON lectures.id = sortlog.lecture_id
JOIN courses ON courses.id = lectures.course_id JOIN courses ON courses.id = lectures.course_id
ORDER BY sortlog.`when` ORDER BY sortlog.`when` DESC
''')) '''))
...@@ -47,44 +47,45 @@ def sort_now(): ...@@ -47,44 +47,45 @@ def sort_now():
filepath = coursepath + '/' + f filepath = coursepath + '/' + f
# filenames: <handle>-<sorter>-<format>.mp4 # filenames: <handle>-<sorter>-<format>.mp4
# sorter must be found with fuzzy matching. must be one or more of the following: (inside the loop) # sorter must be found with fuzzy matching. must be one or more of the following: (inside the loop)
splitfilename = f.replace('_','-').split('-') splitfilename = f.replace('_','-').replace(' ','-').split('-')
if not os.path.splitext(f)[1] == '.mp4': if not os.path.splitext(f)[1] == '.mp4':
continue continue
data = {'keywords': []} data = {'keywords': []}
# parse the file name and save all data in 'data' # parse the file name and save all data in 'data'
for s in splitfilename: for s in splitfilename:
s = s.replace('.mp4','')
#-<YYMMDD> (date) #-<YYMMDD> (date)
#-<HHMM> (time) #-<HHMM> (time)
#-<keyword> #-<keyword>
# Looking for keywords in: id,title,speaker,comment, comma separated list in internal starting with "tags:" (in this order). first match counts # Looking for keywords in: id,title,speaker,comment, comma separated list in internal
if len(s) == 6:
try: try:
if len(s) == 6:
data['date'] = datetime.strptime(s,'%y%m%d').date() data['date'] = datetime.strptime(s,'%y%m%d').date()
except ValueError:
pass
elif len(s) == 4: elif len(s) == 4:
try:
data['time'] = datetime.strptime(s,'%H%M').time() data['time'] = datetime.strptime(s,'%H%M').time()
except ValueError:
pass
else: else:
data['keywords'].append(s) data['keywords'].append(s)
except ValueError:
data['keywords'].append(s)
# try to match the file on a single lecture # try to match the file on a single lecture
matches = [] matches = []
# first try date and time (if one of them is set) # first try date and time (if one of them is set)
if 'date' in data: if 'date' in data:
for l in lectures: for l in lectures:
if (l['time'].date() == data['date']) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) : if ((l['time'].date() == data['date']) and not ('time' in data)) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) :
matches.append(l) matches.append(l)
# if we can't match based on date and time, we have to match keywords # if we can't match based on date and time, we have to match keywords
if ((len(matches) != 1) and (len(data['keywords']) > 0)): if ((len(matches) != 1) and (len(data['keywords']) > 0)):
found = False found = False
for field in ['id','title','speaker','comment','internal']: for field in ['id','title','speaker','comment','internal']:
#only test lectures with the correct date/time #only test lectures with the correct date/time
if len(matches) == 0:
matches.extend(lectures)
for l in matches: for l in matches:
for k in data['keywords']: for k in data['keywords']:
if (k == l[field]) or (k in str(l[field])): # first test for exact match, else make it asci and try substring test
if (k == l[field]) or (str(k).lower() in str(l[field]).lower().replace('ä','ae').replace('ü','ue').replace('ö','oe').replace('ß','ss') ):
found = True found = True
matches = [l] matches = [l]
if found: if found:
...@@ -98,20 +99,23 @@ def sort_now(): ...@@ -98,20 +99,23 @@ def sort_now():
# now match the format # now match the format
for i in formats: for i in formats:
#we match the last part of the file name without the extension #we match the last part of the file name without the extension
formatstring = splitfilename[-1].split('.',1)[0] formatstring = splitfilename[-1].split('.',1)[0].lower()
if splitfilename[-1].split('.',1)[0] in i['keywords'].split(','): if formatstring in i['keywords'].replace(',',' ').split(' '):
data['format'] = i['id'] data['format'] = i['id']
break break
# if we found the format, insert the video # if we found the format, insert the video
if 'format' in data: if not 'format' in data:
data['format'] = 0
modify('BEGIN') modify('BEGIN')
video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),session['user']['givenName'],os.stat(coursepath+'/'+f).st_size) video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),session['user']['givenName'],os.stat(coursepath+'/'+f).st_size)
query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now()) query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now())
modify('COMMIT') modify('COMMIT')
else: # for debugging only
print('unknown format',formatstring,[data,f]) # else:
else: # d = []
print('failed',[data,f]) # for m in matches:
# d.append(m['id'])
# print('failed',{"data":data,"path":f,"results":d})
if 'ref' in request.values: if 'ref' in request.values:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment