Commit 03b27a0a authored by Julian Rother

Moved sorting algorithm out of sort_now

parent 336c92d7
......@@ -52,6 +52,75 @@ def schedule_thumbnail(lectureid, filePath=None):
data = '{"lectureid": "'+str(lectureid)+'", "path": "'+path+'"}'
query('INSERT INTO jobs (type, data, time_created) VALUES ("thumbnail", ?, ?)', data, datetime.now());
def sort_file(filename, course=None, lectures=None):
    """Try to match a video file name to a single lecture of a course.

    File names have the form ``<handle>-<sorter>-<format>.mp4`` where '_' and
    ' ' are handled like '-'. Every '-'-separated part is interpreted as
    one of:

    * ``YYMMDD`` (6 characters) -> date of the lecture
    * ``HHMM``   (4 characters) -> time of the lecture
    * anything else             -> keyword, looked up in the lecture fields
      'title', 'speaker', 'comment' and 'internal'

    Parts that are empty (e.g. from doubled separators) are ignored: an empty
    keyword would be a substring of every field and therefore "match" the
    first candidate lecture unconditionally.

    filename -- name of the video file (without directory)
    course   -- course row; if falsy, the course is looked up by the handle
                built from the first two parts of the file name
    lectures -- lecture rows of the course; if falsy, they are queried by
                course['id']

    Returns a list of lecture rows. The match is unambiguous only if the list
    has exactly one element. An empty list is returned if the course cannot
    be found or nothing matched; several elements mean the candidates could
    not be narrowed down.
    """
    # '_' and ' ' are handled like '-'
    splitfilename = filename.replace('_', '-').replace(' ', '-').split('-')
    if not course:
        handle = '-'.join(splitfilename[:2])
        courses = query('SELECT * FROM courses WHERE handle = ?', handle)
        if not courses:
            return []
        course = courses[0]
    if not lectures:
        lectures = query('SELECT * from lectures where course_id = ?', course['id'])

    # parse the file name: collect date, time and keywords
    file_date = None
    file_time = None
    keywords = []
    for part in splitfilename:
        part = part.replace('.mp4', '')
        if not part:
            # empty parts carry no information and would match everything
            continue
        try:
            if len(part) == 6:
                file_date = datetime.strptime(part, '%y%m%d').date()
            elif len(part) == 4:
                file_time = datetime.strptime(part, '%H%M').time()
            else:
                keywords.append(part)
        except ValueError:
            # if it is not a date or time, handle it as keyword
            keywords.append(part)

    # first try date and time (if one of them is set)
    matches = []
    if (file_date is not None) or (file_time is not None):
        for lecture in lectures:
            if not ('time' in lecture) or not lecture['time']:
                continue
            if (file_date is not None) and (lecture['time'].date() != file_date):
                continue
            if (file_time is not None) and (lecture['time'].time() != file_time):
                continue
            matches.append(lecture)

    # if we can't match exactly based on date and time, we have to match keywords
    if (len(matches) != 1) and keywords:
        # only test lectures with the correct date/time, if we have any.
        # Else test for matches in all lectures of this course
        if not matches:
            matches.extend(lectures)
        for field in ['title', 'speaker', 'comment', 'internal']:
            for lecture in matches:
                for keyword in keywords:
                    # first test for exact match, else make it ascii and try substring test
                    if (keyword == lecture[field]) or \
                            (str(keyword).lower() in str(to_ascii(lecture[field]).lower())):
                        # the first hit wins
                        return [lecture]

    # now we should have found exactly one match (the caller has to check)
    return matches
@app.route('/internal/sort/now')
@mod_required
@sched_func(600)
......@@ -83,80 +152,23 @@ def sort_now():
if ignore:
continue
filepath = coursepath + '/' + filename
# filenames: <handle>-<sorter>-<format>.mp4
# "sorter" musst be found with fuzzy matching. "sorter" musst be one or more of the following types: (inside the loop)
# '_' and ' ' are handled like '-'
splitfilename = filename.replace('_','-').replace(' ','-').split('-')
if not os.path.splitext(filename)[1] == '.mp4':
if not os.path.splitext(filename)[1] == '.mp4':
continue
# we save all extraced data in a dict
data = {'keywords': []}
# parse the file name and save all data in 'data'
for s in splitfilename:
s = s.replace('.mp4','')
#-<YYMMDD> (date)
#-<HHMM> (time)
#-<keyword>
# Looking for keywords in: title,speaker,comment, comma seperated list in internal
try:
if len(s) == 6:
data['date'] = datetime.strptime(s,'%y%m%d').date()
elif len(s) == 4:
data['time'] = datetime.strptime(s,'%H%M').time()
else:
data['keywords'].append(s)
except ValueError:
# if its not a date or time, handle it as keyword
data['keywords'].append(s)
# try to match the file on a single lecture
matches = []
# first try date and time (if one of them is set)
if ('date' in data) or ('time' in data):
for lecture in lectures:
if not ('time' in lecture) or not lecture['time']:
continue
if ('date' in data) and (lecture['time'].date() != data['date']):
continue
if ('time' in data) and (lecture['time'].time() != data['time']):
continue
matches.append(lecture)
# if we can't match exactly based on date and time, we have to match keywords
if ((len(matches) != 1) and (len(data['keywords']) > 0)):
#only test lectures with the correct date/time, if we have any. Else test for matches in all lectures of this course
if len(matches) == 0:
matches.extend(lectures)
found = False
for field in ['title','speaker','comment','internal']:
for lecture in matches:
for keyword in data['keywords']:
# first test for exact match, else make it asci and try substring test
if (keyword == lecture[field]) or \
(str(keyword).lower() in str(to_ascii(lecture[field]).lower())):
found = True
matches = [lecture]
if found:
break
if found:
break
if found:
break
# now we should have found exactly one match
matches = sort_file(filename, course=course, lectures=lectures)
dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename
if len(matches) == 1:
# now match the format
splitfilename = filename.replace('_','-').replace(' ','-').split('-')
# default format is "unknown", with id 0
fmt = 0
for videoformat in formats:
#we match the last part of the file name without the extension
formatstring = splitfilename[-1].split('.',1)[0].lower()
if formatstring in videoformat['keywords'].replace(',',' ').split(' '):
data['format'] = videoformat['id']
fmt = videoformat['id']
break
# default format is "unknown", with id 0
if not 'format' in data:
data['format'] = 0
# insert the video into videos_data and log
insert_video( matches[0]['id'], dbfilepath, filepath, data['format'])
insert_video( matches[0]['id'], dbfilepath, filepath, fmt)
else:
# if we couldn't match the video on exactly one lecture, log an error
matches_id = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment