Skip to content
Snippets Groups Projects
Unverified Commit edfacd0a authored by Andreas Valder
Browse files

refactor and split up of sort_file

parent ff93e6bf
Branches
No related tags found
No related merge requests found
...@@ -73,82 +73,88 @@ def insert_transcoded_video(jobid, jobtype, data, state, status): ...@@ -73,82 +73,88 @@ def insert_transcoded_video(jobid, jobtype, data, state, status):
return return
insert_video(data['lecture_id'], data['output']['path'], data['format_id'], status['hash'], status['filesize'], status['duration'], data['source_id'] ) insert_video(data['lecture_id'], data['output']['path'], data['format_id'], status['hash'], status['filesize'], status['duration'], data['source_id'] )
def parseVideoFileName(splitFileName):
    """Extract date, time and keywords from the chunks of a video file name.

    Every occurrence of '.mp4' is removed from a chunk before it is
    classified:

    * 6 characters that parse as ``%y%m%d`` -> ``data['date']`` (a date)
    * 4 characters that parse as ``%H%M``   -> ``data['time']`` (a time)
    * anything else                          -> appended to ``data['keywords']``

    Chunks of length 6 or 4 that are not a valid date/time are kept as
    keywords. If several chunks parse as a date (or time), the last one wins.

    :param splitFileName: iterable of strings, the file name split at its
        separators
    :returns: dict that always has the key 'keywords' (list of str) and has
        the keys 'date' / 'time' only when such a chunk was found
    """
    # we save all extracted data in a dict
    data = {'keywords': []}
    for fileNameChunk in splitFileName:
        fileNameChunk = fileNameChunk.replace('.mp4', '')
        if not fileNameChunk:
            # Consecutive separators (or a bare '.mp4') leave an empty chunk.
            # An empty keyword is a substring of every string, so it must not
            # take part in any later substring-based keyword matching.
            continue
        #-<YYMMDD> (date)
        #-<HHMM> (time)
        #-<keyword>
        try:
            if len(fileNameChunk) == 6:
                data['date'] = datetime.strptime(fileNameChunk, '%y%m%d').date()
            elif len(fileNameChunk) == 4:
                data['time'] = datetime.strptime(fileNameChunk, '%H%M').time()
            else:
                data['keywords'].append(fileNameChunk)
        except ValueError:
            # if it is not a valid date or time, handle it as a keyword
            data['keywords'].append(fileNameChunk)
    return data
def matchDatetimeOnLecture(lectures, date, time):
    """Return the lectures whose 'time' agrees with the given date and/or time.

    :param lectures: iterable of lecture mappings; lectures without a 'time'
        entry, or with a false 'time' value, never match
    :param date: date to compare with ``lecture['time'].date()``, or None to
        ignore the date
    :param time: time to compare with ``lecture['time'].time()``, or None to
        ignore the time
    :returns: list of matching lectures in input order; an empty list when
        both ``date`` and ``time`` are None
    """
    # Explicit None checks instead of truthiness: a midnight time object
    # evaluates as false on Python versions before 3.5 and would be ignored.
    if date is None and time is None:
        return []
    matches = []
    for lecture in lectures:
        if ('time' not in lecture) or (not lecture['time']):
            continue
        if date is not None and lecture['time'].date() != date:
            continue
        if time is not None and lecture['time'].time() != time:
            continue
        matches.append(lecture)
    return matches
def matchKeywordsOnLecture(lectures, keywords):
    """Return the first lecture that one of the keywords matches.

    The fields 'title', 'speaker', 'comment' and 'internal' are tried in that
    order; for each field every lecture is tested against every keyword. A
    keyword matches when it equals the field value, or when its lowercase form
    is a substring of the lowercased ``to_ascii`` form of the field value.

    :param lectures: iterable of lecture mappings providing the four fields
    :param keywords: iterable of keywords; empty strings are ignored
    :returns: one-element list with the first matching lecture, or an empty
        list when nothing matches
    """
    # An empty keyword is a substring of every text and would always select
    # the first lecture, so drop it. The lowercase form is computed once here
    # instead of once per field and lecture.
    needles = [(keyword, str(keyword).lower()) for keyword in keywords if keyword != '']
    if not needles:
        return []
    for field in ['title', 'speaker', 'comment', 'internal']:
        for lecture in lectures:
            for keyword, loweredKeyword in needles:
                # first test for exact match, else make it ASCII and try substring test
                if (keyword == lecture[field]) or \
                        (loweredKeyword in str(to_ascii(lecture[field]).lower())):
                    return [lecture]
    return []
def matchFileNameOnFormat(splitFileName):
    """Return the id of the video format named by the last file name chunk.

    The format string is the last chunk up to its first '.', lowercased. The
    formats are read ordered by 'prio' descending, and the first format whose
    'keywords' (separated by ',' or ' ') contain the format string wins.

    :param splitFileName: list of strings, the file name split at its
        separators
    :returns: the 'id' of the matching format, or 0 (the "unknown" format)
        when there is no usable format string or no format matches
    """
    # we match the last part of the file name without the extension
    formatstring = splitFileName[-1].split('.', 1)[0].lower() if splitFileName else ''
    if not formatstring:
        # Nothing to look up. An empty string could otherwise match an empty
        # entry that consecutive separators leave in a format's keyword list.
        return 0
    formats = query('SELECT * FROM formats ORDER BY prio DESC')
    for videoformat in formats:
        if formatstring in videoformat['keywords'].replace(',', ' ').split(' '):
            return videoformat['id']
    # default format is "unknown", with id 0
    return 0
def sort_file(filename, course=None, lectures=None):
    """Match a video file name on a lecture and a video format.

    File names look like ``<handle>-<sorter>-<format>.mp4``; '_' and ' ' are
    handled like '-'. The "sorter" part must be found with fuzzy matching and
    consists of one or more date, time or keyword chunks (see
    parseVideoFileName).

    :param filename: name of the video file
    :param course: course row to use; when false, the course is looked up by
        the handle taken from the file name
    :param lectures: lectures to match against; when false, all lectures of
        the course are loaded
    :returns: tuple ``(matches, fmt)``: ``matches`` is the list of matched
        lectures (exactly one on success) and ``fmt`` the format id. Returns
        ``([], 0)`` when no course with the handle exists.
    """
    splitFileName = filename.replace('_', '-').replace(' ', '-').split('-')
    if not course:
        handle = splitFileName[0]
        # a handle ending in 'ws' or 'ss' also includes the following chunk
        if handle.endswith(('ws', 'ss')):
            handle = '-'.join(splitFileName[:2])
        courses = query('SELECT * FROM courses WHERE handle = ?', handle)
        if not courses:
            return [], 0
        course = courses[0]
    if not lectures:
        lectures = query('SELECT * from lectures where course_id = ?', course['id'])
    data = parseVideoFileName(splitFileName)
    # try to match the file on a single lecture
    matches = matchDatetimeOnLecture(lectures, data.get('date'), data.get('time'))
    # if we can't match exactly based on date and time, we have to match keywords
    if len(matches) != 1 and data['keywords']:
        # only test lectures with the correct date/time, if we have any.
        # Else test for matches in all lectures of this course
        candidates = matches if matches else lectures
        # NOTE(review): when no keyword matches, this leaves an empty list,
        # even if several lectures matched the date/time.
        matches = matchKeywordsOnLecture(candidates, data['keywords'])
    # now we should have found exactly one match
    fmt = matchFileNameOnFormat(splitFileName)
    return matches, fmt
def log_sort_error(course_id, path, matches): def log_sort_error(course_id, path, matches):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment