From 43b3a99e29b34cc768b77e2adb5d6106afd1e01f Mon Sep 17 00:00:00 2001
From: Andreas <andreasv@fsmpi.rwth-aachen.de>
Date: Wed, 5 Oct 2016 01:46:18 +0200
Subject: [PATCH] finished sorter, now correctly sorting most of our old data

---
 sorter.py | 54 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/sorter.py b/sorter.py
index 0638dcd..7830402 100644
--- a/sorter.py
+++ b/sorter.py
@@ -14,7 +14,7 @@ def sort_log():
 			FROM sortlog 
 			JOIN lectures ON lectures.id = sortlog.lecture_id
 			JOIN courses ON courses.id = lectures.course_id 
-			ORDER BY sortlog.`when`
+			ORDER BY sortlog.`when` DESC
 		'''))
 
 
@@ -47,27 +47,25 @@ def sort_now():
 			filepath = coursepath + '/' + f
 			# filenames: <handle>-<sorter>-<format>.mp4
 			# sorter musst be found with fuzzy matching. musst be one or more of the following: (inside the loop)
-			splitfilename = f.replace('_','-').split('-')
+			splitfilename = f.replace('_','-').replace(' ','-').split('-')
 			if not	os.path.splitext(f)[1] == '.mp4':
 				continue
 			data = {'keywords': []}
 			# parse the file name and save all data in 'data'
 			for s in splitfilename:
+				s = s.replace('.mp4','')
 				#-<YYMMDD> (date)
 				#-<HHMM> (time)
 				#-<keyword>
-				#	Looking for keywords in: id,title,speaker,comment, comma seperated list in internal starting with "tags:" (in this order). first match counts
-				if len(s) == 6:
-					try:
+				#	Looking for keywords in: id,title,speaker,comment, comma separated list in internal
+				try:
+					if len(s) == 6:
 						data['date'] = datetime.strptime(s,'%y%m%d').date()
-					except ValueError:
-						pass
-				elif  len(s) == 4:
-					try:
+					elif  len(s) == 4:
 						data['time'] = datetime.strptime(s,'%H%M').time()
-					except ValueError:
-						pass
-				else:	
+					else:	
+						data['keywords'].append(s)
+				except ValueError:
 					data['keywords'].append(s)
 			# try to match the file on a single lecture
 			matches = []
@@ -75,16 +73,19 @@ def sort_now():
 			# first try date and time (if one of them is set)
 			if 'date' in data:
 				for l in lectures:
-					if (l['time'].date() == data['date']) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) :
+					if ((l['time'].date() == data['date']) and not ('time' in data)) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) :
 						matches.append(l)
 			# if we can't match based on date and time, we have to match keywords
 			if ((len(matches) != 1) and (len(data['keywords']) > 0)):
 				found = False
 				for field in ['id','title','speaker','comment','internal']:
 					#only test lectures with the correct date/time
+					if len(matches) == 0:
+						matches.extend(lectures)
 					for l in matches:
 						for k in data['keywords']:
-							if (k == l[field]) or (k in str(l[field])):
+							# first test for exact match, else make it ASCII and try substring test
+							if (k == l[field]) or (str(k).lower() in str(l[field]).lower().replace('ä','ae').replace('ü','ue').replace('ö','oe').replace('ß','ss') ):
 								found = True
 								matches = [l]
 							if found:
@@ -98,20 +99,23 @@ def sort_now():
 				# now match the format
 				for i in formats:
 					#we match the last part of the file name without the extension
-					formatstring = splitfilename[-1].split('.',1)[0]
-					if splitfilename[-1].split('.',1)[0] in i['keywords'].split(','):
+					formatstring = splitfilename[-1].split('.',1)[0].lower()
+					if formatstring in i['keywords'].replace(',',' ').split(' '):
 						data['format'] = i['id']
 						break
 				# if we found the format, insert the video
-				if 'format' in data:
-					modify('BEGIN')
-					video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),session['user']['givenName'],os.stat(coursepath+'/'+f).st_size)
-					query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now())
-					modify('COMMIT')
-				else:
-					print('unknown format',formatstring,[data,f])
-			else:
-				print('failed',[data,f])
+				if not 'format' in data:
+					data['format'] = 0
+				modify('BEGIN')
+				video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),session['user']['givenName'],os.stat(coursepath+'/'+f).st_size)
+				query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now())
+				modify('COMMIT')
+# for debugging only
+#			else:
+#				d = []
+#				for m in matches:
+#					d.append(m['id'])
+#				print('failed',{"data":data,"path":f,"results":d})
 
 		
 	if 'ref' in request.values:
-- 
GitLab