From 080cdbc5b7dee6e9f950f8bc4ca93f0e7f51cfe9 Mon Sep 17 00:00:00 2001
From: Andreas <andreasv@fsmpi.rwth-aachen.de>
Date: Wed, 5 Oct 2016 04:18:04 +0200
Subject: [PATCH] extended sorter to work with old folder structure

---
 config.py.example |   2 +-
 sorter.py         | 191 ++++++++++++++++++++++++----------------------
 2 files changed, 102 insertions(+), 91 deletions(-)

diff --git a/config.py.example b/config.py.example
index 53d80f9..711c23f 100644
--- a/config.py.example
+++ b/config.py.example
@@ -1,7 +1,7 @@
 # Defaults for development ,do not use in production!
 DEBUG = False
 VIDEOPREFIX = 'https://videoag.fsmpi.rwth-aachen.de'
-VIDEOMOUNT = 'files/'
+VIDEOMOUNT = ['files/protected/','files/pub/','files/vpnonline/']
 #SECRET_KEY = 'something random'
 
 DB_SCHEMA = 'db_schema.sql'
diff --git a/sorter.py b/sorter.py
index 5146cb0..fcfeb75 100644
--- a/sorter.py
+++ b/sorter.py
@@ -19,105 +19,116 @@ def sort_log():
 		'''))
 
 
+
+
 @app.route('/sort/now')
-@sched_func(60)
 @mod_required
 def sort_now():
+	return sort_wraper()
+
+@sched_func(60)
+def sort_auto():
+	pass
+#	return sort_wraper()
+
+def sort_wraper():
 	courses = query('SELECT * FROM courses')
 	formats = query('SELECT * FROM formats ORDER BY prio')
 	for c in courses:
-		existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',c['id'])
-		lectures = query('SELECT * from lectures where course_id = ?',c['id'])
-		coursepath = config['VIDEOMOUNT']+c['handle']
-		try:
-			files = os.listdir(coursepath)
-		except FileNotFoundError:
-			files = []
-
-		for f in files:
-			# if the video is in the table "videos" already, skip it
-			exists = False
-			for e in existingvideos:
-				# vpnonline/08ws-swt/08ws-swt-081118.mp4
-				e_filename =  e['path'].split('/',2)[1]
-				if f == e_filename:
-					exists = True
-					break
-			if exists:
-				continue
-
-			filepath = coursepath + '/' + f
-			# filenames: <handle>-<sorter>-<format>.mp4
-			# sorter musst be found with fuzzy matching. musst be one or more of the following: (inside the loop)
-			splitfilename = f.replace('_','-').replace(' ','-').split('-')
-			if not	os.path.splitext(f)[1] == '.mp4':
-				continue
-			data = {'keywords': []}
-			# parse the file name and save all data in 'data'
-			for s in splitfilename:
-				s = s.replace('.mp4','')
-				#-<YYMMDD> (date)
-				#-<HHMM> (time)
-				#-<keyword>
-				#	Looking for keywords in: id,title,speaker,comment, comma seperated list in internal
+		for basepath in config['VIDEOMOUNT']:
+			existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',c['id'])
+			lectures = query('SELECT * from lectures where course_id = ?',c['id'])
+			coursepath = basepath+c['handle']
+			try:
+				files = os.listdir(coursepath)
+			except FileNotFoundError:
+				files = []
+			for f in files:
 				try:
-					if len(s) == 6:
-						data['date'] = datetime.strptime(s,'%y%m%d').date()
-					elif  len(s) == 4:
-						data['time'] = datetime.strptime(s,'%H%M').time()
-					else:	
-						data['keywords'].append(s)
-				except ValueError:
-					data['keywords'].append(s)
-			# try to match the file on a single lecture
-			matches = []
-			
-			# first try date and time (if one of them is set)
-			if 'date' in data:
-				for l in lectures:
-					if ((l['time'].date() == data['date']) and not ('time' in data)) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) :
-						matches.append(l)
-			# if we can't match based on date and time, we have to match keywords
-			if ((len(matches) != 1) and (len(data['keywords']) > 0)):
-				found = False
-				for field in ['id','title','speaker','comment','internal']:
-					#only test lectures with the correct date/time
-					if len(matches) == 0:
-						matches.extend(lectures)
-					for l in matches:
-						for k in data['keywords']:
-							# first test for exact match, else make it asci and try substring test
-							if (k == l[field]) or (str(k).lower() in str(l[field]).lower().replace('ä','ae').replace('ü','ue').replace('ö','oe').replace('ß','ss') ):
-								found = True
-								matches = [l]
+					# if the video is in the table "videos" already, skip it
+					exists = False
+					for e in existingvideos:
+						# vpnonline/08ws-swt/08ws-swt-081118.mp4
+						e_filename =  e['path'].split('/',2)[1]
+						if f == e_filename:
+							exists = True
+							break
+					if exists:
+						continue
+					filepath = coursepath + '/' + f
+					# filenames: <handle>-<sorter>-<format>.mp4
+					# sorter must be found with fuzzy matching. must be one or more of the following: (inside the loop)
+					splitfilename = f.replace('_','-').replace(' ','-').split('-')
+					if not	os.path.splitext(f)[1] == '.mp4':
+						continue
+					data = {'keywords': []}
+					# parse the file name and save all data in 'data'
+					for s in splitfilename:
+						s = s.replace('.mp4','')
+						#-<YYMMDD> (date)
+						#-<HHMM> (time)
+						#-<keyword>
+						#	Looking for keywords in: id,title,speaker,comment, comma separated list in internal
+						try:
+							if len(s) == 6:
+								data['date'] = datetime.strptime(s,'%y%m%d').date()
+							elif  len(s) == 4:
+								data['time'] = datetime.strptime(s,'%H%M').time()
+							else:	
+								data['keywords'].append(s)
+						except ValueError:
+							data['keywords'].append(s)
+					# try to match the file on a single lecture
+					matches = []
+					
+					# first try date and time (if one of them is set)
+					if 'date' in data:
+						for l in lectures:
+							if ((l['time'].date() == data['date']) and not ('time' in data)) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) :
+								matches.append(l)
+					# if we can't match based on date and time, we have to match keywords
+					if ((len(matches) != 1) and (len(data['keywords']) > 0)):
+						found = False
+						for field in ['id','title','speaker','comment','internal']:
+							#only test lectures with the correct date/time
+							if len(matches) == 0:
+								matches.extend(lectures)
+							for l in matches:
+								for k in data['keywords']:
+									# first test for exact match, else make it ASCII and try substring test
+									if (k == l[field]) or (str(k).lower() in str(l[field]).lower().replace('ä','ae').replace('ü','ue').replace('ö','oe').replace('ß','ss') ):
+										found = True
+										matches = [l]
+									if found:
+										break
+								if found:
+									break
 							if found:
 								break
-						if found:
-							break
-					if found:
-						break
-			# now we should have found exactly one match
-			if len(matches) == 1:
-				# now match the format
-				for i in formats:
-					#we match the last part of the file name without the extension
-					formatstring = splitfilename[-1].split('.',1)[0].lower()
-					if formatstring in i['keywords'].replace(',',' ').split(' '):
-						data['format'] = i['id']
-						break
-				# if we found the format, insert the video
-				if not 'format' in data:
-					data['format'] = 0
-				modify('BEGIN')
-				video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),session['user']['givenName'],os.stat(coursepath+'/'+f).st_size)
-				query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now())
-				modify('COMMIT')
-# for debuging only
-#			else:
-#				d = []
-#				for m in matches:
-#					d.append(m['id'])
-#				print('failed',{"data":data,"path":f,"results":d})
+					# now we should have found exactly one match
+					if len(matches) == 1:
+						# now match the format
+						for i in formats:
+							#we match the last part of the file name without the extension
+							formatstring = splitfilename[-1].split('.',1)[0].lower()
+							if formatstring in i['keywords'].replace(',',' ').split(' '):
+								data['format'] = i['id']
+								break
+						# if we found the format, insert the video
+						if not 'format' in data:
+							data['format'] = 0
+						modify('BEGIN')
+						video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),-1,os.stat(coursepath+'/'+f).st_size)
+						query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now())
+						modify('COMMIT')
+		# for debugging only
+		#			else:
+		#				d = []
+		#				for m in matches:
+		#					d.append(m['id'])
+		#				print('failed',{"data":data,"path":f,"results":d})
+				except Exception:
+					pass
 
 		
 	if 'ref' in request.values:
-- 
GitLab