working sorter. not handling all cases yet. #26

d7de1784 · Andreas Valder · db6941d2 · d7de1784 · d7de1784 · d7de1784
Commit d7de1784 authored 8 years ago by Andreas Valder
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 config.py
 __pycache__
 *.sqlite
+files/*
--- a/config.py.example
+++ b/config.py.example
 # Defaults for development ,do not use in production!
 DEBUG = True
 VIDEOPREFIX = 'https://videoag.fsmpi.rwth-aachen.de'
+VIDEOMOUNT = 'files/'
 #SECRET_KEY = 'something random'

 DB_SCHEMA = 'db_schema.sql'

--- a/db_schema.sql
+++ b/db_schema.sql
@@ -206,6 +206,14 @@ CREATE TABLE IF NOT EXISTS `featured` (
  `created_by` INTEGER NOT NULL
 );

+CREATE TABLE IF NOT EXISTS `sortlog` (
+`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+  `when` datetime NOT NULL,
+  `path` text NOT NULL,
+  `lecture_id` INTEGER NOT NULL,
+  `video_id` INTEGER NOT NULL
+);
+
 CREATE VIEW IF NOT EXISTS `courses` AS select * from `courses_data` where (not(`courses_data`.`deleted`));
 CREATE VIEW IF NOT EXISTS `lectures` AS select * from `lectures_data` where (not(`lectures_data`.`deleted`));
 CREATE VIEW IF NOT EXISTS `videos` AS select * from `videos_data` where (not(`videos_data`.`deleted`));

--- a/server.py
+++ b/server.py
@@ -417,13 +417,6 @@ def sitemap():

 	return Response(render_template('sitemap.xml', pages=pages), 200, {'Content-Type': 'application/atom+xml'} )

-
-@app.route('/sortlog')
-@register_navbar('Sortierlog', icon='sort-by-attributes-alt')
-@mod_required
-def sortlog():
-	return render_template('sortlog.html')
-
 import feeds
 import importer
 import schedule

--- a/sorter.py
+++ b/sorter.py
 from server import *

+@app.route('/sort/log')
+@register_navbar('Sortierlog', icon='sort-by-attributes-alt')
+@mod_required
+def sort_log():
+	"""Render 'sortlog.html' with the contents of the sortlog table.
+
+	Every sortlog row is joined with its lecture and the lecture's course, so
+	the template additionally receives lecture_title, course_id and
+	course_title. Rows are ordered by the sortlog `when` column. The inner
+	JOINs mean that rows whose lecture or course is not returned by the
+	`lectures`/`courses` relations are not listed.
+	"""
+	# NOTE(review): sortlog.* already contains lecture_id; the explicit alias below yields the same value (join condition)
+	return render_template('sortlog.html',sortlog=query('''
+			SELECT 
+				sortlog.*,
+				lectures.id as lecture_id,
+				lectures.title as lecture_title,
+				lectures.course_id as course_id, 
+				courses.title as course_title
+			FROM sortlog 
+			JOIN lectures ON lectures.id = sortlog.lecture_id
+			JOIN courses ON courses.id = lectures.course_id 
+			ORDER BY sortlog.`when`
+		'''))
+
+
+@app.route('/sort/now')
+@mod_required
+def sort_now():
+	"""Sort new .mp4 files from the video mount into the lectures of their course.
+
+	For every course the directory config['VIDEOMOUNT'] + <course handle> is
+	listed (a missing directory counts as empty). Each .mp4 file whose name is
+	not yet referenced by a video of that course is parsed according to the
+	scheme <handle>-<sorter>-<format>.mp4, where the dash separated sorter
+	parts can be a date (YYMMDD), a time (HHMM) or free keywords.
+
+	A file is inserted into videos_data (and logged in sortlog) only if
+	exactly one lecture of the course matches and the last part of the file
+	name is listed in the keywords of a row in `formats`. The outcome for every
+	examined file is printed ('sorted', 'unknown format' or 'failed').
+
+	Returns a redirect to the request value "ref" if it is given, otherwise
+	the tuple ('OK', 200).
+	"""
+	courses = query('SELECT * FROM courses')
+	formats = query('SELECT * FROM formats ORDER BY prio')
+	for c in courses:
+		existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',c['id'])
+		lectures = query('SELECT * from lectures where course_id = ?',c['id'])
+		coursepath = config['VIDEOMOUNT']+c['handle']
+		try:
+			files = os.listdir(coursepath)
+		except FileNotFoundError:
+			files = []
+
+		# File names that are in the table "videos" already. Only the last path
+		# component is compared, so "vpnonline/08ws-swt/08ws-swt-081118.mp4" and
+		# "08ws-swt/08ws-swt-081118.mp4" are both recognised.
+		existingnames = {e['path'].rsplit('/',1)[-1] for e in existingvideos}
+
+		for f in files:
+			# if the video is in the table "videos" already, skip it
+			if f in existingnames:
+				continue
+			if os.path.splitext(f)[1] != '.mp4':
+				continue
+
+			# filenames: <handle>-<sorter>-<format>.mp4
+			# sorter must be found with fuzzy matching. must be one or more of the following: (inside the loop)
+			splitfilename = f.split('-')
+			data = {'keywords': []}
+			# parse the file name and save all data in 'data'
+			for s in splitfilename:
+				#-<YYMMDD> (date)
+				#-<HHMM> (time)
+				#-<keyword>
+				#	Looking for keywords in: id,title,speaker,comment, comma separated list in internal starting with "tags:" (in this order). first match counts
+				if not s:
+					# an empty part (double dash) would be a substring of every field and match any lecture
+					continue
+				if len(s) == 6:
+					try:
+						data['date'] = datetime.strptime(s,'%y%m%d').date()
+					except ValueError:
+						# not a date after all, keep it as a keyword instead of dropping it
+						data['keywords'].append(s)
+				elif len(s) == 4:
+					try:
+						data['time'] = datetime.strptime(s,'%H%M').time()
+					except ValueError:
+						# not a time after all, keep it as a keyword instead of dropping it
+						data['keywords'].append(s)
+				else:
+					data['keywords'].append(s)
+
+			# try to match the file on a single lecture
+			matches = []
+
+			# first try the date; if a time is given as well, use it to pick between several lectures on that day
+			if 'date' in data:
+				matches = [l for l in lectures if l['time'].date() == data['date']]
+				if 'time' in data and len(matches) > 1:
+					exacttime = datetime.combine(data['date'],data['time'])
+					timematches = [l for l in matches if l['time'] == exacttime]
+					# only narrow down if at least one lecture starts exactly at the given time
+					if timematches:
+						matches = timematches
+
+			# if we can't match based on date and time, we have to match keywords
+			if len(matches) != 1 and data['keywords']:
+				# Fields are tried in this order, the first lecture/keyword hit counts.
+				# Only lectures with the correct date/time are tested, so a file
+				# without a parsable date can not be matched by keywords (yet).
+				for field in ['id','title','speaker','comment','internal']:
+					hit = next((l for l in matches for k in data['keywords'] if k in str(l[field])), None)
+					if hit is not None:
+						matches = [hit]
+						break
+
+			# now we should have found exactly one match
+			if len(matches) != 1:
+				print('failed',[data,f])
+				continue
+
+			# now match the format
+			# we match the last part of the file name without the extension
+			formatstring = splitfilename[-1].split('.',1)[0]
+			for i in formats:
+				if formatstring in i['keywords'].split(','):
+					data['format'] = i['id']
+					break
+			if 'format' not in data:
+				print('unknown format',formatstring,[data,f])
+				continue
+
+			# we found the format, insert the video
+			now = datetime.now()
+			dbpath = c['handle']+'/'+f
+			modify('BEGIN')
+			video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash) VALUES (?,0,?,?,"","","",?,?,?,?,"")',matches[0]['id'],dbpath,data['format'],now,now,now,session['user']['givenName'])
+			# NOTE(review): this INSERT goes through query() while the one above uses modify() - confirm this is intended
+			query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,dbpath,now)
+			modify('COMMIT')
+			print('sorted',[data,f])
+
+	# NOTE(review): "ref" is used as redirect target without validation - confirm this is acceptable or restrict it to local URLs
+	if 'ref' in request.values:
+		return redirect(request.values['ref'])
+	else:
+		return 'OK',  200

--- a/templates/sortlog.html
+++ b/templates/sortlog.html
@@ -4,7 +4,7 @@
 <div class="panel-group">
 	<div class="panel panel-default">
 		<div class="panel-heading">
-			<h1 class="panel-title">Sortierlog</h1>
+			<h1 class="panel-title">Sortierlog <a class="btn btn-default" href="{{url_for('sort_now', ref=request.url)}}">Jetzt einsortieren</a></h1>
 		</div>
 		<div class="panel-body">
 			<p>Hier werden die hochgeladenen Videos einsortiert und geloggt wo jede Datei einsortiert wurde.</p>
@@ -18,7 +18,7 @@
 					<th>Lecture</th>
 					<th>Video id</th>
 				</tr>
-				{% for i in changelog %}
+				{% for i in sortlog %}
 					<tr>
 						<td>{{i.when}}</td>
 						<td>{{i.path}}</td>