Commit 3437b648 authored by Andreas Valder's avatar Andreas Valder
Browse files

finished sorter, closes #44 , closes #43

parent 7366749f
......@@ -194,8 +194,8 @@ CREATE TABLE IF NOT EXISTS `announcements` (
`level` INTEGER NOT NULL DEFAULT 0,
`visible` INTEGER NOT NULL DEFAULT 0,
`deleted` INTEGER NOT NULL DEFAULT 0,
`time_publish` datetime DEFAULT "",
`time_expire` datetime DEFAULT "",
`time_publish` datetime DEFAULT '',
`time_expire` datetime DEFAULT '',
`time_created` datetime NOT NULL,
`time_updated` datetime NOT NULL,
`created_by` INTEGER NOT NULL
......@@ -203,8 +203,8 @@ CREATE TABLE IF NOT EXISTS `announcements` (
CREATE TABLE IF NOT EXISTS `featured` (
`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
`title` text NOT NULL DEFAULT '',
`text` text NOT NULL DEFAULT "",
`internal` text NOT NULL DEFAULT "",
`text` text NOT NULL DEFAULT '',
`internal` text NOT NULL DEFAULT '',
`visible` INTEGER NOT NULL DEFAULT 0,
`deleted` INTEGER NOT NULL DEFAULT 0,
`time_created` datetime NOT NULL,
......@@ -219,8 +219,20 @@ CREATE TABLE IF NOT EXISTS `sortlog` (
`video_id` INTEGER NOT NULL
);
-- Error log of the sorter: one row for each file that could not be matched
-- to exactly one lecture.
--   `when`     time at which the error was logged
--   `path`     path of the file as the sorter would have stored it
--   `matches`  comma-separated ids of the candidate lectures (may be empty)
--   `deleted`  soft-delete flag; the `sorterrorlog` view hides rows where it is set
CREATE TABLE IF NOT EXISTS `sorterrorlog_data` (
`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
`when` datetime NOT NULL,
`path` text NOT NULL,
`course_id` INTEGER NOT NULL,
`matches` text NOT NULL Default '',
`deleted` INTEGER NOT NULL DEFAULT '0',
`time_updated` datetime NOT NULL,
`time_created` datetime NOT NULL
);
-- Soft-delete views: each view exposes every column of its `*_data` table
-- but only the rows whose `deleted` flag is not set.
CREATE VIEW IF NOT EXISTS `courses` AS select * from `courses_data` where (not(`courses_data`.`deleted`));
CREATE VIEW IF NOT EXISTS `lectures` AS select * from `lectures_data` where (not(`lectures_data`.`deleted`));
CREATE VIEW IF NOT EXISTS `videos` AS select * from `videos_data` where (not(`videos_data`.`deleted`));
CREATE VIEW IF NOT EXISTS `auth` AS select * from `auth_data` where (not(`auth_data`.`deleted`));
CREATE VIEW IF NOT EXISTS `sorterrorlog` AS select * from `sorterrorlog_data` where (not(`sorterrorlog_data`.`deleted`));
COMMIT;
......@@ -276,6 +276,7 @@ def logout():
session.pop('user')
return redirect(request.values.get('ref', url_for('index')))
# name: (tablename, idcolumn, [editable_fields], [fields_to_set_at_creation_time])
tabs = {
'courses': ('courses_data', 'id', ['visible', 'listed', 'title', 'short',
'handle', 'organizer', 'subject', 'semester', 'downloadable',
......@@ -294,7 +295,9 @@ tabs = {
'featured': ('featured', 'id', ['title', 'text', 'internal', 'visible', 'deleted'],
['created_by', 'time_created', 'time_updated']),
'auth': ('auth_data', 'auth_id', ['auth_type', 'auth_user', 'auth_passwd', 'deleted'],
['course_id', 'lecture_id', 'video_id', 'created_by', 'time_created', 'time_updated'])
['course_id', 'lecture_id', 'video_id', 'created_by', 'time_created', 'time_updated']),
'sorterrorlog': ('sorterrorlog_data', 'id', ['deleted'],
['time_created', 'time_updated'])
}
@app.route('/edit', methods=['GET', 'POST'])
......
from server import *
import traceback
@app.route('/sort/log')
@register_navbar('Sortierlog', icon='sort-by-attributes-alt')
......@@ -16,44 +17,54 @@ def sort_log():
JOIN courses ON courses.id = lectures.course_id
ORDER BY sortlog.`when` DESC
LIMIT 50
'''))
'''),sorterrorlog=query('SELECT * FROM sorterrorlog ORDER BY sorterrorlog.`when` DESC'))
# Translation table for to_ascii(): German umlauts and sharp s, in both
# cases, mapped to their conventional ASCII digraphs.
_ASCII_TRANSLITERATION = str.maketrans({
    'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
    'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue', 'ẞ': 'SS',
})


def to_ascii(inputstring):
    """Return *inputstring* with German umlauts and sharp s transliterated.

    Lowercase letters are replaced as before ('ä' -> 'ae', 'ö' -> 'oe',
    'ü' -> 'ue', 'ß' -> 'ss').  Uppercase letters are now handled as well
    ('Ä' -> 'Ae', 'Ö' -> 'Oe', 'Ü' -> 'Ue', 'ẞ' -> 'SS'), so a caller that
    lowercases the result afterwards still gets a pure-ASCII string for
    capitalised words such as 'Übung'.

    All other characters are returned unchanged.  *inputstring* must be a
    ``str``; anything else raises ``AttributeError`` as it did before.
    """
    # A single translate() pass replaces the chain of str.replace() calls.
    return inputstring.translate(_ASCII_TRANSLITERATION)
@app.route('/sort/now')
@mod_required
@sched_func(600)
def sort_now():
modify('BEGIN')
courses = query('SELECT * FROM courses')
formats = query('SELECT * FROM formats ORDER BY prio')
for c in courses:
for course in courses:
for mountpoint in config['VIDEOMOUNT']:
existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',c['id'])
lectures = query('SELECT * from lectures where course_id = ?',c['id'])
coursepath = mountpoint['mountpoint']+c['handle']
existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',course['id'])
knownerrors = query('SELECT sorterrorlog.path FROM sorterrorlog WHERE sorterrorlog.course_id = ?',course['id'])
ignorefiles = existingvideos + knownerrors
lectures = query('SELECT * from lectures where course_id = ?',course['id'])
coursepath = mountpoint['mountpoint']+course['handle']
try:
files = os.listdir(coursepath)
except FileNotFoundError:
files = []
for f in files:
for filename in files:
try:
# if the video is in the table "videos" already, skip it
exists = False
for e in existingvideos:
# if the video is in the table "videos" already (with the correct course), skip it
ignore = False
for file_to_ignore in ignorefiles:
# path is something like
# vpnonline/08ws-swt/08ws-swt-081118.mp4
e_filename = e['path']
if os.path.basename(f) == os.path.basename(e_filename):
exists = True
if os.path.basename(filename) == os.path.basename(file_to_ignore['path']):
ignore = True
break
if exists:
if ignore:
continue
filepath = coursepath + '/' + f
filepath = coursepath + '/' + filename
# filenames: <handle>-<sorter>-<format>.mp4
# sorter musst be found with fuzzy matching. musst be one or more of the following: (inside the loop)
splitfilename = f.replace('_','-').replace(' ','-').split('-')
if not os.path.splitext(f)[1] == '.mp4':
# "sorter" musst be found with fuzzy matching. "sorter" musst be one or more of the following types: (inside the loop)
# '_' and ' ' are handled like '-'
splitfilename = filename.replace('_','-').replace(' ','-').split('-')
if not os.path.splitext(filename)[1] == '.mp4':
continue
# we save all extracted data in a dict
data = {'keywords': []}
# parse the file name and save all data in 'data'
for s in splitfilename:
......@@ -61,7 +72,7 @@ def sort_now():
#-<YYMMDD> (date)
#-<HHMM> (time)
#-<keyword>
# Looking for keywords in: id,title,speaker,comment, comma seperated list in internal
# Looking for keywords in: title, speaker, comment, and the comma-separated list in internal
try:
if len(s) == 6:
data['date'] = datetime.strptime(s,'%y%m%d').date()
......@@ -70,28 +81,35 @@ def sort_now():
else:
data['keywords'].append(s)
except ValueError:
# if it's not a date or time, handle it as a keyword
data['keywords'].append(s)
# try to match the file on a single lecture
matches = []
# first try date and time (if one of them is set)
if 'date' in data:
for l in lectures:
if ((l['time'].date() == data['date']) and not ('time' in data)) or (('time' in data) and l['time'] == datetime.combine(data['date'],data['time'])) :
matches.append(l)
# if we can't match based on date and time, we have to match keywords
if ('date' in data) or ('time' in data):
for lecture in lectures:
if not ('time' in lecture) or not lecture['time']:
continue
if ('date' in data) and (lecture['time'].date() != data['date']):
continue
if ('time' in data) and (lecture['time'].time() != data['time']):
continue
matches.append(lecture)
# if we can't match exactly based on date and time, we have to match keywords
if ((len(matches) != 1) and (len(data['keywords']) > 0)):
#only test lectures with the correct date/time, if we have any. Else test for matches in all lectures of this course
if len(matches) == 0:
matches.extend(lectures)
found = False
for field in ['id','title','speaker','comment','internal']:
#only test lectures with the correct date/time
if len(matches) == 0:
matches.extend(lectures)
for l in matches:
for k in data['keywords']:
for field in ['title','speaker','comment','internal']:
for lecture in matches:
for keyword in data['keywords']:
# first test for an exact match, else convert to ASCII and try a substring test
if (k == l[field]) or (str(k).lower() in str(l[field]).lower().replace('ä','ae').replace('ü','ue').replace('ö','oe').replace('ß','ss') ):
if (keyword == lecture[field]) or \
(str(keyword).lower() in str(to_ascii(lecture[field]).lower())):
found = True
matches = [l]
matches = [lecture]
if found:
break
if found:
......@@ -99,32 +117,37 @@ def sort_now():
if found:
break
# now we should have found exactly one match
dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename
if len(matches) == 1:
# now match the format
for i in formats:
for videoformat in formats:
#we match the last part of the file name without the extension
formatstring = splitfilename[-1].split('.',1)[0].lower()
if formatstring in i['keywords'].replace(',',' ').split(' '):
data['format'] = i['id']
if formatstring in videoformat['keywords'].replace(',',' ').split(' '):
data['format'] = videoformat['id']
break
# if we found the format, insert the video
# default format is "unknown", with id 0
if not 'format' in data:
data['format'] = 0
modify('BEGIN')
video_id = modify('INSERT INTO videos_data (lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size) VALUES (?,0,?,?,"","","",?,?,?,?,"",?)',matches[0]['id'],mountpoint['prefix']+c['handle']+'/'+f,data['format'],datetime.now(),datetime.now(),datetime.now(),-1,os.stat(coursepath+'/'+f).st_size)
query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)',matches[0]['id'],video_id,c['handle']+'/'+f,datetime.now())
modify('COMMIT')
# for debuging only
# insert the video into videos_data and log
video_id = modify('''
INSERT INTO videos_data
(lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size)
VALUES
(?,0,?,?,"","","",?,?,?,?,"",?)''',
matches[0]['id'], dbfilepath, data['format'], datetime.now(), datetime.now(), datetime.now(), -1, os.stat(filepath).st_size)
query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', matches[0]['id'], video_id, dbfilepath, datetime.now())
else:
d = []
for m in matches:
d.append(m['id'])
print('failed',{"data":data,"path":f,"results":d})
except Exception as exc:
# raise exc
pass
# if we couldn't match the video on exactly one lecture, log an error
matches_id = []
for match in matches:
matches_id.append(str(match['id']))
query('INSERT INTO sorterrorlog_data (course_id,path,matches,`when`,time_updated,time_created) VALUES (?,?,?,?,?,?)', course['id'], dbfilepath, ','.join(matches_id), datetime.now(), datetime.now(), datetime.now())
except Exception:
traceback.print_exc()
modify('COMMIT')
if 'ref' in request.values:
return redirect(request.values['ref'])
else:
......
{% from 'macros.html' import preview %}
{% from 'macros.html' import moderator_delete %}
{% extends "base.html" %}
{% block content %}
<div class="panel-group">
<div class="panel panel-default">
<div class="panel-heading">
<h1 class="panel-title">Sortierlog <a class="btn btn-default" href="{{url_for('sort_now', ref=request.url)}}">Jetzt einsortieren</a></h1>
<h1 class="panel-title">Sortierlog
<a class="btn btn-default" href="{{url_for('sort_now', ref=request.url)}}">Jetzt einsortieren</a>
<button class="btn btn-default" onclick="$('button[data-path^=\'sorterrorlog.\'][data-path$=\'.deleted\']').each(function (e) { moderator.api.set($(this).data('path'),1,false); }); window.location.reload();">Alle Fehler entfernen</button>
</h1>
</div>
<div class="panel-body">
<p>Hier werden die hochgeladenen Videos einsortiert und geloggt wo jede Datei einsortiert wurde.</p>
</div>
{% if sorterrorlog %}
<div class="table-responsive" style="max-height: 250px">
<table class="table table-condensed">
<tr>
<th></th>
<th>Zeit</th>
<th>Pfad</th>
<th>Course</th>
<th>Matches</th>
</tr>
{% for i in sorterrorlog %}
<tr class="danger">
<td>{{ moderator_delete(['sorterrorlog',i.id,'deleted']) }}</td>
<td>{{i.when}}</td>
<td><a href="{{ config.VIDEOPREFIX }}/{{i.path}}">{{i.path}}</a></td>
<td><a href="{{url_for('course', id=i.course_id)}}">{{i.course_id}}</a></td>
<td>
{% for j in i.matches.split(',') %}
{% if not loop.first %},{% endif %}
<a href="{{url_for('course', id=i.course_id)}}#lecture-{{j}}">{{j}}</a>
{% endfor %}
</td>
</tr>
{% endfor %}
</table>
</div>
{% endif %}
<div class="table-responsive">
<table class="table table-condensed">
<tr>
......@@ -21,9 +51,9 @@
{% for i in sortlog %}
<tr>
<td>{{i.when}}</td>
<td>{{i.path}}</td>
<td>{{i.course_id}}</td>
<td>{{i.lecture_id}}</td>
<td><a href="{{ config.VIDEOPREFIX }}/{{i.path}}">{{i.path}}</a></td>
<td><a href="{{url_for('course', id=i.course_id)}}">{{i.course_id}}</a></td>
<td><a href="{{url_for('course', id=i.course_id)}}#lecture-{{i.lecture_id}}">{{i.lecture_id}}</a></td>
<td>{{i.id}}</td>
</tr>
{% endfor %}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment