Commit 38f4a48c authored by Julian Rother

Merge branch 'master' into live_sources

parents 59e7f4d5 83e94e10
@@ -7,7 +7,7 @@ unittest:
- python3 -V
- uname -a
- apt install -y sqlite3 locales-all git python3-flask python3-ldap3 python3-requests python3-lxml python3-icalendar python3-mysql.connector python3-coverage
- python3 -m coverage run tests.py
- python3 -m coverage run runTests.py
- python3 -m coverage report --include "./*"
- python3 -m coverage report -m --include "./*" > report.txt
- python3 -m coverage html --include "./*"
@@ -33,4 +33,4 @@ deploy_staging:
stage: deploy
script:
- pacman --noconfirm -Sy ansible git
\ No newline at end of file
@@ -15,9 +15,9 @@ Note: this variant starts a local test version of the website; not …
Alternatively, especially for testing the access restrictions: see `nginx.example.conf`.
### Unit tests
Tests can be run with `./tests.py`.
Tests can be run with `./runTests.py`.
Coverage tests can be run with `rm .coverage; python -m coverage run tests.py; python -m coverage html`. This creates a folder `htmlcov` containing the HTML output.
Coverage tests can be run with `rm .coverage; python -m coverage run runTests.py; python -m coverage html`. This creates a folder `htmlcov` containing the HTML output.
### To contribute:
1. Fork the repo to your own user; use the "Fork" button on the website for this
......
@@ -29,7 +29,7 @@ LDAP_GROUPS = ['fachschaft']
#ICAL_URL = 'https://user:password@mail.fsmpi.rwth-aachen.de/SOGo/....ics'
ERROR_PAGE = 'static/500.html'
RWTH_IP_RANGES = ['134.130.0.0/16', '137.226.0.0/16', '134.61.0.0/16', '192.35.229.0/24', '2a00:8a60::/32']
FSMPI_IP_RANGES = ['137.226.35.192/29', '137.226.75.0/27', '137.226.127.32/27', '137.226.231.192/26', '134.130.102.0/26' ]
FSMPI_IP_RANGES = ['137.226.35.192/29', '137.226.75.0/27', '137.226.127.32/27', '137.226.231.192/26', '134.130.102.0/26', '127.0.0.1/32']
DISABLE_SCHEDULER = False
#MAIL_SERVER = 'mail.fsmpi.rwth-aachen.de'
MAIL_FROM = 'Video AG-Website <videoag-it@lists.fsmpi.rwth-aachen.de>'
......
from server import *
from sorter import insert_video
import os.path
import json
def set_metadata(dest, course, lecture):
chapters = query('SELECT text, time FROM chapters WHERE lecture_id = ? AND visible ORDER BY time', lecture['id'])
@@ -72,7 +74,16 @@ def schedule_transcode(source, fmt_id=None, video=None):
data['lecture_id'] = lecture['id']
data['format_id'] = fmt['id']
data['source_id'] = source['id']
schedule_job('transcode', data, queue="background")
return schedule_job('transcode', data, queue="background")
@job_handler('transcode')
def insert_transcoded_video(jobid, jobtype, data, state, status):
if 'lecture_id' not in data or 'source_id' not in data or 'format_id' not in data:
return
if 'video_id' in data:
return
video_id = insert_video(data['lecture_id'], data['output']['path'], data['format_id'], status['hash'], status['filesize'], status['duration'], data['source_id'])
schedule_remux(data['lecture_id'], video_id)
@app.route('/internal/jobs/add/reencode', methods=['GET', 'POST'])
@mod_required
......
from server import modify, query, date_json_handler, sched_func, notify_admins
from datetime import datetime, timedelta
import traceback
import json
job_handlers = {}
def job_handler(*types, state='finished'):
def wrapper(func):
for jobtype in types:
if jobtype not in job_handlers:
job_handlers[jobtype] = {}
if state not in job_handlers[jobtype]:
job_handlers[jobtype][state] = []
job_handlers[jobtype][state].append(func)
return func
return wrapper
def job_handler_handle(id, state):
job = query('SELECT * FROM jobs WHERE id = ?', id, nlfix=False)[0]
type = job['type']
for func in job_handlers.get(type, {}).get(state, []):
try:
func(id, job['type'], json.loads(job['data']), state, json.loads(job['status']))
except Exception:
notify_admins('scheduler_exception', name=func.__name__, traceback=traceback.format_exc())
traceback.print_exc()
@sched_func(10)
def job_catch_broken():
# scheduled but never pinged
query('BEGIN')
query('UPDATE jobs SET state="ready" WHERE state="scheduled" and time_scheduled < ?', datetime.now() - timedelta(seconds=10))
try:
query('COMMIT')
except:
pass
# no ping within the last 60s
query('BEGIN')
query('UPDATE jobs SET state="failed" WHERE state="running" and last_ping < ?', datetime.now() - timedelta(seconds=60))
try:
query('COMMIT')
except:
pass
def job_set_state(id, state):
query('UPDATE jobs SET state=? WHERE id=?', state, id)
def schedule_job(jobtype, data=None, priority=0, queue="default"):
if not data:
data = {}
return modify('INSERT INTO jobs (type, priority, queue, data, time_created) VALUES (?, ?, ?, ?, ?)',
jobtype, priority, queue, json.dumps(data, default=date_json_handler), datetime.now())
def cancel_job(job_id):
query('UPDATE jobs SET state = "deleted" WHERE id = ? AND state = "ready"', job_id)
query('UPDATE jobs SET canceled = 1 WHERE id = ?', job_id)
def restart_job(job_id, canceled=False):
if canceled:
query('UPDATE jobs SET state = "ready", canceled = 0 WHERE id = ? AND state = "failed"', job_id)
else:
query('UPDATE jobs SET state = "ready" WHERE id = ? AND state = "failed" AND NOT canceled', job_id)
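A minimal usage sketch of the registration API above; the job type 'mytype' and its payload are illustrative assumptions, not part of this commit:

from jobmanagement import job_handler, job_handler_handle, schedule_job

@job_handler('mytype')  # registers for state='finished' by default
def on_mytype_finished(jobid, jobtype, data, state, status):
    # 'data' is the dict passed to schedule_job(); 'status' is the worker's report
    print('job', jobid, 'finished:', status)

job_id = schedule_job('mytype', data={'path': 'example.mp4'}, queue='background')
# when a worker later reports the job finished, jobs_ping() calls
# job_handler_handle(job_id, 'finished'), which dispatches to on_mytype_finished()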
from server import *
import traceback
import json
import random
from time import sleep
job_handlers = {}
def job_handler(*types, state='finished'):
def wrapper(func):
for jobtype in types:
if jobtype not in job_handlers:
job_handlers[jobtype] = {}
if state not in job_handlers[jobtype]:
job_handlers[jobtype][state] = []
job_handlers[jobtype][state].append(func)
return func
return wrapper
def schedule_job(jobtype, data=None, priority=0, queue="default"):
if not data:
data = {}
return modify('INSERT INTO jobs (type, priority, queue, data, time_created) VALUES (?, ?, ?, ?, ?)',
jobtype, priority, queue, json.dumps(data, default=date_json_handler), datetime.now())
def cancel_job(job_id):
modify('UPDATE jobs SET state = "deleted" WHERE id = ? AND state = "ready"', job_id)
modify('UPDATE jobs SET canceled = 1 WHERE id = ?', job_id)
def restart_job(job_id, canceled=False):
if canceled:
modify('UPDATE jobs SET state = "ready", canceled = 0 WHERE id = ? AND state = "failed"', job_id)
else:
modify('UPDATE jobs SET state = "ready" WHERE id = ? AND state = "failed" AND NOT canceled', job_id)
@app.route('/internal/jobs/overview')
@register_navbar('Jobs', iconlib='fa', icon='suitcase', group='weitere')
@mod_required
@@ -96,23 +67,6 @@ def jobs_api_token_required(func):
return func(*args, **kwargs)
return decorator
@sched_func(10)
def jobs_catch_broken():
# scheduled but never pinged
query('BEGIN')
query('UPDATE jobs SET state="ready" WHERE state="scheduled" and time_scheduled < ?', datetime.now() - timedelta(seconds=10))
try:
query('COMMIT')
except:
pass
# no ping within the last 60s
query('BEGIN')
query('UPDATE jobs SET state="failed" WHERE state="running" and last_ping < ?', datetime.now() - timedelta(seconds=60))
try:
query('COMMIT')
except:
pass
@app.route('/internal/jobs/api/job/<int:id>/ping', methods=['GET', 'POST'])
@jobs_api_token_required
def jobs_ping(id):
@@ -123,12 +77,8 @@ def jobs_ping(id):
query('UPDATE jobs SET time_finished = ?, status = ?, state = "finished" where id = ?', datetime.now(), status, id)
else:
query('UPDATE jobs SET worker = ?, last_ping = ?, status = ?, state = ? where id = ?', hostname, datetime.now(), status, state, id)
job_handler_handle(id, state)
job = query('SELECT * FROM jobs WHERE id = ?', id, nlfix=False)[0]
for func in job_handlers.get(job['type'], {}).get(state, []):
try:
func(id, job['type'], json.loads(job['data']), state, json.loads(job['status']))
except Exception:
traceback.print_exc()
if job['canceled']:
return 'Job canceled', 205
else:
......
from server import *
import requests
from xml.etree import ElementTree
import random
......
#!/usr/bin/env python3
import unittest
import os
import server
def setUp():
server.app.testing = True
def tearDown():
os.unlink(server.app.config['SQLITE_DB'])
if __name__ == '__main__':
setUp()
try:
suite = unittest.defaultTestLoader.discover('./tests/', pattern="*")
unittest.TextTestRunner(verbosity=2, failfast=True).run(suite)
finally:
tearDown()
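As in the CI job above, the runner can be executed under coverage: `python3 -m coverage run runTests.py`, followed by `python3 -m coverage html --include "./*"` to produce the HTML report in `htmlcov/`.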
@@ -26,7 +26,7 @@ if sys.argv[0].endswith('run.py'):
config['SQLITE_INIT_DATA'] = True
config['DEBUG'] = True
config.from_pyfile('config.py', silent=True)
if sys.argv[0].endswith('tests.py'):
if sys.argv[0].endswith('runTests.py'):
print('running in test mode')
import tempfile
# ensure we always use a clean sqlite db for tests
@@ -476,18 +476,19 @@ def dbstatus():
def date_json_handler(obj):
return obj.isoformat() if hasattr(obj, 'isoformat') else obj
from jobs import job_handler, schedule_job, cancel_job, restart_job
from edit import edit_handler
from jobmanagement import job_handler, job_handler_handle, job_set_state, schedule_job, cancel_job, restart_job
import feeds
import importer
import stats
if 'ICAL_URL' in config:
import meetings
import l2pauth
from encoding import schedule_remux
import sorter
import timetable
import chapters
import icalexport
import livestreams
import encoding
import cutprogress
import jobs
@@ -33,34 +33,18 @@ def update_video_metadata(jobid, jobtype, data, state, status):
if jobtype not in ['remux', 'transcode']:
video = query('SELECT * FROM videos WHERE id = ?', data['video_id'])[0]
if video['hash'] and video['hash'] != status['hash']:
print('Hash mismatch for video', data['video_id'])
return
raise Exception('Hash mismatch for video {}'.format(data['video_id']))
modify('UPDATE videos_data SET hash = ?, file_size = ?, duration = ? WHERE id = ?',
status['hash'], status['filesize'], status['duration'], data['video_id'])
def insert_video(lectureid, dbfilepath, fileformatid, hash="", filesize=-1, duration=-1):
visible = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', lectureid)[0]['autovisible']
video_id = modify('''INSERT INTO videos_data
(lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, duration)
VALUES
(?, ?, ?, ?, "", "", "", ?, ?, ?, ?, ?, ?, ?)''',
lectureid, visible, dbfilepath, fileformatid, datetime.now(), datetime.now(), datetime.now(), -1, hash, filesize, duration)
query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', lectureid, video_id, dbfilepath, datetime.now())
schedule_thumbnail(lectureid)
schedule_job('probe', {'path': dbfilepath, 'lecture_id': lectureid, 'video_id': video_id, 'import-chapters': True})
video = query('SELECT videos.*, "format" AS sep, formats.* FROM videos JOIN formats ON formats.id = videos.video_format WHERE videos.id = ?', video_id)[0]
lecture = query('SELECT * FROM lectures WHERE id = ?', lectureid)[0]
course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
notify_mods('new_video', course['id'], course=course, lecture=lecture, video=video)
def schedule_thumbnail(lectureid):
videos = query('''
SELECT videos.path
FROM videos
JOIN formats ON (videos.video_format = formats.id)
WHERE videos.lecture_id = ?
ORDER BY formats.prio DESC''', lectureid)
schedule_job('thumbnail', {'lectureid': str(lectureid), 'path': videos[0]['path']})
SELECT videos.path
FROM videos
JOIN formats ON (videos.video_format = formats.id)
WHERE videos.lecture_id = ?
ORDER BY formats.prio DESC''', lectureid)
return schedule_job('thumbnail', {'lectureid': str(lectureid), 'path': videos[0]['path']})
@app.route('/internal/jobs/add/thumbnail', methods=['GET', 'POST'])
@mod_required
@@ -70,103 +54,112 @@ def add_thumbnail_job():
schedule_thumbnail(request.values['lectureid'])
return redirect(request.values.get('ref', url_for('jobs_overview')))
@job_handler('transcode')
def insert_transcoded_video(jobid, jobtype, data, state, status):
if 'lecture_id' not in data or 'source_id' not in data or 'format_id' not in data:
return
if 'video_id' in data:
return
visible = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', data['lecture_id'])[0]['autovisible']
def insert_video(lectureid, dbfilepath, fileformatid, hash="", filesize=-1, duration=-1, sourceid=None):
visible = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', lectureid)[0]['autovisible']
video_id = modify('''INSERT INTO videos_data
(lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, source, duration)
(lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, duration, source)
VALUES
(?, ?, ?, ?, "", "", "", ?, ?, ?, ?, ?, ?, ?, ?)''',
data['lecture_id'], visible, data['output']['path'], data['format_id'],
datetime.now(), datetime.now(), datetime.now(), -1, status['hash'],
status['filesize'], data['source_id'], status['duration'])
schedule_remux(data['lecture_id'], video_id)
schedule_thumbnail(data['lecture_id'])
lectureid, visible, dbfilepath, fileformatid, datetime.now(), datetime.now(), datetime.now(), -1, hash, filesize, duration, sourceid)
if not sourceid:
query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', lectureid, video_id, dbfilepath, datetime.now())
schedule_job('probe', {'path': dbfilepath, 'lecture_id': lectureid, 'video_id': video_id, 'import-chapters': True})
schedule_thumbnail(lectureid)
video = query('SELECT videos.*, "format" AS sep, formats.* FROM videos JOIN formats ON formats.id = videos.video_format WHERE videos.id = ?', video_id)[0]
lecture = query('SELECT * FROM lectures WHERE id = ?', data['lecture_id'])[0]
lecture = query('SELECT * FROM lectures WHERE id = ?', lectureid)[0]
course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
notify_mods('new_video', course['id'], course=course, lecture=lecture, video=video)
return video_id
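# Sketch of the two call paths through insert_video() (ids and paths illustrative):
#   insert_video(42, 'pub/08ws-swt/08ws-swt-081118.mp4', 2)
#     - no sourceid: writes a sortlog entry and schedules 'probe' and thumbnail jobs
#   insert_video(42, data['output']['path'], fmt, hash, size, duration, sourceid=7)
#     - transcoded output: sortlog/probe/thumbnail are skipped here; the
#       'transcode' handler in encoding schedules the remux afterwards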
def sort_file(filename, course=None, lectures=None):
# filenames: <handle>-<sorter>-<format>.mp4
# "sorter" musst be found with fuzzy matching. "sorter" musst be one or more of the following types: (inside the loop)
def split_filename(filename):
# '_' and ' ' are handled like '-'
splitfilename = filename.replace('_','-').replace(' ','-').split('-')
if not course:
handle = splitfilename[0]
if splitfilename[0].endswith('ws') or splitfilename[0].endswith('ss'):
handle = '-'.join(splitfilename[:2])
courses = query('SELECT * FROM courses WHERE handle = ?', handle)
if not courses:
return [], 0
course = courses[0]
if not lectures:
lectures = query('SELECT * from lectures where course_id = ?', course['id'])
# we save all extracted data in a dict
return filename.replace('_','-').replace(' ','-').split('-')
def parse_filename(splitFileName):
# filenames: <handle>-<sorter>-<format>.mp4
data = {'keywords': []}
# parse the file name and save all data in 'data'
for s in splitfilename:
s = s.replace('.mp4','')
for fileNameChunk in splitFileName:
fileNameChunk = fileNameChunk.replace('.mp4','')
#-<YYMMDD> (date)
#-<HHMM> (time)
#-<keyword>
# Looking for keywords in: title, speaker, comment, and the comma-separated list in internal
try:
if len(s) == 6:
data['date'] = datetime.strptime(s,'%y%m%d').date()
elif len(s) == 4:
data['time'] = datetime.strptime(s,'%H%M').time()
if len(fileNameChunk) == 6:
data['date'] = datetime.strptime(fileNameChunk,'%y%m%d').date()
elif len(fileNameChunk) == 4:
data['time'] = datetime.strptime(fileNameChunk,'%H%M').time()
else:
data['keywords'].append(s)
data['keywords'].append(fileNameChunk)
except ValueError:
# if it's not a date or time, handle it as a keyword
data['keywords'].append(s)
# try to match the file on a single lecture
# if it's not a valid date or time, handle it as a keyword
data['keywords'].append(fileNameChunk)
return data
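# Worked example (hypothetical filename following the convention above):
#   split_filename('08ws-swt-081118-1415-hd.mp4')
#     -> ['08ws', 'swt', '081118', '1415', 'hd.mp4']
#   parse_filename() on that list yields
#     {'keywords': ['08ws', 'swt', 'hd'],
#      'date': date(2008, 11, 18), 'time': time(14, 15)}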
def filter_lectures_by_datetime(lectures, date, time):
matches = []
# first try date and time (if one of them is set)
if ('date' in data) or ('time' in data):
if date or time:
for lecture in lectures:
if not ('time' in lecture) or not lecture['time']:
if (not 'time' in lecture) or (not lecture['time']):
continue
if date and (lecture['time'].date() != date):
continue
if time and (lecture['time'].time() != time):
continue
if ('date' in data) and (lecture['time'].date() != data['date']):
continue
if ('time' in data) and (lecture['time'].time() != data['time']):
continue
matches.append(lecture)
# if we can't match exactly based on date and time, we have to match keywords
if ((len(matches) != 1) and (len(data['keywords']) > 0)):
# only test lectures with the correct date/time, if we have any; else test for matches in all lectures of this course
if len(matches) == 0:
matches.extend(lectures)
found = False
for field in ['title','speaker','comment','internal']:
for lecture in matches:
for keyword in data['keywords']:
# first test for an exact match, else convert to ASCII and try a substring test
if (keyword == lecture[field]) or \
(str(keyword).lower() in str(to_ascii(lecture[field]).lower())):
found = True
matches = [lecture]
if found:
break
if found:
break
if found:
break
# now we should have found exactly one match
# default format is "unknown", with id 0
fmt = 0
return matches
def filter_lectures_by_keywords(lectures, keywords):
for field in ['title','speaker','comment','internal']:
for lecture in lectures:
for keyword in keywords:
# first test for an exact match, else convert to ASCII and try a substring test
if (field in lecture) and (
(keyword == lecture[field]) or
(to_ascii(str(keyword).lower()) in str(to_ascii(lecture[field]).lower()))
):
return [lecture]
return []
def extract_format_keyword_from_filename(splitFileName):
return splitFileName[-1].split('.',1)[0].lower()
def filter_formats_by_filename(splitFileName):
formatstring = extract_format_keyword_from_filename(splitFileName)
formats = query('SELECT * FROM formats ORDER BY prio DESC')
for videoformat in formats:
# we match the last part of the file name without the extension
formatstring = splitfilename[-1].split('.',1)[0].lower()
if formatstring in videoformat['keywords'].replace(',',' ').split(' '):
fmt = videoformat['id']
break
return videoformat['id']
# default format is "unknown", with id 0
return 0
def sort_file(filename, course=None, lectures=None):
splitFileName = split_filename(filename)
if not course:
handle = splitFileName[0]
if splitFileName[0].endswith('ws') or splitFileName[0].endswith('ss'):
handle = '-'.join(splitFileName[:2])
courses = query('SELECT * FROM courses WHERE handle = ?', handle)
if not courses:
return [], 0
course = courses[0]
if not lectures:
lectures = query('SELECT * from lectures where course_id = ?', course['id'])
# parse all data from the file name
data = parse_filename(splitFileName)
# try to match the file on a single lecture
matches = filter_lectures_by_datetime(lectures, data.get('date'), data.get('time'))
# if we can't match exactly based on date and time, we have to match keywords
if ((len(matches) != 1) and (len(data['keywords']) > 0)):
if not len(matches) == 0:
# only test lectures with the correct date/time, if we have any
matches = filter_lectures_by_keywords(matches, data['keywords'])
else:
# Else test for matches in all lectures of this course
matches = filter_lectures_by_keywords(lectures, data['keywords'])
# now we should have found exactly one match
fmt = filter_formats_by_filename(splitFileName)
return matches, fmt
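# Continuing the example above: sort_file('08ws-swt-081118-1415-hd.mp4')
#   resolves the course via handle '08ws-swt' (first chunk ends in 'ws'),
#   narrows that course's lectures to 2008-11-18 14:15, falls back to the
#   keyword match if that is ambiguous, and maps the trailing 'hd' chunk to a
#   format id via filter_formats_by_filename().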
def log_sort_error(course_id, path, matches):
@@ -233,7 +226,9 @@ def sort_now():
for mountpoint in config['VIDEOMOUNT']:
existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',course['id'])
knownerrors = query('SELECT sorterrorlog.path FROM sorterrorlog WHERE sorterrorlog.course_id = ?',course['id'])
ignorefiles = existingvideos + knownerrors
ignorefiles = []
for path in existingvideos + knownerrors:
ignorefiles.append(os.path.basename(path['path']))
lectures = query('SELECT * from lectures where course_id = ?',course['id'])
coursepath = mountpoint['mountpoint']+course['handle']
try:
@@ -243,14 +238,7 @@ def sort_now():
for filename in files:
try:
# if the video is in the table "videos" already (with the correct course), skip it
ignore = False
for file_to_ignore in ignorefiles:
# path is something like
# vpnonline/08ws-swt/08ws-swt-081118.mp4
if os.path.basename(filename) == os.path.basename(file_to_ignore['path']):
ignore = True
break
if ignore:
if os.path.basename(filename) in ignorefiles:
continue
if not os.path.splitext(filename)[1] == '.mp4':
continue
......
import unittest
import server
import flask
import os
import random
class FlaskTestCase(unittest.TestCase):
def tearDown(self):
pass
def setUp(self):
server.app.testing = True
self.requestContext = server.app.test_request_context()
self.client = server.app.test_client()
self.app = server.app
def videoagLogin(self):
self.sess_csrf_token = os.urandom(32)
self.sess_dbid = random.randint(0, 100)
self.sess_username = 'videoag'
with self.client.session_transaction() as sess:
sess['user'] = {'name': self.sess_username, '_csrf_token': self.sess_csrf_token, 'dbid': self.sess_dbid}
sess['_csrf_token'] = self.sess_csrf_token
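# Sketch of a test built on this helper; the route exists in jobs.py above,
# but the expected status code is an assumption:
class NavbarTestCase(FlaskTestCase):
    def test_jobs_overview_as_mod(self):
        self.videoagLogin()  # fake a logged-in session
        response = self.client.get('/internal/jobs/overview')
        assert response.status_code == 200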
from flaskunittest import FlaskTestCase
from datetime import datetime, timedelta
import jobmanagement
from server import query
class JobmanagementTestCase(FlaskTestCase):
def getJobCount(self, state=None):
if not state:
data = query("SELECT count(id) AS count from jobs")
else:
data = query("SELECT count(id) AS count FROM jobs WHERE state=?", state)
return data[0]['count']
def getCanceledJobCount(self):
data = query("SELECT count(id) AS count from jobs WHERE canceled=1")
return data[0]['count']
def generateTestJob(self):
return jobmanagement.schedule_job('testjob', data={'data': 'mytestdata'})
def moveJobScheduletimeToPast(self, id, seconds=500):
query("UPDATE jobs SET time_scheduled = ? WHERE id = ?", datetime.now() - timedelta(seconds=seconds), id)
def test_schedule_job(self):
with self.requestContext:
jobCountBefore = self.getJobCount()
self.generateTestJob()
assert(jobCountBefore + 1 == self.getJobCount())
def test_cancel_job(self):
with self.requestContext:
canceledJobCountBefore = self.getCanceledJobCount()
jobmanagement.cancel_job(self.generateTestJob())
canceledJobCountAfter = self.getCanceledJobCount()
assert(canceledJobCountBefore + 1 == canceledJobCountAfter)
def test_catch_broken(self):
with self.requestContext:
readyJobCountBefore = self.getJobCount('ready')
jobid = self.generateTestJob()
self.moveJobScheduletimeToPast(jobid)
jobmanagement.job_set_state(jobid, 'scheduled')
jobmanagement.job_catch_broken()
readyJobCountAfter = self.getJobCount('ready')
assert(readyJobCountBefore + 1 == readyJobCountAfter)
def test_job_set_state(self):
with self.requestContext:
jobCountBefore = self.getJobCount('teststate')
jobid = self.generateTestJob()
jobmanagement.job_set_state(jobid, 'teststate')
assert(jobCountBefore + 1 == self.getJobCount('teststate'))
#!/usr/bin/env python3