sorter.py 11.2 KB
Newer Older
Andreas Valder's avatar
Andreas Valder committed
1
from server import *
2
from jobmanagement import schedule_job
3
import traceback
Julian Rother's avatar
Julian Rother committed
4
import os.path
Andreas Valder's avatar
Andreas Valder committed
5

6
@app.route('/internal/sort/log')
@register_navbar('Sortierlog', icon='sort-by-attributes-alt', group='weitere')
@mod_required
def sort_log():
	"""Render the sort log page.

	Passes two result sets to the 'sortlog.html' template:
	sortlog      -- the 50 newest sortlog rows, joined with the id/title of
	                their lecture and the id/title of the lecture's course
	sorterrorlog -- all sorterrorlog rows, newest first
	"""
	recent_entries = query('''
			SELECT 
				sortlog.*,
				lectures.id as lecture_id,
				lectures.title as lecture_title,
				lectures.course_id as course_id, 
				courses.title as course_title
			FROM sortlog 
			JOIN lectures ON lectures.id = sortlog.lecture_id
			JOIN courses ON courses.id = lectures.course_id 
			ORDER BY sortlog.`when` DESC
			LIMIT 50
		''')
	error_entries = query('SELECT * FROM sorterrorlog ORDER BY sorterrorlog.`when` DESC')
	return render_template('sortlog.html', sortlog=recent_entries, sorterrorlog=error_entries)
23

24
25
26
27
28
# Translation table for German umlauts and sharp s. Upper-case umlauts are
# included so that a later .lower() on the result yields pure ASCII as well.
_UMLAUT_TABLE = str.maketrans({
	'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
	'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue',
})

def to_ascii(inputstring):
	"""Return inputstring with German umlauts and 'ß' transliterated.

	'ä' -> 'ae', 'ö' -> 'oe', 'ü' -> 'ue', 'ß' -> 'ss', and the upper-case
	umlauts 'Ä', 'Ö', 'Ü' -> 'Ae', 'Oe', 'Ue'. All other characters are
	returned unchanged.
	"""
	return inputstring.translate(_UMLAUT_TABLE)
29

30
31
32
33
34
35
36
37
38
@job_handler('probe', 'remux', 'transcode')
def update_video_metadata(jobid, jobtype, data, state, status):
	"""Job handler: copy hash, file size and duration from a job result into videos_data.

	Does nothing when the job data carries no 'video_id'. For job types other
	than 'remux' and 'transcode' the stored hash (if any) must equal the
	reported one; on a mismatch a message is printed and nothing is updated.
	"""
	if 'video_id' not in data:
		return
	video_id = data['video_id']
	# the hash comparison is skipped for remux and transcode jobs
	if jobtype != 'remux' and jobtype != 'transcode':
		stored = query('SELECT * FROM videos WHERE id = ?', video_id)[0]
		stored_hash = stored['hash']
		if stored_hash and stored_hash != status['hash']:
			print('Hash mismatch for video', video_id)
			return
	modify('UPDATE videos_data SET hash = ?, file_size = ?, duration = ? WHERE id = ?',
			status['hash'], status['filesize'], status['duration'], video_id)
41

42
def insert_video(lectureid, dbfilepath, fileformatid, hash="", filesize=-1, duration=-1):
	"""Register a video file for a lecture and trigger the follow-up work.

	Steps, in order:
	1. insert a videos_data row; 'visible' is taken from the autovisible
	   flag of the lecture's course and created_by is set to -1
	2. write a sortlog entry for the new video
	3. schedule a thumbnail job for the lecture and a 'probe' job for the file
	4. call notify_mods with a 'new_video' notification

	lectureid    -- id of the lecture the video belongs to
	dbfilepath   -- path of the file as stored in the database
	fileformatid -- id of the video format
	hash, filesize, duration -- file metadata stored with the row
	"""
	autovisible_rows = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', lectureid)
	visible = autovisible_rows[0]['autovisible']
	insert_sql = '''INSERT INTO videos_data 
		(lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, duration)
		VALUES 
		(?, ?, ?, ?, "", "", "", ?, ?, ?, ?, ?, ?, ?)'''
	video_id = modify(
		insert_sql,
		lectureid, visible, dbfilepath, fileformatid,
		datetime.now(), datetime.now(), datetime.now(),
		-1, hash, filesize, duration)
	query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', lectureid, video_id, dbfilepath, datetime.now())
	schedule_thumbnail(lectureid)
	probe_data = {'path': dbfilepath, 'lecture_id': lectureid, 'video_id': video_id, 'import-chapters': True}
	schedule_job('probe', probe_data)
	# collect the rows handed to the moderator notification
	video = query('SELECT videos.*, "format" AS sep, formats.* FROM videos JOIN formats ON formats.id = videos.video_format WHERE videos.id = ?', video_id)[0]
	lecture = query('SELECT * FROM lectures WHERE id = ?', lectureid)[0]
	course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
	notify_mods('new_video', course['id'], course=course, lecture=lecture, video=video)
56

57
def schedule_thumbnail(lectureid):
	"""Schedule a 'thumbnail' job for the lecture, using its highest-prio video.

	Raises IndexError when the lecture has no videos.
	"""
	candidates = query('''
			SELECT videos.path
			FROM videos
			JOIN formats ON (videos.video_format = formats.id)
			WHERE videos.lecture_id = ?
			ORDER BY formats.prio DESC''', lectureid)
	# rows are ordered by format prio, so the first one is the preferred source
	job_data = {'lectureid': str(lectureid), 'path': candidates[0]['path']}
	schedule_job('thumbnail', job_data)
65
66
67
68
69
70
71
72

@app.route('/internal/jobs/add/thumbnail', methods=['GET', 'POST'])
@mod_required
@csrf_protect
@handle_errors('jobs_overview', 'Zu dieser Veranstaltung existieren keine Videos!', 404, IndexError)
def add_thumbnail_job():
	"""Schedule a thumbnail job for the lecture given in the 'lectureid' request value.

	Redirects to the 'ref' request value if present, otherwise to the jobs
	overview. An IndexError (lecture without videos) is passed to handle_errors.
	"""
	lecture_id = request.values['lectureid']
	schedule_thumbnail(lecture_id)
	fallback_url = url_for('jobs_overview')
	target = request.values.get('ref', fallback_url)
	return redirect(target)
73

Julian Rother's avatar
Julian Rother committed
74
75
76
77
78
79
@job_handler('transcode')
def insert_transcoded_video(jobid, jobtype, data, state, status):
	"""Job handler: insert the output of a transcode job as a new video.

	Only acts when the job data contains 'lecture_id', 'source_id' and
	'format_id' and does not contain 'video_id'. Inserts a videos_data row
	(visibility from the course's autovisible flag, created_by -1, 'source'
	set to the source video id), then schedules a remux and a thumbnail job
	and calls notify_mods with a 'new_video' notification.
	"""
	required_keys = ('lecture_id', 'source_id', 'format_id')
	if any(key not in data for key in required_keys):
		return
	if 'video_id' in data:
		return
	lecture_id = data['lecture_id']
	autovisible_rows = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', lecture_id)
	visible = autovisible_rows[0]['autovisible']
	insert_sql = '''INSERT INTO videos_data 
		(lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, source, duration)
		VALUES 
		(?, ?, ?, ?, "", "", "", ?, ?, ?, ?, ?, ?, ?, ?)'''
	video_id = modify(
		insert_sql,
		lecture_id, visible, data['output']['path'], data['format_id'],
		datetime.now(), datetime.now(), datetime.now(),
		-1, status['hash'], status['filesize'], data['source_id'], status['duration'])
	schedule_remux(lecture_id, video_id)
	schedule_thumbnail(lecture_id)
	# collect the rows handed to the moderator notification
	video = query('SELECT videos.*, "format" AS sep, formats.* FROM videos JOIN formats ON formats.id = videos.video_format WHERE videos.id = ?', video_id)[0]
	lecture = query('SELECT * FROM lectures WHERE id = ?', lecture_id)[0]
	course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
	notify_mods('new_video', course['id'], course=course, lecture=lecture, video=video)
Julian Rother's avatar
Julian Rother committed
94

95
96
97
98
99
100
def _match_lecture_by_keywords(candidates, keywords):
	"""Return the first lecture with a field matching one of keywords, or None.

	Fields are tried in the order title, speaker, comment, internal; within a
	field the lectures are tried in order. A keyword matches when it equals the
	field value exactly or is a case-insensitive substring of the
	transliterated (to_ascii) field value.
	"""
	for field in ['title','speaker','comment','internal']:
		for lecture in candidates:
			value = lecture[field]
			# a NULL field can never match and must not crash to_ascii
			haystack = '' if value is None else to_ascii(str(value)).lower()
			for keyword in keywords:
				if keyword == value or str(keyword).lower() in haystack:
					return lecture
	return None

def sort_file(filename, course=None, lectures=None):
	"""Match a video file name to the lecture(s) of a course and detect its format.

	File names look like <handle>-<sorter>-<format>.mp4, where '_' and ' ' are
	treated like '-'. Each "sorter" segment is one of:
	  <YYMMDD>  a date
	  <HHMM>    a time
	  anything else: a keyword, looked up in the lecture's title, speaker,
	  comment and internal fields
	Empty segments (e.g. from "a - b.mp4") are ignored.

	filename -- name of the file (without directory)
	course   -- course row; looked up via the handle in the file name if omitted
	lectures -- lecture rows of the course; queried if omitted

	Returns (matches, fmt). matches is a list of lecture rows; the file is
	sorted unambiguously only if it contains exactly one entry. fmt is the id
	of the first format (by descending prio) whose keywords contain the last
	file name segment, or 0 ("unknown") if none does. Returns ([], 0) when no
	course with the derived handle exists.
	"""
	splitfilename = filename.replace('_','-').replace(' ','-').split('-')
	if not course:
		handle = splitfilename[0]
		# handles starting with a semester ("08ws", "09ss") consist of two segments
		if splitfilename[0].endswith(('ws', 'ss')):
			handle = '-'.join(splitfilename[:2])
		courses = query('SELECT * FROM courses WHERE handle = ?', handle)
		if not courses:
			return [], 0
		course = courses[0]
	if not lectures:
		lectures = query('SELECT * from lectures where course_id = ?', course['id'])
	# parse the file name into an optional date, an optional time and keywords
	file_date = None
	file_time = None
	keywords = []
	for segment in splitfilename:
		segment = segment.replace('.mp4','')
		if not segment:
			# an empty keyword is a substring of everything and would match any lecture
			continue
		try:
			if len(segment) == 6:
				file_date = datetime.strptime(segment,'%y%m%d').date()
			elif len(segment) == 4:
				file_time = datetime.strptime(segment,'%H%M').time()
			else:
				keywords.append(segment)
		except ValueError:
			# if it is not a date or time, handle it as keyword
			keywords.append(segment)
	# first try date and time (if one of them is set)
	matches = []
	if file_date is not None or file_time is not None:
		for lecture in lectures:
			if not ('time' in lecture) or not lecture['time']:
				continue
			if file_date is not None and lecture['time'].date() != file_date:
				continue
			if file_time is not None and lecture['time'].time() != file_time:
				continue
			matches.append(lecture)
	# if date and time do not identify exactly one lecture, fall back to keywords
	if len(matches) != 1 and keywords:
		# only test lectures with the correct date/time, if we have any;
		# else test all lectures of this course
		if not matches:
			matches.extend(lectures)
		hit = _match_lecture_by_keywords(matches, keywords)
		if hit is not None:
			matches = [hit]
	# now we should have found exactly one match
	# default format is "unknown", with id 0
	fmt = 0
	# we match the last part of the file name without the extension
	formatstring = splitfilename[-1].split('.',1)[0].lower()
	formats = query('SELECT * FROM formats ORDER BY prio DESC')
	for videoformat in formats:
		if formatstring in videoformat['keywords'].replace(',',' ').split(' '):
			fmt = videoformat['id']
			break
	return matches, fmt

def log_sort_error(course_id, path, matches):
	"""Record a file that could not be sorted in sorterrorlog_data.

	course_id -- id of the course the file was found for
	path      -- path of the file
	matches   -- the lecture rows the file matched; their ids are stored as a
	             comma separated string
	"""
	joined_ids = ','.join(str(match['id']) for match in matches)
	query(
		'INSERT INTO sorterrorlog_data (course_id, path, matches, `when`, time_updated, time_created) VALUES (?, ?, ?, ?, ?, ?)',
		course_id, path, joined_ids, datetime.now(), datetime.now(), datetime.now())

def sort_api_token_required(func):
	"""Decorator: only run the view if the request carries the sorter API key.

	The key is read from the 'apikey' request value or, failing that, from the
	'apikey' member of a JSON object body, and compared to the
	SORTER_API_KEY config entry. Requests without a key, with a wrong key, or
	arriving while no key is configured get a 403 'Permission denied'.
	"""
	@wraps(func)
	def decorator(*args, **kwargs):
		if 'apikey' in request.values:
			token = request.values['apikey']
		else:
			# silent=True: a missing or malformed JSON body must end in the 403
			# below instead of an error raised by the JSON parser
			payload = request.get_json(silent=True)
			token = payload.get('apikey') if isinstance(payload, dict) else None
		# The [None] default never equals a submitted token, so everything is
		# denied while no key is configured; a request without any token is
		# rejected regardless of the configured value.
		if token is None or token != config.get('SORTER_API_KEY', [None]):
			return 'Permission denied', 403
		return func(*args, **kwargs)
	return decorator

@app.route('/internal/sort/encoded/<filename>')
@sort_api_token_required
def sort_encoded(filename):
	"""Sort an encoded file by name and, if the course allows it, publish it.

	Responds 400 and logs a sort error (course id -1, path prefixed with
	'kodiert/') unless the name matches exactly one lecture. Otherwise a
	'publish_video' job is scheduled when the course has autopublish set, and
	the response is 'OK'.
	"""
	matches, fmt = sort_file(filename)
	if len(matches) == 1:
		lecture = matches[0]
		course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
		if course['autopublish']:
			job_data = {
				'source': filename,
				'path': 'pub/'+course['handle']+'/'+filename,
				'lecture_id': lecture['id'],
				'format_id': fmt,
			}
			schedule_job('publish_video', job_data)
		return 'OK', 200
	log_sort_error(-1, 'kodiert/'+filename, matches)
	return "Could not match filename", 400

Julian Rother's avatar
Julian Rother committed
208
209
210
211
212
213
214
215
216
217
218
219
220
@app.route('/internal/sort/autoencode')
@sort_api_token_required
def sort_autoencode():
	"""Sort a raw file from the 'path' request value and schedule a 'probe-raw' job.

	Responds 400 and logs a sort error (course id -1, path prefixed with
	'raw/autoencode/') unless the name matches exactly one lecture.
	"""
	filename = request.values['path']
	path = 'autoencode/'+filename
	matches, fmt = sort_file(filename)
	if len(matches) == 1:
		lecture = matches[0]
		job_data = {'path': path, 'lecture_id': lecture['id'], 'import-chapters': True}
		schedule_job('probe-raw', job_data)
		return 'OK', 200
	log_sort_error(-1, 'raw/'+path, matches)
	return "Could not match filename", 400

221
222
223
224
@job_handler('publish_video')
def handle_published_video(jobid, jobtype, data, state, status):
	"""Job handler: insert a published video into the database.

	Does nothing unless the job data contains both 'lecture_id' and
	'format_id'. Hash, file size and duration come from the job status.
	"""
	if not ('lecture_id' in data and 'format_id' in data):
		return
	insert_video(
		data['lecture_id'], data['path'], data['format_id'],
		hash=status['hash'],
		filesize=status['filesize'],
		duration=status['duration'])
226

227
@app.route('/internal/sort/now')
@mod_required
@sched_func(600)
def sort_now():
	"""Scan all video mountpoints for new .mp4 files and sort them into lectures.

	For every course and every entry of config['VIDEOMOUNT'] the directory
	<mountpoint><course handle> is listed. Files whose name is already known
	(as an existing video of the course or as a logged sort error) and files
	without the '.mp4' extension are skipped. Every other file is passed to
	sort_file; an unambiguous match is inserted as a video, anything else is
	logged as a sort error. Errors while handling a single file are printed
	and do not stop the scan. The work for one course is wrapped in
	BEGIN/COMMIT.

	Redirects to the 'ref' request value if present, otherwise returns 'OK'.
	NOTE(review): sched_func(600) presumably also runs this periodically -
	confirm against its definition.
	"""
	courses = query('SELECT * FROM courses')
	for course in courses:
		modify('BEGIN')
		for mountpoint in config['VIDEOMOUNT']:
			existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',course['id'])
			knownerrors = query('SELECT sorterrorlog.path FROM sorterrorlog WHERE sorterrorlog.course_id = ?',course['id'])
			# Paths look like vpnonline/08ws-swt/08ws-swt-081118.mp4; only the
			# file name is compared. Building the set once per mountpoint avoids
			# rescanning the whole list for every file.
			ignorednames = {
				os.path.basename(row['path'])
				for row in existingvideos + knownerrors
				if row['path']
			}
			lectures = query('SELECT * from lectures where course_id = ?',course['id'])
			coursepath = mountpoint['mountpoint']+course['handle']
			try:
				files = os.listdir(coursepath)
			except FileNotFoundError:
				# the course has no directory on this mountpoint
				files = []
			for filename in files:
				try:
					# if the video is in the table "videos" already (with the correct course), skip it
					if os.path.basename(filename) in ignorednames:
						continue
					if not os.path.splitext(filename)[1] == '.mp4':
						continue
					matches, fmt = sort_file(filename, course=course, lectures=lectures)
					dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename
					if len(matches) == 1:
						insert_video(matches[0]['id'], dbfilepath, fmt)
					else:
						log_sort_error(course['id'], dbfilepath, matches)
				except Exception:
					# best effort: one broken file must not abort the whole scan
					traceback.print_exc()
		modify('COMMIT')
	if 'ref' in request.values:
		return redirect(request.values['ref'])
	else:
		return 'OK', 200
Andreas Valder's avatar
Andreas Valder committed
271