sorter.py 10.3 KB
Newer Older
Andreas Valder's avatar
Andreas Valder committed
1
from server import *
2
3
from jobmanagement import schedule_job, job_handler
from jobtypes import schedule_thumbnail
4
import traceback
Julian Rother's avatar
Julian Rother committed
5
import os.path
Andreas Valder's avatar
Andreas Valder committed
6

7
@app.route('/internal/sort/log')
@register_navbar('Sortierlog', icon='sort-by-attributes-alt', group='weitere')
@mod_required
def sort_log():
	"""Render the sorter log page.

	Hands two result sets to the ``sortlog.html`` template:

	* ``sortlog``: the 50 newest ``sortlog`` rows, each joined with the id and
	  title of its lecture and the id and title of the lecture's course.
	* ``sorterrorlog``: all ``sorterrorlog`` rows, newest first.
	"""
	sortlog = query('''
			SELECT 
				sortlog.*,
				lectures.id as lecture_id,
				lectures.title as lecture_title,
				lectures.course_id as course_id, 
				courses.title as course_title
			FROM sortlog 
			JOIN lectures ON lectures.id = sortlog.lecture_id
			JOIN courses ON courses.id = lectures.course_id 
			ORDER BY sortlog.`when` DESC
			LIMIT 50
		''')
	sorterrorlog = query('SELECT * FROM sorterrorlog ORDER BY sorterrorlog.`when` DESC')
	return render_template('sortlog.html', sortlog=sortlog, sorterrorlog=sorterrorlog)
24

25
26
27
28
29
def to_ascii(inputstring):
	"""Return *inputstring* with German umlauts and sharp s transliterated.

	Lower-case and upper-case umlauts are replaced by their two-letter ASCII
	spelling (``ä`` -> ``ae``, ``Ä`` -> ``Ae``, ...), ``ß`` becomes ``ss`` and
	``ẞ`` becomes ``SS``. All other characters are left untouched.
	"""
	replacements = {
		'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
		# upper-case variants, so that capitalised words are transliterated too
		'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue', 'ẞ': 'SS',
	}
	# one pass over the string instead of one replace() call per character
	return inputstring.translate(str.maketrans(replacements))
30

31
32
33
34
35
36
37
@job_handler('probe', 'remux', 'transcode')
def update_video_metadata(jobid, jobtype, data, state, status):
	"""Write the hash, file size and duration from *status* to the job's video.

	Registered as handler for ``probe``, ``remux`` and ``transcode`` jobs.
	Jobs whose *data* has no ``video_id`` are ignored.

	For job types other than ``remux`` and ``transcode`` the hash already
	stored for the video (if any) must equal ``status['hash']``.

	Raises:
		Exception: if the stored hash differs from the one in *status*.
	"""
	if 'video_id' not in data:
		return
	if jobtype not in ['remux', 'transcode']:
		video = query('SELECT * FROM videos WHERE id = ?', data['video_id'])[0]
		# an empty stored hash means "not known yet" and is simply overwritten below
		if video['hash'] and video['hash'] != status['hash']:
			raise Exception('Hash mismatch for video {}'.format(data['video_id']))
	modify('UPDATE videos_data SET hash = ?, file_size = ?, duration = ? WHERE id = ?',
			status['hash'], status['filesize'], status['duration'], data['video_id'])
41

42
43
44
45
46
47
48
49
50
@app.route('/internal/jobs/add/thumbnail', methods=['GET', 'POST'])
@mod_required
@csrf_protect
@handle_errors('jobs_overview', 'Zu dieser Veranstaltung existieren keine Videos!', 404, IndexError)
def add_thumbnail_job():
	"""Schedule a thumbnail job for the lecture given in the request.

	Reads the lecture id from the ``lectureid`` request value and afterwards
	redirects to the ``ref`` request value, or to the jobs overview if no
	``ref`` was sent.
	"""
	lecture_id = request.values['lectureid']
	schedule_thumbnail(lecture_id)
	target = request.values.get('ref', url_for('jobs_overview'))
	return redirect(target)

def insert_video(lectureid, dbfilepath, fileformatid, hash="", filesize=-1, duration=-1, sourceid=None):
	"""Insert a video into ``videos_data`` and trigger the follow-up work.

	The new row takes its visibility from the ``autovisible`` flag of the
	lecture's course. If *sourceid* is set, a remux is scheduled for the new
	video; otherwise the insert is recorded in ``sortlog`` and a ``probe`` job
	is scheduled. In both cases a thumbnail job is scheduled and the
	moderators are notified with a ``new_video`` notification.

	Args:
		lectureid: id of the lecture the video belongs to.
		dbfilepath: path of the video file as stored in the database.
		fileformatid: id of the video's format.
		hash: value for the ``hash`` column.
		filesize: value for the ``file_size`` column.
		duration: value for the ``duration`` column.
		sourceid: value for the ``source`` column; also selects the
			follow-up work as described above.
	"""
	visible = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', lectureid)[0]['autovisible']
	# one timestamp for file_modified, time_created and time_updated
	now = datetime.now()
	# 15 columns: title, comment and internal are literal empty strings, the
	# remaining 12 are bound parameters (the last one being the source id)
	video_id = modify('''INSERT INTO videos_data 
		(lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, duration, source)
		VALUES 
		(?, ?, ?, ?, "", "", "", ?, ?, ?, ?, ?, ?, ?, ?)''',
		lectureid, visible, dbfilepath, fileformatid, now, now, now, -1, hash, filesize, duration, sourceid)
	if sourceid:
		# NOTE(review): schedule_remux is not imported in the visible part of this file - confirm it is in scope
		schedule_remux(lectureid, video_id)
	else:
		query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', lectureid, video_id, dbfilepath, datetime.now())
		schedule_job('probe', {'path': dbfilepath, 'lecture_id': lectureid, 'video_id': video_id, 'import-chapters': True})
	schedule_thumbnail(lectureid)
	video = query('SELECT videos.*, "format" AS sep, formats.* FROM videos JOIN formats ON formats.id = videos.video_format WHERE videos.id = ?', video_id)[0]
	lecture = query('SELECT * FROM lectures WHERE id = ?', lectureid)[0]
	course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
	notify_mods('new_video', course['id'], course=course, lecture=lecture, video=video)
67

Julian Rother's avatar
Julian Rother committed
68
69
70
71
72
73
@job_handler('transcode')
def insert_transcoded_video(jobid, jobtype, data, state, status):
	"""Insert the output of a ``transcode`` job as a new video.

	Only jobs whose *data* contains ``lecture_id``, ``source_id`` and
	``format_id`` are handled. Jobs that carry a ``video_id`` are skipped.
	"""
	if 'lecture_id' not in data or 'source_id' not in data or 'format_id' not in data:
		return
	if 'video_id' in data:
		return
	insert_video(data['lecture_id'], data['output']['path'], data['format_id'], status['hash'], status['filesize'], status['duration'], data['source_id'])
Julian Rother's avatar
Julian Rother committed
75

76
def parseVideoFileName(splitFileName):
	"""Extract date, time and keywords from the chunks of a video file name.

	File names look like ``<handle>-<sorter>-<format>.mp4``; the chunks are
	interpreted as follows:

	* six characters that parse as ``YYMMDD`` -> ``data['date']``
	* four characters that parse as ``HHMM`` -> ``data['time']``
	* everything else -> appended to ``data['keywords']``

	Args:
		splitFileName: the file name, already split into its chunks.

	Returns:
		A dict with the key ``keywords`` (list of str) and, if found, the
		keys ``date`` (``datetime.date``) and ``time`` (``datetime.time``).
	"""
	data = {'keywords': []}
	for fileNameChunk in splitFileName:
		fileNameChunk = fileNameChunk.replace('.mp4','')
		if not fileNameChunk:
			# an empty chunk (doubled separator, bare ".mp4") carries no
			# information and, as a keyword, is a substring of every text
			continue
		try:
			if len(fileNameChunk) == 6:
				data['date'] = datetime.strptime(fileNameChunk,'%y%m%d').date()
			elif len(fileNameChunk) == 4:
				data['time'] = datetime.strptime(fileNameChunk,'%H%M').time()
			else:
				data['keywords'].append(fileNameChunk)
		except ValueError:
			# if it is not a valid date or time, handle it as keyword
			data['keywords'].append(fileNameChunk)
	return data

def matchDatetimeOnLecture(lectures, date, time):
	"""Return the lectures whose ``time`` matches the given date and/or time.

	Args:
		lectures: iterable of lecture rows; ``lecture['time']`` is expected to
			be a ``datetime`` (lectures without it are skipped).
		date: ``datetime.date`` to match, or a falsy value to ignore the date.
		time: ``datetime.time`` to match, or a falsy value to ignore the time.

	Returns:
		A list of the matching lectures; empty if neither *date* nor *time*
		is given.
	"""
	if not (date or time):
		return []
	matches = []
	for lecture in lectures:
		if ('time' not in lecture) or (not lecture['time']):
			continue
		if date and (lecture['time'].date() != date):
			continue
		if time and (lecture['time'].time() != time):
			continue
		matches.append(lecture)
	return matches

def matchKeywordsOnLecture(lectures, keywords):
	"""Return the first lecture that matches one of the keywords.

	The fields ``title``, ``speaker``, ``comment`` and ``internal`` are
	searched in that order. Within a field the lectures are tested in the
	given order. A keyword matches if it equals the field value exactly or
	if its lower-cased form is a substring of the lower-cased, transliterated
	field value.

	Args:
		lectures: iterable of lecture rows.
		keywords: iterable of keywords taken from the file name.

	Returns:
		A list with the single matching lecture, or an empty list.
	"""
	# an empty keyword is a substring of every text and would always "match"
	needles = [(keyword, str(keyword).lower()) for keyword in keywords if keyword]
	if not needles:
		return []
	for field in ['title','speaker','comment','internal']:
		for lecture in lectures:
			value = lecture[field]
			if value is None:
				# nothing to compare against
				continue
			# lower-case first so that upper-case umlauts are transliterated too
			haystack = to_ascii(str(value).lower())
			for keyword, needle in needles:
				# first test for exact match, else try the substring test
				if (keyword == value) or (needle in haystack):
					return [lecture]
	return []

def matchFileNameOnFormat(splitFileName):
	"""Return the id of the video format named by the file name.

	The last chunk of the file name, cut at its first ``.`` and lower-cased,
	is looked up in the ``keywords`` column of each format (a list separated
	by commas and/or spaces). Formats are tested in descending ``prio``
	order and the first hit wins.

	Returns:
		The id of the matching format, or 0 (the "unknown" format) if no
		format matches.
	"""
	# we match the last part of the file name without the extension
	formatstring = splitFileName[-1].split('.',1)[0].lower()
	for videoformat in query('SELECT * FROM formats ORDER BY prio DESC'):
		if formatstring in videoformat['keywords'].replace(',',' ').split(' '):
			return videoformat['id']
	# default format is "unknown", with id 0
	return 0

def sort_file(filename, course=None, lectures=None):
	"""Match a video file name to lectures and to a video format.

	Args:
		filename: name of the video file; ``_`` and spaces are treated like
			the ``-`` separator.
		course: course row to sort into. If omitted, the course is looked up
			by the handle at the start of the file name (the first chunk, or
			the first two chunks if the first one ends in ``ws`` or ``ss``).
		lectures: lecture rows of the course. If omitted or empty, they are
			loaded from the database.

	Returns:
		A tuple ``(matches, fmt)``: the list of matching lectures (exactly
		one element on success) and the format id. ``([], 0)`` if no course
		with the handle exists.
	"""
	# '_' and ' ' are handled like '-'
	splitFileName = filename.replace('_','-').replace(' ','-').split('-')
	if not course:
		handle = splitFileName[0]
		if handle.endswith(('ws', 'ss')):
			handle = '-'.join(splitFileName[:2])
		courses = query('SELECT * FROM courses WHERE handle = ?', handle)
		if not courses:
			return [], 0
		course = courses[0]
	if not lectures:
		lectures = query('SELECT * from lectures where course_id = ?', course['id'])
	# parse all data from the file name
	data = parseVideoFileName(splitFileName)
	# try to match the file on a single lecture
	matches = matchDatetimeOnLecture(lectures, data.get('date'), data.get('time'))
	# if we can't match exactly based on date and time, we have to match keywords
	if len(matches) != 1 and len(data['keywords']) > 0:
		if len(matches) != 0:
			# only test lectures with the correct date/time, if we have any
			matches = matchKeywordsOnLecture(matches, data['keywords'])
		else:
			# else test for matches in all lectures of this course
			matches = matchKeywordsOnLecture(lectures, data['keywords'])
	# now we should have found exactly one match
	fmt = matchFileNameOnFormat(splitFileName)
	return matches, fmt

def log_sort_error(course_id, path, matches):
	"""Record a file that could not be sorted in ``sorterrorlog_data``.

	Args:
		course_id: id of the course the file was sorted for.
		path: path of the file.
		matches: the candidate lectures; their ids are stored as a
			comma-separated string.
	"""
	match_ids = ','.join(str(lecture['id']) for lecture in matches)
	query('INSERT INTO sorterrorlog_data (course_id, path, matches, `when`, time_updated, time_created) VALUES (?, ?, ?, ?, ?, ?)',
			course_id, path, match_ids, datetime.now(), datetime.now(), datetime.now())

def sort_api_token_required(func):
	"""Decorator: only run *func* if the request carries the sorter API key.

	The key is taken from the ``apikey`` request value or, if that is absent,
	from the ``apikey`` member of a JSON object in the request body. It must
	equal ``config['SORTER_API_KEY']``; otherwise, or if no key is
	configured, ``('Permission denied', 403)`` is returned.
	"""
	# unique marker for "no key configured"; unlike a plain default value it
	# cannot be reproduced by anything a client sends
	missing = object()

	@wraps(func)
	def decorator(*args, **kwargs):
		token = None
		if 'apikey' in request.values:
			token = request.values['apikey']
		else:
			# silent=True: a missing or malformed JSON body must end in the
			# 403 below, not in an error raised by the JSON parser
			payload = request.get_json(silent=True)
			# only a JSON object can carry the key; lists, strings etc. are ignored
			if isinstance(payload, dict) and 'apikey' in payload:
				token = payload['apikey']
		expected = config.get('SORTER_API_KEY', missing)
		if expected is missing or token != expected:
			return 'Permission denied', 403
		return func(*args, **kwargs)
	return decorator

@app.route('/internal/sort/encoded/<filename>')
@sort_api_token_required
def sort_encoded(filename):
	"""Sort an encoded file and, if the course auto-publishes, publish it.

	Returns ``('Could not match filename', 400)`` and logs a sort error if
	the file name does not match exactly one lecture, ``('OK', 200)``
	otherwise.
	"""
	matches, fmt = sort_file(filename)
	if len(matches) == 1:
		lecture = matches[0]
		course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
		if course['autopublish']:
			job_data = {
				'source': filename,
				'path': 'pub/'+course['handle']+'/'+filename,
				'lecture_id': lecture['id'],
				'format_id': fmt,
			}
			schedule_job('publish_video', job_data)
		return 'OK', 200
	# zero or several candidate lectures: record the failure
	log_sort_error(-1, 'kodiert/'+filename, matches)
	return "Could not match filename", 400

Julian Rother's avatar
Julian Rother committed
194
195
196
197
198
199
200
201
202
203
204
205
206
@app.route('/internal/sort/autoencode')
@sort_api_token_required
def sort_autoencode():
	"""Sort a file from the autoencode directory and schedule a raw probe.

	The file name is read from the ``path`` request value. Returns
	``('Could not match filename', 400)`` and logs a sort error if it does
	not match exactly one lecture, ``('OK', 200)`` otherwise.
	"""
	filename = request.values['path']
	path = 'autoencode/'+filename
	matches, fmt = sort_file(filename)
	if len(matches) == 1:
		job_data = {'path': path, 'lecture_id': matches[0]['id'], 'import-chapters': True}
		schedule_job('probe-raw', job_data)
		return 'OK', 200
	# zero or several candidate lectures: record the failure
	log_sort_error(-1, 'raw/'+path, matches)
	return "Could not match filename", 400

207
208
209
210
@job_handler('publish_video')
def handle_published_video(jobid, jobtype, data, state, status):
	"""Insert the video of a ``publish_video`` job into the database.

	Jobs whose *data* lacks ``lecture_id`` or ``format_id`` are ignored.
	Hash, file size and duration are taken from *status*.
	"""
	if 'lecture_id' not in data or 'format_id' not in data:
		return
	insert_video(data['lecture_id'], data['path'], data['format_id'], hash=status['hash'], filesize=status['filesize'], duration=status['duration'])
212

213
@app.route('/internal/sort/now')
@mod_required
@sched_func(600)
def sort_now():
	"""Scan the course directories on all video mounts and sort new files.

	For every course and every entry of ``config['VIDEOMOUNT']`` the
	directory ``<mountpoint><course handle>`` is listed. ``.mp4`` files whose
	base name is not yet known (neither as a video of the course nor as a
	logged sort error) are matched to a lecture: on exactly one match the
	video is inserted, otherwise a sort error is logged. Errors while
	handling a single file are printed and do not stop the scan.

	Returns a redirect to the ``ref`` request value if present, else
	``('OK', 200)``.
	"""
	# NOTE(review): sched_func(600) presumably also runs this periodically - confirm
	courses = query('SELECT * FROM courses')
	for course in courses:
		modify('BEGIN')
		for mountpoint in config['VIDEOMOUNT']:
			# re-read per mountpoint so that videos inserted for an earlier
			# mountpoint are already known here
			existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',course['id'])
			knownerrors = query('SELECT sorterrorlog.path FROM sorterrorlog WHERE sorterrorlog.course_id = ?',course['id'])
			ignorefiles = existingvideos + knownerrors
			lectures = query('SELECT * from lectures where course_id = ?',course['id'])
			coursepath = mountpoint['mountpoint']+course['handle']
			try:
				files = os.listdir(coursepath)
			except FileNotFoundError:
				# the course has no directory on this mountpoint
				files = []
			for filename in files:
				try:
					# if the video is in the table "videos" already (with the correct course), skip it
					# path is something like vpnonline/08ws-swt/08ws-swt-081118.mp4
					basename = os.path.basename(filename)
					if any(basename == os.path.basename(known['path']) for known in ignorefiles):
						continue
					if not os.path.splitext(filename)[1] == '.mp4':
						continue
					matches, fmt = sort_file(filename, course=course, lectures=lectures)
					dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename
					if len(matches) == 1:
						insert_video(matches[0]['id'], dbfilepath, fmt)
					else:
						log_sort_error(course['id'], dbfilepath, matches)
				except Exception:
					# best effort: report the problem and go on with the next file
					traceback.print_exc()
		modify('COMMIT')
	if 'ref' in request.values:
		return redirect(request.values['ref'])
	else:
		return 'OK', 200
Andreas Valder's avatar
Andreas Valder committed
257