sorter.py 10.2 KB
Newer Older
Andreas Valder's avatar
Andreas Valder committed
1
from server import *
2
import traceback
Julian Rother's avatar
Julian Rother committed
3
import os.path
Andreas Valder's avatar
Andreas Valder committed
4

5
@app.route('/internal/sort/log')
@register_navbar('Sortierlog', icon='sort-by-attributes-alt', group='weitere')
@mod_required
def sort_log():
	"""Render the sort log page.

	Hands two result sets to the ``sortlog.html`` template:

	* ``sortlog``: the 50 newest ``sortlog`` rows, each joined with the id
	  and title of its lecture and the id and title of that lecture's course.
	* ``sorterrorlog``: all ``sorterrorlog`` rows, newest first.
	"""
	sortlog = query('''
			SELECT 
				sortlog.*,
				lectures.id as lecture_id,
				lectures.title as lecture_title,
				lectures.course_id as course_id, 
				courses.title as course_title
			FROM sortlog 
			JOIN lectures ON lectures.id = sortlog.lecture_id
			JOIN courses ON courses.id = lectures.course_id 
			ORDER BY sortlog.`when` DESC
			LIMIT 50
		''')
	sorterrorlog = query('SELECT * FROM sorterrorlog ORDER BY sorterrorlog.`when` DESC')
	return render_template('sortlog.html', sortlog=sortlog, sorterrorlog=sorterrorlog)
22

23
24
25
26
27
def to_ascii(inputstring):
	"""Return ``inputstring`` with lowercase German umlauts and 'ß' transliterated.

	Only 'ä', 'ö', 'ü' and 'ß' are replaced (by 'ae', 'oe', 'ue' and 'ss');
	every other character, including uppercase umlauts, is left untouched.
	"""
	result = inputstring
	for umlaut, replacement in (('ä', 'ae'), ('ö', 'oe'), ('ü', 'ue'), ('ß', 'ss')):
		result = result.replace(umlaut, replacement)
	return result
28

29
30
31
32
33
34
35
@job_handler('probe', 'remux', 'transcode')
def update_video_metadata(jobid, jobtype, data, state, status):
	"""Store hash, file size and duration reported by a job for its video.

	Does nothing when the job data carries no ``video_id``. For job types
	other than 'remux' and 'transcode', a non-empty hash already stored for
	the video must equal the reported one.

	Raises:
		Exception: if the stored hash differs from the reported hash.
	"""
	if 'video_id' not in data:
		return
	video_id = data['video_id']
	# remux/transcode are exempt from the hash comparison; all other job types are checked
	if jobtype not in ['remux', 'transcode']:
		video = query('SELECT * FROM videos WHERE id = ?', video_id)[0]
		if video['hash'] and video['hash'] != status['hash']:
			raise Exception('Hash mismatch for video {}'.format(video_id))
	modify('UPDATE videos_data SET hash = ?, file_size = ?, duration = ? WHERE id = ?',
			status['hash'], status['filesize'], status['duration'], video_id)
39

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def schedule_thumbnail(lectureid):
	"""Schedule a 'thumbnail' job for a lecture.

	The job uses the path of the lecture's video whose format has the
	highest ``prio``. Returns whatever ``schedule_job`` returns.

	Raises:
		IndexError: if the query returns no video for the lecture.
	"""
	candidates = query('''
		SELECT videos.path
		FROM videos
		JOIN formats ON (videos.video_format = formats.id)
		WHERE videos.lecture_id = ?
		ORDER BY formats.prio DESC''', lectureid)
	# rows are ordered by descending priority, so the first one is the preferred source
	jobdata = {'lectureid': str(lectureid), 'path': candidates[0]['path']}
	return schedule_job('thumbnail', jobdata)

@app.route('/internal/jobs/add/thumbnail', methods=['GET', 'POST'])
@mod_required
@csrf_protect
@handle_errors('jobs_overview', 'Zu dieser Veranstaltung existieren keine Videos!', 404, IndexError)
def add_thumbnail_job():
	"""Schedule a thumbnail job for the lecture named by the ``lectureid`` request value.

	Afterwards redirects to the ``ref`` request value if present, otherwise
	to the jobs overview. An IndexError is routed to the ``handle_errors``
	decorator above.
	"""
	lectureid = request.values['lectureid']
	schedule_thumbnail(lectureid)
	target = request.values.get('ref', url_for('jobs_overview'))
	return redirect(target)

57
def insert_video(lectureid, dbfilepath, fileformatid, hash="", filesize=-1, duration=-1, sourceid=None):
	"""Insert a new video row for a lecture and trigger the follow-up actions.

	The video's visibility is taken from the ``autovisible`` flag of the
	lecture's course; ``created_by`` is stored as -1. When no ``sourceid``
	is given, a ``sortlog`` entry is written and a 'probe' job is scheduled
	for the file. In every case a thumbnail job is scheduled and the
	moderators are notified via ``notify_mods('new_video', ...)``.

	Args:
		lectureid: id of the lecture the video belongs to.
		dbfilepath: path of the video file as stored in the database.
		fileformatid: id of the video's format.
		hash: stored in the ``hash`` column.
		filesize: stored in the ``file_size`` column.
		duration: stored in the ``duration`` column.
		sourceid: stored in the ``source`` column.

	Returns:
		The value returned by ``modify`` for the INSERT, used as the new video's id.
	"""
	visible = query('SELECT courses.autovisible FROM courses JOIN lectures ON lectures.course_id = courses.id WHERE lectures.id = ?', lectureid)[0]['autovisible']
	# 15 columns: 4 placeholders, 3 empty-string literals, 8 placeholders;
	# the bound parameters below must stay in exactly this order.
	video_id = modify('''INSERT INTO videos_data 
		(lecture_id, visible, path, video_format, title, comment, internal, file_modified, time_created, time_updated, created_by, hash, file_size, duration, source)
		VALUES 
		(?, ?, ?, ?, "", "", "", ?, ?, ?, ?, ?, ?, ?, ?)''',
		lectureid, visible, dbfilepath, fileformatid, datetime.now(), datetime.now(), datetime.now(), -1, hash, filesize, duration, sourceid)
	if not sourceid:
		query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', lectureid, video_id, dbfilepath, datetime.now())
		schedule_job('probe', {'path': dbfilepath, 'lecture_id': lectureid, 'video_id': video_id, 'import-chapters': True})
	schedule_thumbnail(lectureid)
	# re-read the rows so the notification carries the complete video, lecture and course data
	video = query('SELECT videos.*, "format" AS sep, formats.* FROM videos JOIN formats ON formats.id = videos.video_format WHERE videos.id = ?', video_id)[0]
	lecture = query('SELECT * FROM lectures WHERE id = ?', lectureid)[0]
	course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
	notify_mods('new_video', course['id'], course=course, lecture=lecture, video=video)
	return video_id
Julian Rother's avatar
Julian Rother committed
73

Andreas Valder's avatar
Andreas Valder committed
74
75
76
77
78
def split_filename(filename):
	"""Split a filename into chunks at '-', treating '_' and ' ' as '-'."""
	normalized = filename.replace('_', '-').replace(' ', '-')
	return normalized.split('-')

def parse_filename(splitFileName):
	"""Extract date, time and keywords from the chunks of a split filename.

	Filenames look like ``<handle>-<sorter>-<format>.mp4``. Every chunk (with
	'.mp4' removed) is classified as:

	* a date, if it has 6 characters and parses as ``YYMMDD``,
	* a time, if it has 4 characters and parses as ``HHMM``,
	* a keyword otherwise (including 6/4-character chunks that fail to parse).

	Returns:
		A dict with the key 'keywords' (list of str, in input order) and,
		only if found, 'date' (``datetime.date``) and 'time'
		(``datetime.time``). A later date/time chunk overwrites an earlier one.
	"""
	data = {'keywords': []}
	for fileNameChunk in splitFileName:
		chunk = fileNameChunk.replace('.mp4', '')
		try:
			if len(chunk) == 6:
				data['date'] = datetime.strptime(chunk, '%y%m%d').date()
			elif len(chunk) == 4:
				data['time'] = datetime.strptime(chunk, '%H%M').time()
			else:
				data['keywords'].append(chunk)
		except ValueError:
			# if it is not a valid date or time, handle it as a keyword
			data['keywords'].append(chunk)
	return data

Andreas Valder's avatar
Andreas Valder committed
99
def filter_lectures_by_datetime(lectures, date, time):
	"""Return the lectures whose 'time' matches the given date and/or time.

	Args:
		lectures: iterable of lecture mappings; the 'time' entry is used
			via its ``.date()`` and ``.time()`` methods.
		date: date to match, or a falsy value to ignore the date.
		time: time of day to match, or a falsy value to ignore the time.

	Returns:
		A new list with the matching lectures in input order. Empty when
		neither ``date`` nor ``time`` is given. Lectures without a (truthy)
		'time' entry never match.
	"""
	if not (date or time):
		return []
	matches = []
	for lecture in lectures:
		if ('time' not in lecture) or (not lecture['time']):
			continue
		when = lecture['time']
		if date and (when.date() != date):
			continue
		if time and (when.time() != time):
			continue
		matches.append(lecture)
	return matches

Andreas Valder's avatar
Andreas Valder committed
112
def filter_lectures_by_keywords(lectures, keywords):
	"""Return the first lecture matching any keyword, as a one-element list.

	The fields 'title', 'speaker', 'comment' and 'internal' are searched in
	that order; within a field the lectures and then the keywords are tried
	in input order. A keyword matches a field if it equals the field value
	or if its lowercased, transliterated form (see ``to_ascii``) is a
	substring of the lowercased, transliterated field value.

	Empty keywords are ignored because an empty string is a substring of
	every value. Fields whose value is ``None`` are skipped because they
	cannot be transliterated.

	Returns:
		``[lecture]`` for the first match, or ``[]`` if nothing matches.
	"""
	for field in ['title', 'speaker', 'comment', 'internal']:
		for lecture in lectures:
			if field not in lecture:
				continue
			value = lecture[field]
			if value is None:
				continue
			for keyword in keywords:
				if not keyword:
					continue
				# first test for an exact match, else make it ASCII and try a substring test
				if keyword == value:
					return [lecture]
				if to_ascii(str(keyword).lower()) in str(to_ascii(value).lower()):
					return [lecture]
	return []

Andreas Valder's avatar
Andreas Valder committed
124
125
126
def extract_format_keyword_from_filename(splitFileName):
	"""Return the last filename chunk, cut at its first '.', in lowercase."""
	last_chunk = splitFileName[-1]
	keyword, _, _ = last_chunk.partition('.')
	return keyword.lower()

Andreas Valder's avatar
Andreas Valder committed
127
def filter_formats_by_filename(splitFileName):
	"""Return the id of the video format indicated by the filename.

	The last chunk of the filename without its extension is looked up in
	the ``keywords`` column of every format (keywords separated by ',' or
	' '), trying formats in order of descending ``prio``.

	Returns:
		The id of the first format listing the keyword, or 0 (the
		"unknown" format) if none does.
	"""
	formatstring = extract_format_keyword_from_filename(splitFileName)
	formats = query('SELECT * FROM formats ORDER BY prio DESC')
	for videoformat in formats:
		# we match the last part of the file name without the extension
		format_keywords = videoformat['keywords'].replace(',', ' ').split(' ')
		if formatstring in format_keywords:
			return videoformat['id']
	# default format is "unknown", with id 0
	return 0

def sort_file(filename, course=None, lectures=None):
	"""Match a video filename to a lecture and a video format.

	Args:
		filename: name of the video file (``<handle>-<sorter>-<format>.mp4``).
		course: course row to match against. If falsy, the course is looked
			up by the handle taken from the filename; a first chunk ending
			in 'ws' or 'ss' is joined with the second chunk to form the handle.
		lectures: lectures to choose from. If falsy, all lectures of the
			course are loaded.

	Returns:
		A tuple ``(matches, fmt)``. ``matches`` is the list of candidate
		lectures (callers treat exactly one entry as success) and ``fmt``
		is the format id. ``([], 0)`` is returned if no course is found.
	"""
	splitFileName = split_filename(filename)
	if not course:
		handle = splitFileName[0]
		if handle.endswith(('ws', 'ss')):
			handle = '-'.join(splitFileName[:2])
		courses = query('SELECT * FROM courses WHERE handle = ?', handle)
		if not courses:
			return [], 0
		course = courses[0]
	if not lectures:
		lectures = query('SELECT * from lectures where course_id = ?', course['id'])
	# parse all data from the file name
	data = parse_filename(splitFileName)
	# try to match the file on a single lecture
	matches = filter_lectures_by_datetime(lectures, data.get('date'), data.get('time'))
	# if we can't match exactly based on date and time, we have to match keywords
	if len(matches) != 1 and data['keywords']:
		# only test lectures with the correct date/time if we have any,
		# else test for matches in all lectures of this course
		matches = filter_lectures_by_keywords(matches or lectures, data['keywords'])
	# now we should have found exactly one match
	fmt = filter_formats_by_filename(splitFileName)
	return matches, fmt

def log_sort_error(course_id, path, matches):
	"""Record a file that could not be matched to exactly one lecture.

	Inserts a row into ``sorterrorlog_data`` holding the course id, the
	file path and the ids of all candidate lectures as a comma separated
	string, with the current time as ``when``, ``time_updated`` and
	``time_created``.
	"""
	match_ids = ','.join([str(match['id']) for match in matches])
	query('INSERT INTO sorterrorlog_data (course_id, path, matches, `when`, time_updated, time_created) VALUES (?, ?, ?, ?, ?, ?)',
			course_id, path, match_ids, datetime.now(), datetime.now(), datetime.now())

def sort_api_token_required(func):
	"""Decorator restricting a view to requests carrying the sorter API key.

	The key is read from the ``apikey`` request value or, failing that, from
	the ``apikey`` entry of the JSON request body. It must equal the
	``SORTER_API_KEY`` config entry; otherwise the view is not called and
	``('Permission denied', 403)`` is returned. With no key configured the
	comparison value is ``[None]``, which a missing token (``None``) does
	not equal.
	"""
	@wraps(func)
	def decorator(*args, **kwargs):
		token = None
		if 'apikey' in request.values:
			token = request.values['apikey']
		else:
			payload = request.get_json()
			if payload and ('apikey' in payload):
				token = payload['apikey']
		if token != config.get('SORTER_API_KEY', [None]):
			return 'Permission denied', 403
		return func(*args, **kwargs)
	return decorator

@app.route('/internal/sort/encoded/<filename>')
@sort_api_token_required
def sort_encoded(filename):
	"""Sort an encoded file and, if its course auto-publishes, schedule publishing.

	If the filename does not match exactly one lecture, a sort error is
	logged (course id -1, path below 'kodiert/') and a 400 response is
	returned. Otherwise a 'publish_video' job targeting
	``pub/<course handle>/<filename>`` is scheduled when the course has
	``autopublish`` set, and 'OK' is returned.
	"""
	candidates, format_id = sort_file(filename)
	if len(candidates) != 1:
		log_sort_error(-1, 'kodiert/'+filename, candidates)
		return "Could not match filename", 400
	lecture = candidates[0]
	course = query('SELECT * FROM courses WHERE id = ?', lecture['course_id'])[0]
	if course['autopublish']:
		jobdata = {
			'source': filename,
			'path': 'pub/'+course['handle']+'/'+filename,
			'lecture_id': lecture['id'],
			'format_id': format_id,
		}
		schedule_job('publish_video', jobdata)
	return 'OK', 200

Julian Rother's avatar
Julian Rother committed
200
201
202
203
204
205
206
207
208
209
210
211
212
@app.route('/internal/sort/autoencode')
@sort_api_token_required
def sort_autoencode():
	"""Sort a raw file given by the ``path`` request value and schedule probing it.

	If the filename does not match exactly one lecture, a sort error is
	logged (course id -1, path below 'raw/autoencode/') and a 400 response
	is returned. Otherwise a 'probe-raw' job for ``autoencode/<path>`` is
	scheduled and 'OK' is returned.
	"""
	filename = request.values['path']
	path = 'autoencode/'+filename
	candidates, _format_id = sort_file(filename)
	if len(candidates) != 1:
		log_sort_error(-1, 'raw/'+path, candidates)
		return "Could not match filename", 400
	lecture = candidates[0]
	jobdata = {'path': path, 'lecture_id': lecture['id'], 'import-chapters': True}
	schedule_job('probe-raw', jobdata)
	return 'OK', 200

213
214
215
216
@job_handler('publish_video')
def handle_published_video(jobid, jobtype, data, state, status):
	"""Insert the video produced by a 'publish_video' job.

	Does nothing unless the job data contains both ``lecture_id`` and
	``format_id``. Hash, file size and duration are taken from the job status.
	"""
	if 'lecture_id' not in data or 'format_id' not in data:
		return
	insert_video(data['lecture_id'], data['path'], data['format_id'], hash=status['hash'], filesize=status['filesize'], duration=status['duration'])
218

219
@app.route('/internal/sort/now')
@mod_required
@sched_func(600)
def sort_now():
	"""Scan the video mountpoints of every course and sort new '.mp4' files.

	For each course and each entry of ``config['VIDEOMOUNT']`` the directory
	``<mountpoint><course handle>`` is listed (a missing directory counts as
	empty). Files whose basename already belongs to a video of the course or
	to a logged sort error are skipped. Every other '.mp4' file is matched
	with ``sort_file``: exactly one matching lecture inserts the video,
	anything else is logged as a sort error. An exception while handling a
	single file is printed and does not stop the scan.

	The work for one course is wrapped in ``BEGIN``/``COMMIT``.

	Returns:
		A redirect to the ``ref`` request value if present, else ``('OK', 200)``.
	"""
	courses = query('SELECT * FROM courses')
	for course in courses:
		modify('BEGIN')
		for mountpoint in config['VIDEOMOUNT']:
			existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',course['id'])
			knownerrors = query('SELECT sorterrorlog.path FROM sorterrorlog WHERE sorterrorlog.course_id = ?',course['id'])
			# each result row is a mapping, so the file path is read from its 'path' column
			ignorefiles = {os.path.basename(row['path']) for row in existingvideos + knownerrors}
			lectures = query('SELECT * from lectures where course_id = ?',course['id'])
			coursepath = mountpoint['mountpoint']+course['handle']
			try:
				files = os.listdir(coursepath)
			except FileNotFoundError:
				files = []
			for filename in files:
				try:
					# if the video is in the table "videos" already (with the correct course), skip it
					if os.path.basename(filename) in ignorefiles:
						continue
					if os.path.splitext(filename)[1] != '.mp4':
						continue
					matches, fmt = sort_file(filename, course=course, lectures=lectures)
					dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename
					if len(matches) == 1:
						insert_video(matches[0]['id'], dbfilepath, fmt)
					else:
						log_sort_error(course['id'], dbfilepath, matches)
				except Exception:
					# best effort: one broken file must not abort sorting the remaining files
					traceback.print_exc()
		modify('COMMIT')
	if 'ref' in request.values:
		return redirect(request.values['ref'])
	else:
		return 'OK', 200
Andreas Valder's avatar
Andreas Valder committed
258