sorter.py 6.96 KB
Newer Older
Andreas Valder's avatar
Andreas Valder committed
1
from server import *
2
import traceback
Andreas Valder's avatar
Andreas Valder committed
3

4
@app.route('/internal/sort/log')
@register_navbar('Sortierlog', icon='sort-by-attributes-alt')
@mod_required
def sort_log():
	"""Render the sorter log page.

	Passes two result sets to the ``sortlog.html`` template:

	* ``sortlog``: the 50 newest ``sortlog`` rows, each joined with the id
	  and title of its lecture and the id and title of that lecture's course.
	* ``sorterrorlog``: all ``sorterrorlog`` rows, newest first.
	"""
	sortlog = query('''
			SELECT 
				sortlog.*,
				lectures.id as lecture_id,
				lectures.title as lecture_title,
				lectures.course_id as course_id, 
				courses.title as course_title
			FROM sortlog 
			JOIN lectures ON lectures.id = sortlog.lecture_id
			JOIN courses ON courses.id = lectures.course_id 
			ORDER BY sortlog.`when` DESC
			LIMIT 50
		''')
	sorterrorlog = query('SELECT * FROM sorterrorlog ORDER BY sorterrorlog.`when` DESC')
	return render_template('sortlog.html', sortlog=sortlog, sorterrorlog=sorterrorlog)
21

22

23
24
25
26
27
# Transliteration table for German umlauts and sharp s, built once at import.
_ASCII_TRANSLITERATION = str.maketrans({
	'ä': 'ae', 'ö': 'oe', 'ü': 'ue',
	'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue',
	'ß': 'ss',
})

def to_ascii(inputstring):
	"""Return ``inputstring`` with German umlauts and sharp s transliterated.

	``ä/ö/ü`` become ``ae/oe/ue``, their uppercase forms become ``Ae/Oe/Ue``
	and ``ß`` becomes ``ss``. All other characters (including other
	non-ASCII ones) are passed through unchanged.
	"""
	return inputstring.translate(_ASCII_TRANSLITERATION)
28

29
30
31
32
33
34
35
def insert_video(lectureid,dbfilepath,filepath,fileformatid):
	"""Register a sorted video file and schedule its follow-up jobs.

	Inserts an invisible (``visible = 0``) row into ``videos_data``, writes a
	matching ``sortlog`` entry, then schedules a thumbnail job and a
	``probe`` job for the new video.

	lectureid:    id of the lecture the video belongs to
	dbfilepath:   path stored in the database and handed to the probe job
	filepath:     path used to read the file size from disk
	fileformatid: value stored in ``videos_data.video_format``

	Raises whatever ``os.stat`` raises if ``filepath`` cannot be read; in that
	case nothing has been written yet.
	"""
	# Read the size first so a missing file aborts before any row is written.
	file_size = os.stat(filepath).st_size
	# One timestamp for the whole operation keeps all rows consistent.
	now = datetime.now()
	# NOTE(review): created_by = -1 is presumably a marker for "created by the
	# sorter" and modify() presumably returns the id of the new row -- confirm.
	video_id = modify('''INSERT INTO videos_data 
		(lecture_id,visible,path,video_format,title,comment,internal,file_modified,time_created,time_updated,created_by,hash,file_size)
		VALUES 
		(?,0,?,?,"","","",?,?,?,?,"",?)''',
		lectureid, dbfilepath, fileformatid, now, now, now, -1, file_size)
	query('INSERT INTO sortlog (lecture_id,video_id,path,`when`) VALUES (?,?,?,?)', lectureid, video_id, dbfilepath, now)
	schedule_thumbnail(lectureid)
	schedule_job('probe', {'path': dbfilepath, 'lecture_id': lectureid, 'video_id': video_id, 'import-chapters': True})
38

Andreas Valder's avatar
Andreas Valder committed
39
def schedule_thumbnail(lectureid, filePath=None):
	"""Queue a ``thumbnail`` job for a lecture.

	The source video is the lecture's video whose format has the highest
	``prio``. If the lecture has no videos, ``filePath`` is used instead; if
	that is not given either, nothing is scheduled.

	lectureid: id of the lecture to create the thumbnail for
	filePath:  fallback video path used when the lecture has no videos
	"""
	import json  # local import: keeps this block independent of the file's import list

	videos = query('''
			SELECT videos.path
			FROM videos
			JOIN formats ON (videos.video_format = formats.id)
			WHERE videos.lecture_id = ?
			ORDER BY formats.prio DESC''', lectureid )
	if videos:
		path = videos[0]['path']
	elif filePath:
		path = filePath
	else:
		return
	# json.dumps escapes quotes and backslashes in the path; for plain paths the
	# output is identical to the previous hand-built string. ensure_ascii=False
	# keeps non-ASCII characters unescaped, as before.
	data = json.dumps({'lectureid': str(lectureid), 'path': path}, ensure_ascii=False)
	query('INSERT INTO jobs (type, data, time_created) VALUES ("thumbnail", ?, ?)', data, datetime.now())
54

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def _first_keyword_match(candidates, keywords):
	"""Return the first lecture in ``candidates`` matching a keyword, else None.

	Fields are tried in the order title, speaker, comment, internal; within a
	field, lectures are tried in list order and keywords in file name order.
	A keyword matches when it equals the field value, or when its lowercase
	form is a substring of the lowercased, transliterated field value.
	"""
	lowered = [(keyword, keyword.lower()) for keyword in keywords]
	for field in ('title', 'speaker', 'comment', 'internal'):
		for lecture in candidates:
			value = lecture[field]
			if not value:
				# NULL/empty columns can never match and must not reach to_ascii()
				continue
			# lowercase first, so that uppercase umlauts are transliterated as well
			haystack = to_ascii(str(value).lower())
			for keyword, needle in lowered:
				if keyword == value or needle in haystack:
					return lecture
	return None

def sort_file(filename, course=None, lectures=None):
	"""Find the lecture(s) a video file belongs to, based on its file name.

	File names look like ``<handle>-<sorter>-<format>.mp4``; '_' and ' ' are
	handled like '-'. Every component is interpreted as one of:

	* ``YYMMDD`` (6 characters): the lecture date
	* ``HHMM`` (4 characters): the lecture time
	* anything else (or anything that fails to parse as date/time): a keyword
	  that is looked up in the lecture's title, speaker, comment and internal
	  fields

	Empty components (e.g. from a doubled separator) are ignored.

	filename: file name without directory
	course:   course row; if omitted it is looked up by the handle formed from
	          the first two file name components
	lectures: lecture rows to match against; if omitted or empty, all
	          lectures of the course are loaded

	Returns a list of lecture rows. Exactly one element means an unambiguous
	match; an empty list or several elements mean the file could not be sorted.
	"""
	parts = filename.replace('_','-').replace(' ','-').split('-')
	if not course:
		courses = query('SELECT * FROM courses WHERE handle = ?', '-'.join(parts[:2]))
		if not courses:
			return []
		course = courses[0]
	if not lectures:
		lectures = query('SELECT * from lectures where course_id = ?', course['id'])

	# parse the file name
	wanted_date = None
	wanted_time = None
	keywords = []
	for part in parts:
		part = part.replace('.mp4','')
		if not part:
			# '' is a substring of everything and would match any lecture
			continue
		try:
			if len(part) == 6:
				wanted_date = datetime.strptime(part,'%y%m%d').date()
			elif len(part) == 4:
				wanted_time = datetime.strptime(part,'%H%M').time()
			else:
				keywords.append(part)
		except ValueError:
			# if it is not a date or time, handle it as keyword
			keywords.append(part)

	# first try date and time (if one of them is set)
	matches = []
	if wanted_date is not None or wanted_time is not None:
		for lecture in lectures:
			if not ('time' in lecture) or not lecture['time']:
				continue
			if wanted_date is not None and lecture['time'].date() != wanted_date:
				continue
			if wanted_time is not None and lecture['time'].time() != wanted_time:
				continue
			matches.append(lecture)

	# if date and time do not identify exactly one lecture, fall back to keywords
	if len(matches) != 1 and keywords:
		# only test lectures with the correct date/time, if we have any;
		# else test all lectures of this course
		candidates = matches if matches else list(lectures)
		hit = _first_keyword_match(candidates, keywords)
		if hit is not None:
			return [hit]
		return candidates
	return matches

124
def _detect_format_id(filename, formats):
	"""Return the id of the first format whose keywords contain the file's format tag.

	The tag is the last '-'-separated file name component ('_' and ' ' count
	as '-') up to its first '.', lowercased. Returns 0 (format "unknown") when
	no format lists the tag.
	"""
	tag = filename.replace('_','-').replace(' ','-').split('-')[-1].split('.',1)[0].lower()
	for videoformat in formats:
		if tag in videoformat['keywords'].replace(',',' ').split(' '):
			return videoformat['id']
	return 0

@app.route('/internal/sort/now')
@mod_required
@sched_func(600)
def sort_now():
	"""Scan all video mount points and sort new ``.mp4`` files into lectures.

	For every course and every entry of ``config['VIDEOMOUNT']``, the
	directory ``<mountpoint><course handle>`` is listed. Files whose base name
	is already known (as a video of the course or as a logged sort error) and
	files without ``.mp4`` extension are skipped. Every other file is matched
	with ``sort_file``: exactly one match is inserted via ``insert_video``,
	anything else is recorded in ``sorterrorlog_data``.

	Errors while handling a single file are printed and do not stop the run.
	All changes for one course are wrapped in one BEGIN/COMMIT pair.

	Returns a redirect to the ``ref`` request value if present, else
	``('OK', 200)``.
	"""
	# NOTE(review): sched_func(600) presumably also runs this periodically -- confirm.
	courses = query('SELECT * FROM courses')
	formats = query('SELECT * FROM formats ORDER BY prio')
	for course in courses:
		modify('BEGIN')
		for mountpoint in config['VIDEOMOUNT']:
			coursepath = mountpoint['mountpoint']+course['handle']
			try:
				files = os.listdir(coursepath)
			except FileNotFoundError:
				files = []
			if not files:
				# nothing to sort here, so skip the (read-only) lookups below
				continue
			# Queried once per mount point, as before; presumably this lets files
			# inserted from an earlier mount point be skipped here -- confirm.
			existingvideos = query('SELECT videos.path FROM videos JOIN lectures ON (videos.lecture_id = lectures.id) WHERE lectures.course_id = ?',course['id'])
			knownerrors = query('SELECT sorterrorlog.path FROM sorterrorlog WHERE sorterrorlog.course_id = ?',course['id'])
			lectures = query('SELECT * from lectures where course_id = ?',course['id'])
			# paths look like vpnonline/08ws-swt/08ws-swt-081118.mp4; only the
			# base name is compared, so build the set of known base names once
			known_names = {os.path.basename(row['path']) for row in existingvideos + knownerrors if row['path']}
			for filename in files:
				try:
					if os.path.basename(filename) in known_names:
						continue
					if os.path.splitext(filename)[1] != '.mp4':
						continue
					filepath = coursepath + '/' + filename
					dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename
					matches = sort_file(filename, course=course, lectures=lectures)
					if len(matches) == 1:
						# insert the video into videos_data and log
						insert_video(matches[0]['id'], dbfilepath, filepath, _detect_format_id(filename, formats))
					else:
						# if we couldn't match the video on exactly one lecture, log an error
						matches_id = ','.join(str(match['id']) for match in matches)
						now = datetime.now()
						query('INSERT INTO sorterrorlog_data (course_id,path,matches,`when`,time_updated,time_created) VALUES (?,?,?,?,?,?)', course['id'], dbfilepath, matches_id, now, now, now)
				except Exception:
					# best effort: one broken file must not stop the whole run
					traceback.print_exc()
		modify('COMMIT')
	if 'ref' in request.values:
		return redirect(request.values['ref'])
	else:
		return 'OK', 200
Andreas Valder's avatar
Andreas Valder committed
187