Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Video AG Infrastruktur
website
Commits
edfacd0a
Unverified
Commit
edfacd0a
authored
Jul 18, 2018
by
Andreas Valder
Browse files
refactor and split up of sort_file
parent
ff93e6bf
Changes
1
Hide whitespace changes
Inline
Side-by-side
sorter.py
View file @
edfacd0a
...
...
@@ -73,82 +73,88 @@ def insert_transcoded_video(jobid, jobtype, data, state, status):
return
insert_video
(
data
[
'lecture_id'
],
data
[
'output'
][
'path'
],
data
[
'format_id'
],
status
[
'hash'
],
status
[
'filesize'
],
status
[
'duration'
],
data
[
'source_id'
]
)
def
sort_file
(
filename
,
course
=
None
,
lectures
=
None
):
# filenames: <handle>-<sorter>-<format>.mp4
# "sorter" must be found with fuzzy matching. "sorter" must be one or more of the following types: (inside the loop)
# '_' and ' ' are handled like '-'
splitfilename
=
filename
.
replace
(
'_'
,
'-'
).
replace
(
' '
,
'-'
).
split
(
'-'
)
if
not
course
:
handle
=
splitfilename
[
0
]
if
splitfilename
[
0
].
endswith
(
'ws'
)
or
splitfilename
[
0
].
endswith
(
'ss'
):
handle
=
'-'
.
join
(
splitfilename
[:
2
])
courses
=
query
(
'SELECT * FROM courses WHERE handle = ?'
,
handle
)
if
not
courses
:
return
[],
0
course
=
courses
[
0
]
if
not
lectures
:
lectures
=
query
(
'SELECT * from lectures where course_id = ?'
,
course
[
'id'
])
# we save all extracted data in a dict
def parseVideoFileName(splitFileName):
    """Extract date, time and keywords from the chunks of a video file name.

    Every occurrence of '.mp4' is removed from a chunk, then the chunk is
    classified by its length:

    - 6 characters that parse as '%y%m%d' (YYMMDD) are stored as data['date']
    - 4 characters that parse as '%H%M' (HHMM) are stored as data['time']
    - everything else, including 6/4 character chunks that fail to parse,
      is appended to data['keywords']

    Empty chunks are ignored.

    Returns a dict that always contains 'keywords' (a list of strings) and
    contains 'date' / 'time' only when such a chunk was found. If several
    chunks parse as a date (or time), the last one wins.
    """
    data = {'keywords': []}
    for fileNameChunk in splitFileName:
        fileNameChunk = fileNameChunk.replace('.mp4', '')
        if not fileNameChunk:
            # Doubled separators ("a--b", "a_-b") or a chunk that is only
            # '.mp4' leave an empty string here. An empty string is a
            # substring of every string, so keeping it as a keyword would
            # make any substring based keyword test succeed.
            continue
        try:
            if len(fileNameChunk) == 6:
                data['date'] = datetime.strptime(fileNameChunk, '%y%m%d').date()
            elif len(fileNameChunk) == 4:
                data['time'] = datetime.strptime(fileNameChunk, '%H%M').time()
            else:
                data['keywords'].append(fileNameChunk)
        except ValueError:
            # if it is not a valid date or time, handle it as keyword
            data['keywords'].append(fileNameChunk)
    return data
def matchDatetimeOnLecture(lectures, date, time):
    """Return the lectures whose 'time' value agrees with the given date/time.

    lectures: iterable of mappings; entries without a 'time' key or with a
        falsy 'time' value are skipped. The 'time' value must offer .date()
        and .time() (both are called on it).
    date: value compared against lecture['time'].date(), or None to ignore
        the date.
    time: value compared against lecture['time'].time(), or None to ignore
        the time.

    Returns a new list with the matching lectures in their original order.
    The list is empty when both date and time are None.
    """
    # Compare against None instead of relying on truthiness: a midnight
    # time object is falsy on Python versions before 3.5.
    check_date = date is not None
    check_time = time is not None

    matches = []
    if not (check_date or check_time):
        return matches

    for lecture in lectures:
        if ('time' not in lecture) or (not lecture['time']):
            continue
        if check_date and (lecture['time'].date() != date):
            continue
        if check_time and (lecture['time'].time() != time):
            continue
        matches.append(lecture)
    return matches
def matchKeywordsOnLecture(lectures, keywords):
    """Return the first lecture that matches one of the keywords.

    The fields 'title', 'speaker', 'comment' and 'internal' are tried in
    that order; for each field every lecture is tested against every
    keyword. A keyword matches when it equals the field value, or when its
    lower-cased form is a substring of the lower-cased to_ascii() form of
    the field value.

    Empty keywords are ignored.

    Returns a list holding the single matching lecture, or an empty list
    when nothing matches.
    """
    # An empty string is a substring of every string, so an empty keyword
    # would always "match" the first lecture; drop those up front. The
    # lower-cased form does not depend on field or lecture, so compute it once.
    needles = [(keyword, str(keyword).lower()) for keyword in keywords if keyword]

    for field in ['title', 'speaker', 'comment', 'internal']:
        for lecture in lectures:
            for keyword, lowered in needles:
                # first test for exact match, else make it ascii and try substring test
                if keyword == lecture[field]:
                    return [lecture]
                if lowered in str(to_ascii(lecture[field]).lower()):
                    return [lecture]
    return []
def matchFileNameOnFormat(splitFileName):
    """Return the id of the video format named by the last file name chunk.

    The last chunk is cut at its first '.' and lower-cased. The formats are
    read with 'SELECT * FROM formats ORDER BY prio DESC' and tested in that
    order; a format matches when the chunk is one of the entries of its
    'keywords' value (commas are treated like spaces, then split on ' ').

    Returns the 'id' of the first matching format, or 0 (the default
    "unknown" format) when no format matches or splitFileName is empty.
    """
    if not splitFileName:
        # no chunk to look at, fall back to the default format
        return 0

    # we match the last part of the file name without the extension;
    # this does not depend on the format row, so compute it once
    formatstring = splitFileName[-1].split('.', 1)[0].lower()

    formats = query('SELECT * FROM formats ORDER BY prio DESC')
    for videoformat in formats:
        if formatstring in videoformat['keywords'].replace(',', ' ').split(' '):
            return videoformat['id']
    # default format is "unknown", with id 0
    return 0
def sort_file(filename, course=None, lectures=None):
    """Match a video file name to lectures of a course and to a video format.

    File names look like <handle>-<sorter>-<format>.mp4, where '_' and ' '
    are handled like '-'. The "sorter" part is matched fuzzily: date and
    time chunks first, keywords afterwards.

    filename: the file name to sort.
    course: course row to use; when falsy the course is looked up by its
        handle. The handle is the first chunk, or the first two chunks
        joined with '-' when the first chunk ends with 'ws' or 'ss'.
    lectures: lectures to match against; when falsy the lectures of the
        course are queried.

    Returns a tuple (matches, fmt): the list of matching lectures and the
    format id. Returns ([], 0) when no course with the handle exists.
    """
    chunks = filename.replace('_', '-').replace(' ', '-').split('-')

    if not course:
        first_chunk = chunks[0]
        if first_chunk.endswith(('ws', 'ss')):
            handle = '-'.join(chunks[:2])
        else:
            handle = first_chunk
        courses = query('SELECT * FROM courses WHERE handle = ?', handle)
        if not courses:
            return [], 0
        course = courses[0]

    if not lectures:
        lectures = query('SELECT * from lectures where course_id = ?', course['id'])

    data = parseVideoFileName(chunks)
    keywords = data['keywords']

    # try to match the file on a single lecture
    matches = matchDatetimeOnLecture(lectures, data.get('date'), data.get('time'))

    # if we can't match exactly based on date and time, we have to match keywords
    if len(matches) != 1 and keywords:
        # only test lectures with the correct date/time, if we have any;
        # else test for matches in all lectures of this course
        candidates = matches if matches else lectures
        matches = matchKeywordsOnLecture(candidates, keywords)

    # now we should have found exactly one match
    fmt = matchFileNameOnFormat(chunks)
    return matches, fmt
def
log_sort_error
(
course_id
,
path
,
matches
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment