Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
website
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Deploy
Releases
Container registry
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Vincent Wehrwein
website
Commits
edfacd0a
Unverified
Commit
edfacd0a
authored
6 years ago
by
Andreas Valder
Browse files
Options
Downloads
Patches
Plain Diff
refactor and split up of sort_file
parent
ff93e6bf
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
sorter.py
+64
-58
64 additions, 58 deletions
sorter.py
with
64 additions
and
58 deletions
sorter.py
+
64
−
58
View file @
edfacd0a
...
...
@@ -73,82 +73,88 @@ def insert_transcoded_video(jobid, jobtype, data, state, status):
return
insert_video
(
data
[
'
lecture_id
'
],
data
[
'
output
'
][
'
path
'
],
data
[
'
format_id
'
],
status
[
'
hash
'
],
status
[
'
filesize
'
],
status
[
'
duration
'
],
data
[
'
source_id
'
]
)
def
sort_file
(
filename
,
course
=
None
,
lectures
=
None
):
# filenames: <handle>-<sorter>-<format>.mp4
# "sorter" must be found with fuzzy matching. "sorter" must be one or more of the following types: (inside the loop)
# '_' and ' ' are handled like '-'
splitfilename
=
filename
.
replace
(
'
_
'
,
'
-
'
).
replace
(
'
'
,
'
-
'
).
split
(
'
-
'
)
if
not
course
:
handle
=
splitfilename
[
0
]
if
splitfilename
[
0
].
endswith
(
'
ws
'
)
or
splitfilename
[
0
].
endswith
(
'
ss
'
):
handle
=
'
-
'
.
join
(
splitfilename
[:
2
])
courses
=
query
(
'
SELECT * FROM courses WHERE handle = ?
'
,
handle
)
if
not
courses
:
return
[],
0
course
=
courses
[
0
]
if
not
lectures
:
lectures
=
query
(
'
SELECT * from lectures where course_id = ?
'
,
course
[
'
id
'
])
# we save all extracted data in a dict
def parseVideoFileName(splitFileName):
    """Extract date, time and keywords from the chunks of a video file name.

    Every chunk has the literal '.mp4' removed and is then classified:

    * 6 characters that parse as ``%y%m%d``  -> stored as ``data['date']``
    * 4 characters that parse as ``%H%M``    -> stored as ``data['time']``
    * anything else (including 4/6 character chunks that are not a valid
      time/date)                             -> appended to ``data['keywords']``

    Args:
        splitFileName: iterable of string chunks of the file name.

    Returns:
        A dict that always contains the list ``'keywords'`` and contains
        ``'date'`` (``datetime.date``) and/or ``'time'`` (``datetime.time``)
        only when a matching chunk was found. If several chunks qualify, the
        last one wins.
    """
    data = {'keywords': []}
    for fileNameChunk in splitFileName:
        fileNameChunk = fileNameChunk.replace('.mp4', '')
        if not fileNameChunk:
            # Consecutive separators (e.g. "a - b") produce empty chunks. An
            # empty keyword is a substring of every string and would make any
            # lecture match, so it is dropped here.
            continue
        try:
            if len(fileNameChunk) == 6:
                data['date'] = datetime.strptime(fileNameChunk, '%y%m%d').date()
            elif len(fileNameChunk) == 4:
                data['time'] = datetime.strptime(fileNameChunk, '%H%M').time()
            else:
                data['keywords'].append(fileNameChunk)
        except ValueError:
            # not a valid date or time: handle it as a keyword
            data['keywords'].append(fileNameChunk)
    return data
def matchDatetimeOnLecture(lectures, date, time):
    """Return the lectures whose start matches the given date and/or time.

    Args:
        lectures: iterable of mappings; entries without a truthy ``'time'``
            value are skipped. ``lecture['time']`` must provide ``.date()``
            and ``.time()``.
        date: date to compare against ``lecture['time'].date()``, or ``None``
            to not filter by date.
        time: time to compare against ``lecture['time'].time()``, or ``None``
            to not filter by time.

    Returns:
        A list of all matching lectures in input order. The list is empty
        when neither ``date`` nor ``time`` is given.
    """
    # Compare against None instead of relying on truthiness, so that a filter
    # value is never ignored just because it evaluates as false.
    if date is None and time is None:
        return []

    matches = []
    for lecture in lectures:
        if ('time' not in lecture) or (not lecture['time']):
            continue
        if date is not None and lecture['time'].date() != date:
            continue
        if time is not None and lecture['time'].time() != time:
            continue
        matches.append(lecture)
    return matches
def matchKeywordsOnLecture(lectures, keywords):
    """Find the first lecture that matches one of the keywords.

    The fields 'title', 'speaker', 'comment' and 'internal' are tried in that
    order; within a field, the lectures are tried in input order and the
    keywords in the given order. A keyword matches when it equals the field
    value, or when its lower-cased form is a substring of the lower-cased
    ``to_ascii(...)`` form of the field value.

    Args:
        lectures: sequence of mappings providing the four fields above.
        keywords: iterable of keywords.

    Returns:
        ``[lecture]`` for the first hit, or ``[]`` if nothing matches.
    """
    # An empty keyword is a substring of every string and would therefore
    # select the very first lecture; ignore such keywords.
    keywords = [keyword for keyword in keywords if keyword]
    for field in ['title', 'speaker', 'comment', 'internal']:
        for lecture in lectures:
            for keyword in keywords:
                # first test for an exact match, else compare the ASCII forms
                # with a substring test
                # NOTE(review): to_ascii is defined outside this block;
                # presumably it transliterates to ASCII - confirm
                if (keyword == lecture[field]) or \
                        (str(keyword).lower() in str(to_ascii(lecture[field]).lower())):
                    return [lecture]
    return []
def matchFileNameOnFormat(splitFileName):
    """Determine the format id from the last chunk of a file name.

    The last chunk, cut at its first '.' and lower-cased, is compared against
    the keyword list of every row of the ``formats`` table. The keyword list
    is separated by commas and/or whitespace. Rows are read ordered by
    ``prio`` descending and the first row that lists the chunk wins.

    Args:
        splitFileName: sequence of string chunks of the file name.

    Returns:
        The ``id`` of the first matching format, or 0 (the id of the default
        format "unknown") if nothing matches.
    """
    if not splitFileName:
        return 0

    # we match the last part of the file name without the extension; it does
    # not depend on the format row, so compute it once
    formatstring = splitFileName[-1].split('.', 1)[0].lower()
    if not formatstring:
        # nothing to match against; an empty string must never select a format
        return 0

    formats = query('SELECT * FROM formats ORDER BY prio DESC')
    for videoformat in formats:
        # split() without argument drops the empty tokens a ", " separated
        # list would otherwise produce
        if formatstring in videoformat['keywords'].replace(',', ' ').split():
            return videoformat['id']
    return 0
def sort_file(filename, course=None, lectures=None):
    """Match a video file name on a lecture and a format.

    File names look like ``<handle>-<sorter>-<format>.mp4``; '_' and ' ' are
    treated exactly like '-'. The "sorter" part is matched fuzzily: first via
    date/time, then, if that does not yield exactly one lecture, via keywords.

    Args:
        filename: name of the video file.
        course: course row to use; looked up by the handle taken from the
            file name when not given.
        lectures: candidate lectures; all lectures of the course are loaded
            when not given.

    Returns:
        A tuple ``(matches, fmt)``: the list of matching lectures and the
        format id. ``([], 0)`` is returned when no course has the handle.
    """
    chunks = filename.replace('_', '-').replace(' ', '-').split('-')

    if not course:
        first = chunks[0]
        # handles ending in 'ws'/'ss' span the first two chunks
        if first.endswith(('ws', 'ss')):
            handle = '-'.join(chunks[:2])
        else:
            handle = first
        found = query('SELECT * FROM courses WHERE handle = ?', handle)
        if not found:
            return [], 0
        course = found[0]

    if not lectures:
        lectures = query('SELECT * from lectures where course_id = ?', course['id'])

    parsed = parseVideoFileName(chunks)
    keywords = parsed['keywords']

    # try to match the file on a single lecture via date and time
    matches = matchDatetimeOnLecture(lectures, parsed.get('date'), parsed.get('time'))

    # no unique date/time hit: fall back to keywords. Only lectures with the
    # correct date/time are tested if there are any, else all lectures.
    if len(matches) != 1 and len(keywords) > 0:
        candidates = matches if len(matches) > 0 else lectures
        matches = matchKeywordsOnLecture(candidates, keywords)

    # now we should have found exactly one match
    return matches, matchFileNameOnFormat(chunks)
def
log_sort_error
(
course_id
,
path
,
matches
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment