Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
website
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Deploy
Releases
Container registry
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jannik Hellenkamp
website
Commits
edfacd0a
Unverified
Commit
edfacd0a
authored
6 years ago
by
Andreas Valder
Browse files
Options
Downloads
Patches
Plain Diff
refactor and split up of sort_file
parent
ff93e6bf
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
sorter.py
+64
-58
64 additions, 58 deletions
sorter.py
with
64 additions
and
58 deletions
sorter.py
+
64
−
58
View file @
edfacd0a
...
@@ -73,82 +73,88 @@ def insert_transcoded_video(jobid, jobtype, data, state, status):
...
@@ -73,82 +73,88 @@ def insert_transcoded_video(jobid, jobtype, data, state, status):
return
return
insert_video
(
data
[
'
lecture_id
'
],
data
[
'
output
'
][
'
path
'
],
data
[
'
format_id
'
],
status
[
'
hash
'
],
status
[
'
filesize
'
],
status
[
'
duration
'
],
data
[
'
source_id
'
]
)
insert_video
(
data
[
'
lecture_id
'
],
data
[
'
output
'
][
'
path
'
],
data
[
'
format_id
'
],
status
[
'
hash
'
],
status
[
'
filesize
'
],
status
[
'
duration
'
],
data
[
'
source_id
'
]
)
def
sort_file
(
filename
,
course
=
None
,
lectures
=
None
):
def
parseVideoFileName
(
splitFileName
):
# filenames: <handle>-<sorter>-<format>.mp4
# "sorter" musst be found with fuzzy matching. "sorter" musst be one or more of the following types: (inside the loop)
# '_' and ' ' are handled like '-'
splitfilename
=
filename
.
replace
(
'
_
'
,
'
-
'
).
replace
(
'
'
,
'
-
'
).
split
(
'
-
'
)
if
not
course
:
handle
=
splitfilename
[
0
]
if
splitfilename
[
0
].
endswith
(
'
ws
'
)
or
splitfilename
[
0
].
endswith
(
'
ss
'
):
handle
=
'
-
'
.
join
(
splitfilename
[:
2
])
courses
=
query
(
'
SELECT * FROM courses WHERE handle = ?
'
,
handle
)
if
not
courses
:
return
[],
0
course
=
courses
[
0
]
if
not
lectures
:
lectures
=
query
(
'
SELECT * from lectures where course_id = ?
'
,
course
[
'
id
'
])
# we save all extraced data in a dict
data
=
{
'
keywords
'
:
[]}
data
=
{
'
keywords
'
:
[]}
# parse the file name and save all data in 'data'
for
fileNameChunk
in
splitFileName
:
for
s
in
splitfilename
:
fileNameChunk
=
fileNameChunk
.
replace
(
'
.mp4
'
,
''
)
s
=
s
.
replace
(
'
.mp4
'
,
''
)
#-<YYMMDD> (date)
#-<YYMMDD> (date)
#-<HHMM> (time)
#-<HHMM> (time)
#-<keyword>
#-<keyword>
# Looking for keywords in: title,speaker,comment, comma seperated list in internal
# Looking for keywords in: title,speaker,comment, comma seperated list in internal
try
:
try
:
if
len
(
s
)
==
6
:
if
len
(
fileNameChunk
)
==
6
:
data
[
'
date
'
]
=
datetime
.
strptime
(
s
,
'
%y%m%d
'
).
date
()
data
[
'
date
'
]
=
datetime
.
strptime
(
fileNameChunk
,
'
%y%m%d
'
).
date
()
elif
len
(
s
)
==
4
:
elif
len
(
fileNameChunk
)
==
4
:
data
[
'
time
'
]
=
datetime
.
strptime
(
s
,
'
%H%M
'
).
time
()
data
[
'
time
'
]
=
datetime
.
strptime
(
fileNameChunk
,
'
%H%M
'
).
time
()
else
:
else
:
data
[
'
keywords
'
].
append
(
s
)
data
[
'
keywords
'
].
append
(
fileNameChunk
)
except
ValueError
:
except
ValueError
:
# if its not a date or time, handle it as keyword
# if its not valid date or time, handle it as keyword
data
[
'
keywords
'
].
append
(
s
)
data
[
'
keywords
'
].
append
(
fileNameChunk
)
# try to match the file on a single lecture
return
data
def
matchDatetimeOnLecture
(
lectures
,
date
,
time
):
matches
=
[]
matches
=
[]
# first try date and time (if one of them is set)
# first try date and time (if one of them is set)
if
(
'
date
'
in
data
)
or
(
'
time
'
in
data
):
if
date
or
time
:
print
(
1
)
for
lecture
in
lectures
:
for
lecture
in
lectures
:
if
not
(
'
time
'
in
lecture
)
or
not
lecture
[
'
time
'
]:
if
(
not
'
time
'
in
lecture
)
or
(
not
lecture
[
'
time
'
]
)
:
continue
continue
if
(
'
date
'
in
data
)
and
(
lecture
[
'
time
'
].
date
()
!=
dat
a
[
'
date
'
]
):
if
date
and
(
lecture
[
'
time
'
].
date
()
!=
dat
e
):
continue
continue
if
(
'
time
'
in
data
)
and
(
lecture
[
'
time
'
].
time
()
!=
data
[
'
time
'
]
):
if
time
and
(
lecture
[
'
time
'
].
time
()
!=
time
):
continue
continue
matches
.
append
(
lecture
)
matches
.
append
(
lecture
)
# if we can't match exactly based on date and time, we have to match keywords
return
matches
if
((
len
(
matches
)
!=
1
)
and
(
len
(
data
[
'
keywords
'
])
>
0
)):
#only test lectures with the correct date/time, if we have any. Else test for matches in all lectures of this course
def
matchKeywordsOnLecture
(
lectures
,
keywords
):
if
len
(
matches
)
==
0
:
matches
.
extend
(
lectures
)
found
=
False
for
field
in
[
'
title
'
,
'
speaker
'
,
'
comment
'
,
'
internal
'
]:
for
field
in
[
'
title
'
,
'
speaker
'
,
'
comment
'
,
'
internal
'
]:
for
lecture
in
match
es
:
for
lecture
in
lectur
es
:
for
keyword
in
data
[
'
keywords
'
]
:
for
keyword
in
keywords
:
# first test for exact match, else make it asci and try substring test
# first test for exact match, else make it asci and try substring test
if
(
keyword
==
lecture
[
field
])
or
\
if
(
keyword
==
lecture
[
field
])
or
\
(
str
(
keyword
).
lower
()
in
str
(
to_ascii
(
lecture
[
field
]).
lower
())):
(
str
(
keyword
).
lower
()
in
str
(
to_ascii
(
lecture
[
field
]).
lower
())):
found
=
True
return
[
lecture
]
matches
=
[
lecture
]
return
[]
if
found
:
break
def
matchFileNameOnFormat
(
splitFileName
):
if
found
:
break
if
found
:
break
# now we should have found exactly one match
# default format is "unknown", with id 0
# default format is "unknown", with id 0
fmt
=
0
formats
=
query
(
'
SELECT * FROM formats ORDER BY prio DESC
'
)
formats
=
query
(
'
SELECT * FROM formats ORDER BY prio DESC
'
)
for
videoformat
in
formats
:
for
videoformat
in
formats
:
# we match the last part of the file name without the extension
# we match the last part of the file name without the extension
formatstring
=
split
f
ile
n
ame
[
-
1
].
split
(
'
.
'
,
1
)[
0
].
lower
()
formatstring
=
split
F
ile
N
ame
[
-
1
].
split
(
'
.
'
,
1
)[
0
].
lower
()
if
formatstring
in
videoformat
[
'
keywords
'
].
replace
(
'
,
'
,
'
'
).
split
(
'
'
):
if
formatstring
in
videoformat
[
'
keywords
'
].
replace
(
'
,
'
,
'
'
).
split
(
'
'
):
fmt
=
videoformat
[
'
id
'
]
return
videoformat
[
'
id
'
]
break
return
0
def
sort_file
(
filename
,
course
=
None
,
lectures
=
None
):
# filenames: <handle>-<sorter>-<format>.mp4
# "sorter" musst be found with fuzzy matching. "sorter" musst be one or more of the following types: (inside the loop)
# '_' and ' ' are handled like '-'
splitFileName
=
filename
.
replace
(
'
_
'
,
'
-
'
).
replace
(
'
'
,
'
-
'
).
split
(
'
-
'
)
if
not
course
:
handle
=
splitFileName
[
0
]
if
splitFileName
[
0
].
endswith
(
'
ws
'
)
or
splitFileName
[
0
].
endswith
(
'
ss
'
):
handle
=
'
-
'
.
join
(
splitFileName
[:
2
])
courses
=
query
(
'
SELECT * FROM courses WHERE handle = ?
'
,
handle
)
if
not
courses
:
return
[],
0
course
=
courses
[
0
]
if
not
lectures
:
lectures
=
query
(
'
SELECT * from lectures where course_id = ?
'
,
course
[
'
id
'
])
data
=
parseVideoFileName
(
splitFileName
)
# try to match the file on a single lecture
matches
=
matchDatetimeOnLecture
(
lectures
,
data
.
get
(
'
date
'
),
data
.
get
(
'
time
'
))
# if we can't match exactly based on date and time, we have to match keywords
if
((
len
(
matches
)
!=
1
)
and
(
len
(
data
[
'
keywords
'
])
>
0
)):
#only test lectures with the correct date/time, if we have any. Else test for matches in all lectures of this course
if
len
(
matches
)
==
0
:
matches
=
matchKeywordsOnLecture
(
lectures
,
data
[
'
keywords
'
])
else
:
matches
=
matchKeywordsOnLecture
(
matches
,
data
[
'
keywords
'
])
# now we should have found exactly one match
fmt
=
matchFileNameOnFormat
(
splitFileName
)
return
matches
,
fmt
return
matches
,
fmt
def
log_sort_error
(
course_id
,
path
,
matches
):
def
log_sort_error
(
course_id
,
path
,
matches
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment