diff --git a/importer.py b/importer.py new file mode 100755 index 0000000000000000000000000000000000000000..88bd07000bb0f5f5816ccb7a860fb22068f3fb3f --- /dev/null +++ b/importer.py @@ -0,0 +1,113 @@ +from server import * + +@app.route('/import/<int:id>', methods=['GET', 'POST']) +@mod_required +def import_from(source=None, id=None): + + courses = query('SELECT * FROM courses WHERE id = ?', id)[0] + lectures = query('SELECT * FROM lectures WHERE course_id = ?', courses['id']) + + campus={} + for i in request.values: + group, importid, field = i.split('.', 2) + if group == 'campus': + if not importid in campus: + campus[importid] = {} + campus[importid][field] = request.values[i] + for i in campus: + if i.startswith('new'): + if campus[i]['url'] != '': + query('INSERT INTO import_campus (url, type, course_id, last_checked, changed) VALUES (?, ?, ?, ?, 1)',campus[i]['url'],campus[i]['type'],id,datetime.now()) + else: + if campus[i]['url'] != '': + query('UPDATE import_campus SET url = ?, `type` = ? WHERE (course_id = ?) AND (id = ?)', campus[i]['url'],campus[i]['type'],id,int(i)) + else: + query('DELETE FROM import_campus WHERE (id = ?) AND (course_id = ?)',int(i),id) + + import_campus = query('SELECT * FROM import_campus WHERE course_id = ?',id) + events = [] + try: + from lxml import html + from lxml import etree + import urllib.request + # if u have to port this to anything new, god be with you. + for i in import_campus: + remote_html = urllib.request.urlopen(i['url']).read() + tablexpath = "//td[text()='Termine und Ort']/following::table[1]" + basetable = html.fromstring(remote_html).xpath(tablexpath)[0] + parsebase = html.tostring(basetable); + + #parse recurring events + toparse = [i['url']] + for j in basetable.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']]"): + url = str(j.xpath("td[@name='togglePeriodApp']/a/@href")[0]) + toparse.append(url) + events_raw = [] + for j in toparse: + if j.startswith('event'): + url = 'https://www.campus.rwth-aachen.de/rwth/all/'+j + else: + url = j + text = urllib.request.urlopen(url).read() + dom = html.fromstring(text).xpath(tablexpath)[0] + #we get the "heading" row, from it extract the room and time. best way to get it is to match on the picture -.- + baserow = dom.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']/*/img[@src='../../server/img/minus.gif']]") + if not baserow: + continue + baserow = baserow[0] + rowdata = {'dates': []} + rowdata['place'] = baserow.xpath("td[6]/text()")[0][2:-1] + rowdata['start'] = baserow.xpath("td[3]/text()")[0] + rowdata['end'] = baserow.xpath("td[5]/text()")[0] + rowdata['dates'] = baserow.getparent().xpath("tr[@class='hierarchy5']//td[@colspan='3']/text()") + events_raw.append(rowdata) + + # parse single appointments + singletable = basetable.xpath("//table[@cellpadding='3']/tr/td[text()='Einmalige Termine:']")[0].getparent().getparent() + for row in singletable.xpath("tr/td[2]"): + rowdata = {} + rowdata['place'] = row.xpath("text()[2]")[0][2:-1] + rowdata['dates'] = [row.xpath("text()[1]")[0][4:14]] + rowdata['start'] = row.xpath("text()[1]")[0][17:22] + rowdata['end'] = row.xpath("text()[1]")[0][27:32] + events_raw.append(rowdata) + + #now we have to filter our data and do some lookups + for j in events_raw: + for k in j['dates']: + e = {} + fmt= "%d.%m.%Y %H:%M" + e['time'] = datetime.strptime("%s %s"%(k,j['start']) ,fmt) + e['duration'] = int((datetime.strptime("%s %s"%(k,j['end']) ,fmt) - e['time']).seconds/60) + e['place'] = query("SELECT name FROM places WHERE (campus_name = ?) OR ((NOT campus_name) AND name = ?)",j['place'],j['place'])[0]['name']; + e['exists'] = query("SELECT count(id) as c from lectures WHERE (time = ?) and (duration = ?) and (place = ?) and (course_id = ?)",e['time'],e['duration'],e['place'],id)[0]['c'] > 0 + events.append(e) + # it is pared. + + + + except ImportError: + flash('python-lxml not found, campus import will not work.') + + uniqueevents = [] + for i in events: + seen = False + for j in uniqueevents: + seen = (i['place'] == j['place']) and (i['time'] == j['time']) and (i['duration'] == j['duration']) + if seen: + break + if (not seen) and (not i['exists']): + i['type'] = 'import' + uniqueevents.append(i) + + for i in lectures: + i['hascampusmapping'] = False + for j in uniqueevents: + i['hascampusmapping'] = (i['place'] == j['place']) and (i['time'] == j['time']) and (i['duration'] == j['duration']) + if i['hascampusmapping']: + break + if not i['hascampusmapping']: + i['type'] = 'lecture' + uniqueevents.append(i) + + return render_template('import_campus.html', course=courses, import_campus=import_campus, events=uniqueevents) diff --git a/server.py b/server.py index e42c6f9bb726abc10b0104d4cebbdccda87f017e..206ada01a3e2b681b77099a4a22e043ea512c3e0 100755 --- a/server.py +++ b/server.py @@ -354,110 +354,9 @@ def log(): changelog = query('SELECT *, ( "table" || "." || id_value || "." ||field) as path FROM changelog LEFT JOIN users ON (changelog.who = users.id) ORDER BY "when" DESC LIMIT 50') return render_template('log.html', changelog=changelog) -@app.route('/import/<source>/<int:id>', methods=['GET', 'POST']) -@mod_required -def import_from(source=None, id=None): - - if source != "campus": - return "Unknown source", 404 - - courses = query('SELECT * FROM courses WHERE id = ?', id)[0] - lectures = query('SELECT * FROM lectures WHERE course_id = ?', courses['id']) - - campus={} - for i in request.values: - group, importid, field = i.split('.', 2) - if group == 'campus': - if not importid in campus: - campus[importid] = {} - campus[importid][field] = request.values[i] - for i in campus: - if i.startswith('new'): - if campus[i]['url'] != '': - query('INSERT INTO import_campus (url, type, course_id, last_checked, changed) VALUES (?, ?, ?, ?, 1)',campus[i]['url'],campus[i]['type'],id,datetime.now()) - else: - if campus[i]['url'] != '': - query('UPDATE import_campus SET url = ?, `type` = ? WHERE (course_id = ?) AND (id = ?)', campus[i]['url'],campus[i]['type'],id,int(i)) - else: - query('DELETE FROM import_campus WHERE (id = ?) AND (course_id = ?)',int(i),id) - - import_campus = query('SELECT * FROM import_campus WHERE course_id = ?',id) - events = [] - try: - from lxml import html - from lxml import etree - import urllib.request - # if u have to port this to anything new, god be with you. - for i in import_campus: - remote_html = urllib.request.urlopen(i['url']).read() - tablexpath = "//td[text()='Termine und Ort']/following::table[1]" - basetable = html.fromstring(remote_html).xpath(tablexpath)[0] - parsebase = html.tostring(basetable); - - #parse recurring events - toparse = [i['url']] - for j in basetable.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']]"): - url = str(j.xpath("td[@name='togglePeriodApp']/a/@href")[0]) - toparse.append(url) - events_raw = [] - for j in toparse: - if j.startswith('event'): - url = 'https://www.campus.rwth-aachen.de/rwth/all/'+j - else: - url = j - text = urllib.request.urlopen(url).read() - dom = html.fromstring(text).xpath(tablexpath)[0] - #we get the "heading" row, from it extract the room and time. best way to get it is to match on the picture -.- - baserow = dom.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']/*/img[@src='../../server/img/minus.gif']]") - if not baserow: - continue - baserow = baserow[0] - rowdata = {'dates': []} - rowdata['place'] = baserow.xpath("td[6]/text()")[0][2:-1] - rowdata['start'] = baserow.xpath("td[3]/text()")[0] - rowdata['end'] = baserow.xpath("td[5]/text()")[0] - rowdata['dates'] = baserow.getparent().xpath("tr[@class='hierarchy5']//td[@colspan='3']/text()") - events_raw.append(rowdata) - - # parse single appointments - singletable = basetable.xpath("//table[@cellpadding='3']/tr/td[text()='Einmalige Termine:']")[0].getparent().getparent() - for row in singletable.xpath("tr/td[2]"): - rowdata = {} - rowdata['place'] = row.xpath("text()[2]")[0][2:-1] - rowdata['dates'] = [row.xpath("text()[1]")[0][4:14]] - rowdata['start'] = row.xpath("text()[1]")[0][17:22] - rowdata['end'] = row.xpath("text()[1]")[0][27:32] - events_raw.append(rowdata) - - #now we have to filter our data and do some lookups - for j in events_raw: - for k in j['dates']: - e = {} - fmt= "%d.%m.%Y %H:%M" - e['time'] = datetime.strptime("%s %s"%(k,j['start']) ,fmt) - e['duration'] = int((datetime.strptime("%s %s"%(k,j['end']) ,fmt) - e['time']).seconds/60) - e['place'] = query("SELECT name FROM places WHERE (campus_name = ?) OR ((NOT campus_name) AND name = ?)",j['place'],j['place'])[0]['name']; - e['exists'] = len(query("SELECT id from lectures WHERE (time = ?) and (duration = ?) and (place = ?) and (course_id = ?)",e['time'],e['duration'],e['place'],id)) > 0 - events.append(e) - # it is pared. - - - - except ImportError: - flash('python-lxml not found, campus import will not work.') - - uniqevents = [] - for i in events: - seen = False - for j in events: - seen = (i['place'] == j['place']) and (i['time'] == j['time']) and (i['duration'] == j['duration']) - if not seen: - uniqevents.append(i) - - return render_template('import_campus.html', course=courses, lectures=lectures, import_campus=import_campus, events=uniqevents) - @app.route('/files/<filename>') def files(filename): return redirect(config['VIDEOPREFIX']+'/'+filename) import feeds +import importer diff --git a/templates/course_id.html b/templates/course_id.html index c023a5f9b22b73347e48939ebd958254883b68a7..4e2433ce007b6c0a05389aec3ca36766e6287701 100644 --- a/templates/course_id.html +++ b/templates/course_id.html @@ -31,7 +31,7 @@ </div> <div class="panel panel-default"> <div class="panel-heading"> - <h1 class="panel-title">Videos{% if ismod() %} <a class="btn btn-default" style="margin-right: 5px;" href="todo">Neuer Termin</a><a class="btn btn-default" style="margin-right: 5px;" href="{{url_for('import_from', source="campus", id=course['id'])}}">Campus Import</a>{% endif %}</h1> + <h1 class="panel-title">Videos{% if ismod() %} <a class="btn btn-default" style="margin-right: 5px;" href="todo">Neuer Termin</a><a class="btn btn-default" style="margin-right: 5px;" href="{{url_for('import_from', id=course['id'])}}">Campus Import</a>{% endif %}</h1> </div> <ul class="list-group lectureslist"> {% for l in lectures %} diff --git a/templates/import_campus.html b/templates/import_campus.html index 5a5b0366c2e2eb767e18ea367dd87cd7e40120eb..c37be8d8c3ad394e607f6ef90bb7069673e8522d 100644 --- a/templates/import_campus.html +++ b/templates/import_campus.html @@ -1,4 +1,4 @@ -{% from 'macros.html' import preview %} +{% from 'macros.html' import valuedeletebtn %} {% extends "base.html" %} {% block content %} <div class="panel-group"> @@ -60,9 +60,37 @@ <h1 class="panel-title">Fehlende Termine:</h1> </div> <ul class="list-group-item"> - {% for i in events if not i.exists %} - <li class="list-group-item"> - {{i|pprint}} + {% for i in events|sort(attribute='time') %} + <li class="list-group-item row"> + <span class="col-xs-3"> + Time: {{i.time}} + </span> + <span class="col-xs-2"> + Duration: {{i.duration}} + </span> + <span class="col-xs-3"> + Place: {{i.place}} + </span> + <span class="col-xs-3"> + {% if (i.type == 'lecture') %} + <p> + {{i.comment}} + </p> + <p> + {{i.internal}} + </p> + {% endif%} + </span> + <span class="col-xs-1"> + <span class="pull-right"> + {% if (i.type == 'lecture') and (not i.hascampusmapping) %} + {{ valuedeletebtn(['lectures',i.id,'deleted']) }} + {% endif%} + {% if (i.type == 'import') and (not i.exists) %} + anlegen + {% endif%} + </span> + </span> </li> {% endfor %} </ul>