Commit bb362a6e authored by Andreas Valder
Browse files

working parsing of campus office data

parent bff90c37
......@@ -324,25 +324,91 @@ def log():
changelog = query('SELECT *, ( "table" || "." || id_value || "." ||field) as path FROM changelog LEFT JOIN users ON (changelog.who = ORDER BY "when" DESC LIMIT 50')
return render_template('log.html', changelog=changelog)
@app.route('/import/<source>/<id>', methods=['GET', 'POST'])
@app.route('/import/<source>/<int:numid>', methods=['GET', 'POST'])
@handle_errors('course', 'Diese Veranstaltung existiert nicht!', 404, IndexError)
@app.route('/import/<source>/<int:id>', methods=['GET', 'POST'])
#@handle_errors('course', 'Diese Veranstaltung existiert nicht!', 404, IndexError)
def import_from(numid=None, source=None, id=None):
def import_from(source=None, id=None):
def recursive_dict(element):
return element.tag, dict(map(recursive_dict, element)) or element.text
if source != "campus":
return "Unknown source", 404
courses = query('SELECT * FROM courses WHERE id = ?', id)[0]
lectures = query('SELECT * FROM lectures WHERE course_id = ?', courses['id'])
for i in request.values:
group, id, field = i.split('.', 2)
group, importid, field = i.split('.', 2)
if group == 'campus':
if not id in campus:
campus[id] = {}
campus[id][field] = request.values[i]
if not importid in campus:
campus[importid] = {}
campus[importid][field] = request.values[i]
for i in campus:
if i.startswith('new'):
if campus[i]['url'] != '':
query('INSERT INTO import_campus (url, type, course_id, last_checked, changed) VALUES (?, ?, ?, ?, 1)',campus[i]['url'],campus[i]['type'],id,
if campus[i]['url'] != '':
query('UPDATE import_campus SET url = ?, `type` = ? WHERE (course_id = ?) AND (id = ?)', campus[i]['url'],campus[i]['type'],id,int(i))
query('DELETE FROM import_campus WHERE (id = ?) AND (course_id = ?)',int(i),id)
import_campus = query('SELECT * FROM import_campus WHERE course_id = ?',id)
if numid:
courses = query('SELECT * FROM courses WHERE id = ?', numid)[0]
courses = query('SELECT * FROM courses WHERE handle = ?', id)[0]
lectures = query('SELECT * FROM lectures WHERE course_id = ?', courses['id'])
return render_template('import_campus.html', course=courses, lectures=lectures, campus=campus)
from lxml import html
from lxml import etree
import urllib.request
# if u have to port this to anything new, god be with you.
for i in import_campus:
remote_html = urllib.request.urlopen(i['url']).read()
tablexpath = "//td[text()='Termine und Ort']/following::table[1]"
basetable = html.fromstring(remote_html).xpath(tablexpath)[0]
parsebase = html.tostring(basetable);
#parse recurring events
toparse = [i['url']]
for j in basetable.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']]"):
url = str(j.xpath("td[@name='togglePeriodApp']/a/@href")[0])
events_raw = []
for j in toparse:
if j.startswith('event'):
url = ''+j
url = j
text = urllib.request.urlopen(url).read()
dom = html.fromstring(text).xpath(tablexpath)[0]
# We take the "heading" row and extract the room and time from it. The best way to find that row is to match on its image (minus.gif) -.-
baserow = dom.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']/*/img[@src='../../server/img/minus.gif']]")
if not baserow:
baserow = baserow[0]
rowdata = {'dates': []}
rowdata['place'] = baserow.xpath("td[6]/text()")[0][2:-1]
rowdata['start'] = baserow.xpath("td[3]/text()")[0]
rowdata['end'] = baserow.xpath("td[5]/text()")[0]
rowdata['dates'] = baserow.getparent().xpath("tr[@class='hierarchy5']//td[@colspan='3']/text()")
# parse single appointments
singletable = basetable.xpath("//table[@cellpadding='3']/tr/td[text()='Einmalige Termine:']")[0].getparent().getparent()
# i['single']=html.tostring(singletable)
for row in singletable.xpath("tr/td[2]"):
rowdata = {}
rowdata['place'] = row.xpath("text()[2]")[0][2:-1]
rowdata['date'] = row.xpath("text()[1]")[0][4:14]
rowdata['start'] = row.xpath("text()[1]")[0][17:22]
rowdata['end'] = row.xpath("text()[1]")[0][27:32]
events = []
i['events'] = events
i['events_raw'] = events_raw
except ImportError:
flash('python-lxml not found, campus import will not work.')
return render_template('import_campus.html', course=courses, lectures=lectures, import_campus=import_campus)
......@@ -11,17 +11,37 @@
<p>Es folgen viele Pärchen an Campus-URL und Veranstaltungstyp Pärchen. Die Campus URL bekommt man aus dem Campus-System (<a href="">hier</a>). Der Veranstaltungstyp ist z.B. "Vorlesung" oder "Übung" oder "Praktikum".
{{ campus }}
{{ import_campus|pprint }}
<form method="post">
<ul class="list-group row">
<li class="list-group-item form-inline">
<span class="input-group">
<input class="form-control" type="text" name="campus.0.url" placeholder="url">
<ul class="list-group row" style="margin-left: 0px; margin-right: 0px;">
{%for i in import_campus %}
<li class="list-group-item form-inline row">
<span class="input-group col-xs-8">
<input class="form-control" type="text" name="campus.{{}}.url" value="{{i.url}}" id="campus-{{}}-url" placeholder="url">
<span class="input-group">
<input class="form-control" type="test" name="campus.0.type" placeholder="type">
<span class="input-group col-xs-2">
<input class="form-control" type="test" name="campus.{{}}.type" value="{{i.type}}" id="campus-{{}}-type" placeholder="type">
<span class="input-group col-xs-1 pull-right">
<button class="btn btn-default pull-right" onclick="$('#campus-{{}}-url').val('')">
<span class="glyphicon glyphicon-trash"></span>
<li class="list-group-item form-inline row">
<span class="input-group col-xs-8">
<input class="form-control" type="text" name="" placeholder="url">
<span class="input-group col-xs-2">
<input class="form-control" type="test" name="" placeholder="type">
<span class="input-group col-xs-1 pull-right">
<button class="btn btn-default pull-right">
<span class="glyphicon glyphicon-plus"></span>
<button class="btn btn-default pull-right" type="submit">speichern und neu Laden</button>
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment