Skip to content
Snippets Groups Projects
Commit bb362a6e authored by Andreas Valder
Browse files

working parsing of campus office data

parent bff90c37
Branches
No related tags found
No related merge requests found
...@@ -324,25 +324,91 @@ def log(): ...@@ -324,25 +324,91 @@ def log():
changelog = query('SELECT *, ( "table" || "." || id_value || "." ||field) as path FROM changelog LEFT JOIN users ON (changelog.who = users.id) ORDER BY "when" DESC LIMIT 50') changelog = query('SELECT *, ( "table" || "." || id_value || "." ||field) as path FROM changelog LEFT JOIN users ON (changelog.who = users.id) ORDER BY "when" DESC LIMIT 50')
return render_template('log.html', changelog=changelog) return render_template('log.html', changelog=changelog)
@app.route('/import/<source>/<id>', methods=['GET', 'POST']) @app.route('/import/<source>/<int:id>', methods=['GET', 'POST'])
@app.route('/import/<source>/<int:numid>', methods=['GET', 'POST']) #@handle_errors('course', 'Diese Veranstaltung existiert nicht!', 404, IndexError)
@handle_errors('course', 'Diese Veranstaltung existiert nicht!', 404, IndexError)
@mod_required @mod_required
def import_from(numid=None, source=None, id=None): def import_from(source=None, id=None):
def recursive_dict(element):
return element.tag, dict(map(recursive_dict, element)) or element.text
if source != "campus": if source != "campus":
return "Unknown source", 404 return "Unknown source", 404
courses = query('SELECT * FROM courses WHERE id = ?', id)[0]
lectures = query('SELECT * FROM lectures WHERE course_id = ?', courses['id'])
campus={} campus={}
for i in request.values: for i in request.values:
group, id, field = i.split('.', 2) group, importid, field = i.split('.', 2)
if group == 'campus': if group == 'campus':
if not id in campus: if not importid in campus:
campus[id] = {} campus[importid] = {}
campus[id][field] = request.values[i] campus[importid][field] = request.values[i]
for i in campus:
if i.startswith('new'):
if campus[i]['url'] != '':
query('INSERT INTO import_campus (url, type, course_id, last_checked, changed) VALUES (?, ?, ?, ?, 1)',campus[i]['url'],campus[i]['type'],id,datetime.now())
else:
if campus[i]['url'] != '':
query('UPDATE import_campus SET url = ?, `type` = ? WHERE (course_id = ?) AND (id = ?)', campus[i]['url'],campus[i]['type'],id,int(i))
else:
query('DELETE FROM import_campus WHERE (id = ?) AND (course_id = ?)',int(i),id)
if numid: import_campus = query('SELECT * FROM import_campus WHERE course_id = ?',id)
courses = query('SELECT * FROM courses WHERE id = ?', numid)[0]
try:
from lxml import html
from lxml import etree
import urllib.request
# if u have to port this to anything new, god be with you.
for i in import_campus:
remote_html = urllib.request.urlopen(i['url']).read()
tablexpath = "//td[text()='Termine und Ort']/following::table[1]"
basetable = html.fromstring(remote_html).xpath(tablexpath)[0]
parsebase = html.tostring(basetable);
#parse recurring events
toparse = [i['url']]
for j in basetable.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']]"):
url = str(j.xpath("td[@name='togglePeriodApp']/a/@href")[0])
toparse.append(url)
events_raw = []
for j in toparse:
if j.startswith('event'):
url = 'https://www.campus.rwth-aachen.de/rwth/all/'+j
else: else:
courses = query('SELECT * FROM courses WHERE handle = ?', id)[0] url = j
lectures = query('SELECT * FROM lectures WHERE course_id = ?', courses['id']) text = urllib.request.urlopen(url).read()
return render_template('import_campus.html', course=courses, lectures=lectures, campus=campus) dom = html.fromstring(text).xpath(tablexpath)[0]
#we get the "heading" row, from it extract the room and time. best way to get it is to match on the picture -.-
baserow = dom.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']/*/img[@src='../../server/img/minus.gif']]")
if not baserow:
continue
baserow = baserow[0]
rowdata = {'dates': []}
rowdata['place'] = baserow.xpath("td[6]/text()")[0][2:-1]
rowdata['start'] = baserow.xpath("td[3]/text()")[0]
rowdata['end'] = baserow.xpath("td[5]/text()")[0]
rowdata['dates'] = baserow.getparent().xpath("tr[@class='hierarchy5']//td[@colspan='3']/text()")
events_raw.append(rowdata)
# parse single appointments
singletable = basetable.xpath("//table[@cellpadding='3']/tr/td[text()='Einmalige Termine:']")[0].getparent().getparent()
# i['single']=html.tostring(singletable)
for row in singletable.xpath("tr/td[2]"):
rowdata = {}
rowdata['place'] = row.xpath("text()[2]")[0][2:-1]
rowdata['date'] = row.xpath("text()[1]")[0][4:14]
rowdata['start'] = row.xpath("text()[1]")[0][17:22]
rowdata['end'] = row.xpath("text()[1]")[0][27:32]
events_raw.append(rowdata)
events = []
i['events'] = events
i['events_raw'] = events_raw
except ImportError:
flash('python-lxml not found, campus import will not work.')
return render_template('import_campus.html', course=courses, lectures=lectures, import_campus=import_campus)
...@@ -11,16 +11,36 @@ ...@@ -11,16 +11,36 @@
<p>Es folgen viele Pärchen an Campus-URL und Veranstaltungstyp Pärchen. Die Campus URL bekommt man aus dem Campus-System (<a href="https://www.campus.rwth-aachen.de/rwth/all/groups.asp">hier</a>). Der Veranstaltungstyp ist z.B. "Vorlesung" oder "Übung" oder "Praktikum". <p>Es folgen viele Pärchen an Campus-URL und Veranstaltungstyp Pärchen. Die Campus URL bekommt man aus dem Campus-System (<a href="https://www.campus.rwth-aachen.de/rwth/all/groups.asp">hier</a>). Der Veranstaltungstyp ist z.B. "Vorlesung" oder "Übung" oder "Praktikum".
</p> </p>
<p> <p>
{{ campus }} {{ import_campus|pprint }}
</p> </p>
<form method="post"> <form method="post">
<ul class="list-group row"> <ul class="list-group row" style="margin-left: 0px; margin-right: 0px;">
<li class="list-group-item form-inline"> {%for i in import_campus %}
<span class="input-group"> <li class="list-group-item form-inline row">
<input class="form-control" type="text" name="campus.0.url" placeholder="url"> <span class="input-group col-xs-8">
<input class="form-control" type="text" name="campus.{{i.id}}.url" value="{{i.url}}" id="campus-{{i.id}}-url" placeholder="url">
</span>
<span class="input-group col-xs-2">
<input class="form-control" type="test" name="campus.{{i.id}}.type" value="{{i.type}}" id="campus-{{i.id}}-type" placeholder="type">
</span>
<span class="input-group col-xs-1 pull-right">
<button class="btn btn-default pull-right" onclick="$('#campus-{{i.id}}-url').val('')">
<span class="glyphicon glyphicon-trash"></span>
</button>
</span>
</li>
{%endfor%}
<li class="list-group-item form-inline row">
<span class="input-group col-xs-8">
<input class="form-control" type="text" name="campus.new.url" placeholder="url">
</span>
<span class="input-group col-xs-2">
<input class="form-control" type="test" name="campus.new.type" placeholder="type">
</span> </span>
<span class="input-group"> <span class="input-group col-xs-1 pull-right">
<input class="form-control" type="test" name="campus.0.type" placeholder="type"> <button class="btn btn-default pull-right">
<span class="glyphicon glyphicon-plus"></span>
</button>
</span> </span>
</li> </li>
</ul> </ul>
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment