importer.py 5.66 KB
Newer Older
1
2
from server import *

3
4
5
import urllib.request
import urllib.parse

6
@app.route('/internal/import/<int:id>', methods=['GET', 'POST'])
@mod_required
def list_import_sources(id):
	"""Show the campus import sources of a course and apply submitted edits.

	Submitted values are expected to be named ``campus.<importid>.<field>``
	with the fields ``url`` and ``type``:

	* an importid starting with ``new`` inserts a new source (if url is set),
	* any other importid is taken as the row id of an existing source, which
	  is updated, or deleted when the submitted url is empty.

	Values that do not follow this naming scheme, or entries that are
	incomplete, are ignored instead of aborting the request.

	id -- id of the course (from the URL).
	Returns the rendered 'import_campus.html' template.
	"""
	courses = query('SELECT * FROM courses WHERE id = ?', id)[0]

	# Collect the submitted fields grouped by import id.
	campus = {}
	for key in request.values:
		parts = key.split('.', 2)
		if len(parts) != 3:
			# Not one of our 'group.importid.field' inputs.
			continue
		group, importid, field = parts
		if group == 'campus':
			campus.setdefault(importid, {})[field] = request.values[key]

	for importid, fields in campus.items():
		if 'url' not in fields:
			# Without a url we cannot tell whether to update or delete.
			continue
		url = fields['url']
		is_new = importid.startswith('new')
		rowid = None
		if not is_new:
			try:
				rowid = int(importid)
			except ValueError:
				continue
		if url == '':
			# An empty url removes an existing source; empty new rows are skipped.
			if not is_new:
				query('DELETE FROM import_campus WHERE (id = ?) AND (course_id = ?)', rowid, id)
			continue
		if 'type' not in fields:
			continue
		if is_new:
			modify('INSERT INTO import_campus (url, type, course_id, last_checked, changed) VALUES (?, ?, ?, ?, 1)', url, fields['type'], id, datetime.now())
		else:
			# NOTE(review): INSERT goes through modify() while UPDATE/DELETE use
			# query() -- confirm that query() persists writes as intended.
			query('UPDATE import_campus SET url = ?, `type` = ? WHERE (course_id = ?) AND (id = ?)', url, fields['type'], id, rowid)

	import_campus = query('SELECT * FROM import_campus WHERE course_id = ?', id)

	return render_template('import_campus.html', course=courses, import_campus=import_campus, events=[])

31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def _campus_place(node, textpath, linkpath):
	"""Return the room named below node, or '' if no room is assigned.

	textpath/linkpath are XPath expressions (relative to node) selecting the
	text nodes and the link elements of the room cell.
	"""
	texts = node.xpath(textpath)
	# "Kein Raum vergeben" (no room assigned) is a special case.
	if texts and texts[0] == 'Kein Raum vergeben':
		return ''
	links = node.xpath(linkpath)
	if links:
		return links[0].text_content()
	if not texts:
		return ''
	# Only the first word of the cell text is used as room identifier.
	return texts[0].split(' ', 1)[0]

def fetch_co_course_events(i):
	"""Scrape all appointments of one campus import source.

	i -- mapping with the keys 'url' (course page to fetch) and 'type'
	     (used as title of every returned event).

	Returns a list of dicts with the keys 'time' (datetime of the start),
	'duration' (in minutes), 'place' and 'title'. If the page cannot be
	fetched or does not contain the appointments table, a message is flashed
	and an empty list is returned.

	Raises ImportError if lxml is not installed.
	"""
	from lxml import html
	events = []
	try:
		with urllib.request.urlopen(i['url']) as response:
			remote_html = response.read()
	except (OSError, ValueError):
		# URLError/HTTPError are OSErrors, malformed URLs raise ValueError.
		flash("Ungültige URL: '"+i['url']+"'")
		return events
	tablexpath = "//td[text()='Termine und Ort']/following::table[1]"
	tables = html.fromstring(remote_html).xpath(tablexpath)
	if not tables:
		# The URL is reachable but is not a course page we can parse.
		flash("Ungültige URL: '"+i['url']+"'")
		return events
	basetable = tables[0]

	# parse recurring events: every collapsed period links to a page on which
	# that period is expanded, so each of these pages has to be fetched
	toparse = [i['url']]
	for j in basetable.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']]"):
		toparse.append(str(j.xpath("td[@name='togglePeriodApp']/a/@href")[0]))
	events_raw = []
	for j in toparse:
		if j.startswith('event'):
			url = 'https://www.campus.rwth-aachen.de/rwth/all/'+j
		else:
			url = j
		with urllib.request.urlopen(url) as response:
			text = response.read()
		dom = html.fromstring(text).xpath(tablexpath)[0]
		# we get the "heading" row, from it extract the room and time. best way to get it is to match on the picture -.-
		baserow = dom.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']/*/img[@src='../../server/img/minus.gif']]")
		if not baserow:
			continue
		baserow = baserow[0]
		events_raw.append({
			'place': _campus_place(baserow, "td[6]/text()", "td[6]/a"),
			'start': baserow.xpath("td[3]/text()")[0],
			'end': baserow.xpath("td[5]/text()")[0],
			'dates': baserow.getparent().xpath("tr[@class='hierarchy5']//td[@colspan='3']/text()"),
		})

	# parse single appointments
	singleheader = basetable.xpath("//table[@cellpadding='3']/tr/td[text()='Einmalige Termine:']")
	if singleheader:
		singletable = singleheader[0].getparent().getparent()
		for row in singletable.xpath("tr/td[2]"):
			# date and times are cut out of the first text node at fixed offsets
			when = row.xpath("text()[1]")[0]
			events_raw.append({
				'place': _campus_place(row, "text()[2]", "a"),
				'dates': [when[4:14]],
				'start': when[17:22],
				'end': when[27:32],
			})

	# now we have to filter our data and do some lookups
	fmt = "%d.%m.%Y %H:%M"
	for j in events_raw:
		if not j['dates']:
			continue
		# the place only depends on the raw entry, so look it up once per entry
		place = str(j['place'])
		if place != '':
			dbplace = query("SELECT name FROM places WHERE (campus_room = ?) OR (campus_name = ?) OR ((NOT campus_name) AND name = ?)", place, place, place)
			if dbplace:
				placename = dbplace[0]['name']
			else:
				placename = 'Unbekannter Ort ('+place+')'
		else:
			placename = ''
		for k in j['dates']:
			start = datetime.strptime("%s %s"%(k, j['start']), fmt)
			end = datetime.strptime("%s %s"%(k, j['end']), fmt)
			events.append({
				'time': start,
				# .seconds (not total_seconds) is kept from the original code
				'duration': int((end - start).seconds/60),
				'place': placename,
				'title': i['type'],
			})
	# it is parsed.
	return events

114
def _event_slot(entry):
	"""Return the (place, time, duration) triple that identifies an event."""
	return (entry['place'], entry['time'], entry['duration'])

@app.route('/internal/import/<int:id>/now', methods=['GET', 'POST'])
@mod_required
def import_from(id):
	"""Compare the events of all campus import sources with the lectures of a course.

	Two entries are considered the same event if place, time and duration
	are equal.

	id -- id of the course (from the URL).
	Returns the rendered 'import_campus.html' template with
	newevents (fetched events that are not yet lectures, without duplicates)
	and deletedlectures (lectures that are missing from the fetched events).
	"""
	courses = query('SELECT * FROM courses WHERE id = ?', id)[0]
	lectures = query('SELECT * FROM lectures WHERE course_id = ?', courses['id'])

	import_campus = query('SELECT * FROM import_campus WHERE course_id = ?', id)
	events = []
	try:
		# if u have to port this to anything new, god be with you.
		for source in import_campus:
			events += fetch_co_course_events(source)
	except ImportError:
		flash('python-lxml not found, campus import will not work.')
	# NOTE(review): if fetching fails, events stays (partially) empty and the
	# affected lectures are all listed as deleted below.

	# events to add: fetched events that are neither an existing lecture nor
	# a repetition of an event already collected
	lecture_slots = {_event_slot(lecture) for lecture in lectures}
	seen_slots = set()
	newevents = []
	for event in events:
		slot = _event_slot(event)
		if slot in lecture_slots or slot in seen_slots:
			continue
		seen_slots.add(slot)
		newevents.append(event)

	# deleted events: lectures without a matching fetched event
	event_slots = {_event_slot(event) for event in events}
	deletedlectures = [lecture for lecture in lectures if _event_slot(lecture) not in event_slots]

	return render_template('import_campus.html', course=courses, import_campus=import_campus, newevents=newevents, deletedlectures=deletedlectures)