Skip to content
Snippets Groups Projects
Select Git revision
  • forbid-save-as
  • upload-via-token
  • moodle-integration
  • patch-double-tap-seek
  • patch_datum_anzeigen
  • patch_raum_anzeigen
  • master default protected
  • intros
  • live_sources
  • bootstrap4
  • modules
11 results

importer.py

Blame
  • Forked from Video AG Infrastruktur / website
    887 commits behind the upstream repository.
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    importer.py 4.21 KiB
    from server import *
    
    @app.route('/import/<int:id>', methods=['GET', 'POST'])
    @mod_required
    def import_from(source=None, id=None):
    	"""Manage the campus import sources of a course and preview importable events.

    	Request values named ``campus.<importid>.<field>`` are grouped per import
    	id and applied to the ``import_campus`` table: ids starting with ``new``
    	are inserted, all other ids are updated, or deleted when their URL is
    	empty. Afterwards every stored source URL of the course is fetched and
    	scraped for recurring and single appointments. Scraped events that match
    	neither an existing lecture nor an earlier scraped event (same place,
    	time and duration) are handed to the ``import_campus.html`` template.

    	Args:
    		source: Unused; kept so the signature stays compatible.
    		id: Id of the course, taken from the URL.

    	Returns:
    		The rendered ``import_campus.html`` template.

    	Raises:
    		IndexError: If the course query returns no rows, or a fetched page
    			lacks the expected table layout.
    	"""
    	courses = query('SELECT * FROM courses WHERE id = ?', id)[0]
    	lectures = query('SELECT * FROM lectures WHERE course_id = ?', courses['id'])

    	# Group the submitted fields by import id.
    	campus = {}
    	for key in request.values:
    		parts = key.split('.', 2)
    		if len(parts) != 3:
    			# Unrelated request value without the "group.id.field" shape;
    			# skip it instead of failing on the unpacking below.
    			continue
    		group, importid, field = parts
    		if group == 'campus':
    			campus.setdefault(importid, {})[field] = request.values[key]

    	for importid, fields in campus.items():
    		if 'url' not in fields:
    			# Incomplete entry: without a URL field we can neither save nor
    			# safely decide to delete, so leave the row untouched.
    			continue
    		url = fields['url']
    		if url != '' and 'type' not in fields:
    			# Saving needs both columns; ignore the half-submitted entry.
    			continue
    		if importid.startswith('new'):
    			if url != '':
    				query('INSERT INTO import_campus (url, type, course_id, last_checked, changed) VALUES (?, ?, ?, ?, 1)', url, fields['type'], id, datetime.now())
    			continue
    		try:
    			rowid = int(importid)
    		except ValueError:
    			# Neither "new..." nor a numeric row id: nothing we can address.
    			continue
    		if url != '':
    			query('UPDATE import_campus SET url = ?, `type` = ? WHERE (course_id = ?) AND (id = ?)', url, fields['type'], id, rowid)
    		else:
    			# An emptied URL field means "remove this source".
    			query('DELETE FROM import_campus WHERE (id = ?) AND (course_id = ?)', rowid, id)

    	import_campus = query('SELECT * FROM import_campus WHERE course_id = ?', id)
    	events = []
    	try:
    		from lxml import html
    		import urllib.request
    	except ImportError:
    		flash('python-lxml not found, campus import will not work.')
    	else:
    		# if u have to port this to anything new, god be with you.
    		fetch_timeout = 30  # seconds; keeps a stalled remote host from blocking the request forever
    		fmt = "%d.%m.%Y %H:%M"
    		tablexpath = "//td[text()='Termine und Ort']/following::table[1]"

    		def fetch(url):
    			# NOTE(review): the URL comes from the form above and is fetched
    			# server-side; urlopen also handles non-HTTP schemes such as
    			# file: - consider validating the scheme before storing it.
    			with urllib.request.urlopen(url, timeout=fetch_timeout) as response:
    				return response.read()

    		for campus_source in import_campus:
    			basetable = html.fromstring(fetch(campus_source['url'])).xpath(tablexpath)[0]

    			# parse recurring events
    			# The source page itself is the first page to scan; reuse the
    			# already parsed table instead of downloading the URL again.
    			pages = [basetable]
    			# The XPath expressions starting with "//" search the whole
    			# document of the node they are called on, not just its subtree.
    			for row in basetable.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']]"):
    				href = str(row.xpath("td[@name='togglePeriodApp']/a/@href")[0])
    				if href.startswith('event'):
    					# Relative link: prefix it with the campus base URL.
    					href = 'https://www.campus.rwth-aachen.de/rwth/all/' + href
    				pages.append(html.fromstring(fetch(href)).xpath(tablexpath)[0])

    			events_raw = []
    			for dom in pages:
    				#we get the "heading" row, from it extract the room and time. best way to get it is to match on the picture -.-
    				baserow = dom.xpath("//table[@cellpadding='5']//tr[@class='hierarchy4' and td[@name='togglePeriodApp']/*/img[@src='../../server/img/minus.gif']]")
    				if not baserow:
    					continue
    				baserow = baserow[0]
    				events_raw.append({
    					'place': baserow.xpath("td[6]/text()")[0][2:-1],
    					'start': baserow.xpath("td[3]/text()")[0],
    					'end': baserow.xpath("td[5]/text()")[0],
    					'dates': baserow.getparent().xpath("tr[@class='hierarchy5']//td[@colspan='3']/text()"),
    				})

    			# parse single appointments
    			single_header = basetable.xpath("//table[@cellpadding='3']/tr/td[text()='Einmalige Termine:']")
    			if single_header:
    				# A page without this header simply has no single
    				# appointments, so the section is optional.
    				singletable = single_header[0].getparent().getparent()
    				for cell in singletable.xpath("tr/td[2]"):
    					when = cell.xpath("text()[1]")[0]
    					# Date and times are cut out of the first text node at
    					# fixed character offsets.
    					events_raw.append({
    						'place': cell.xpath("text()[2]")[0][2:-1],
    						'dates': [when[4:14]],
    						'start': when[17:22],
    						'end': when[27:32],
    					})

    			#now we have to filter our data and do some lookups
    			for raw in events_raw:
    				# Resolve the room once per raw event rather than once per date.
    				known = query("SELECT name FROM places WHERE (campus_name = ?) OR ((NOT campus_name) AND name = ?)", raw['place'], raw['place'])
    				# Rooms missing from the places table keep their campus name
    				# instead of aborting the whole import.
    				place = known[0]['name'] if known else raw['place']
    				for day in raw['dates']:
    					start = datetime.strptime("%s %s" % (day, raw['start']), fmt)
    					end = datetime.strptime("%s %s" % (day, raw['end']), fmt)
    					events.append({
    						'time': start,
    						# timedelta.seconds (not total_seconds) is kept on
    						# purpose: it ignores the day component.
    						'duration': (end - start).seconds // 60,
    						'place': place,
    						'title': campus_source['type'],
    					})
    			# it is parsed.

    	def same_slot(a, b):
    		# Two events are considered the same if place, start and duration agree.
    		return (a['place'] == b['place']) and (a['time'] == b['time']) and (a['duration'] == b['duration'])

    	# Only scraped events can end up in the result: an existing lecture always
    	# matches itself in `lectures`, so there is no need to iterate over them.
    	uniqueevents = []
    	for event in events:
    		if any(same_slot(event, seen) for seen in uniqueevents):
    			continue
    		if any(same_slot(event, lecture) for lecture in lectures):
    			continue
    		event['type'] = 'import'
    		uniqueevents.append(event)

    	return render_template('import_campus.html', course=courses, import_campus=import_campus, events=uniqueevents)