-
Notifications
You must be signed in to change notification settings - Fork 1
/
import_repository.py
83 lines (61 loc) · 2.13 KB
/
import_repository.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import csv
import requests
# Store FILE
import codecs
from unidecode import unidecode
# Search terms sent to the repository: plain surnames, or a double-quoted
# "surname, given name" phrase where an exact phrase match is wanted.
MAVLAB = [
    'wagter',
    'croon',
    'remes',
    'karasek',
    'smeur',
    'dupeyroux',
    'hamaza',
    '"Scheper, K.Y.W."',
    '"popovic, marija"',
]

# All terms combined into one query, separated by a URL-encoded " OR ",
# e.g. 'wagter%20OR%20croon%20OR%20remes...'
search = '%20OR%20'.join(MAVLAB)

# Search URL restricted to the "education" collection, requesting the CSV
# rendering of the results (display=tud_csv).
url = ''.join((
    'https://repository.tudelft.nl/islandora/search/',
    search,
    '?collection=education&display=tud_csv',
))
print(url)
def msc_download_to_csv(timeout=120):
    """Download the repository search results and store them as ``msc.csv``.

    Requests the module-level ``url`` and writes the response text, encoded
    as UTF-8, to ``msc.csv`` in the current working directory (overwriting
    any existing file).

    Args:
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``. Without it a stalled connection
            would block the script forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Failing here prevents an error page from being saved and later
            parsed as if it were CSV data.
        requests.RequestException: On connection errors or timeouts.
    """
    # NOTE: a single request is made; no paging parameter is sent.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open('msc.csv', 'wb') as f:
        # Written as bytes so the file is UTF-8 regardless of the platform's
        # default text encoding.
        f.write(response.text.encode('utf-8'))
# Number of CSV columns the conversion reads (indices 0..7).
_MIN_COLUMNS = 8


def _split_author(field):
    """Split an author field of the form 'Name (School)' into its parts.

    Returns a ``(name, school)`` tuple with surrounding whitespace removed.
    When the field has no parenthesised part, the school defaults to
    'Delft University of Technology'.
    """
    if '(' in field:
        parts = field.split('(')
        # Only the first parenthesised group is taken as the school.
        return parts[0].strip(), parts[1].replace(')', '').strip()
    return field.strip(), 'Delft University of Technology'


def _bibtex_entry(row, name, school):
    """Format one CSV row as a BibTeX ``@mastersthesis`` entry string.

    Column usage: 0 = citation key, 1 = url, 2 = title, 4 = note,
    5 = year, 6 = abstract, 7 = keywords. ``name`` and ``school`` come from
    column 3 (see ``_split_author``).
    """
    # NOTE(review): values are written verbatim; unbalanced braces in a
    # field (e.g. in an abstract) would yield an invalid entry.
    return ''.join([
        '@mastersthesis{' + row[0] + ',\n',
        '\tabstract = {' + row[6] + '},\n',
        '\tauthor = {' + name + '},\n',
        '\tkeywords = {' + row[7] + '},\n',
        '\tnote = {' + row[4] + '},\n',
        '\tschool = {' + school + '},\n',
        '\ttitle = {' + row[2] + '},\n',
        '\ttype = {mathesis},\n',
        '\turl = {' + row[1] + '},\n',
        '\tyear = {' + row[5] + '}\n',
        '}\n\n',
    ])


# Fetch a fresh CSV export before converting it.
msc_download_to_csv()
print('Downloaded...\n')

# Search terms as they are matched against column 4: lower-case and without
# the double quotes that are only meaningful in the search URL.
_terms = [term.lower().replace('"', '') for term in MAVLAB]

# Create bib
# newline='' on the CSV lets the csv module see the original line endings, so
# quoted fields containing line breaks are parsed as a single field.
# newline='' on the .bib writes '\n' untranslated on every platform.
with open('msc.csv', newline='', encoding='utf-8') as csvf:
    with open('msc.bib', 'w', encoding='utf-8', newline='') as bibf:
        # Byte-order mark followed by a header naming the source URL.
        bibf.write(u'\ufeff')
        bibf.write('# AUTOGENERATED\n# Import from: ' + url + '\n\n\n')

        reader = csv.reader(csvf, delimiter=',')
        # Skip header (tolerating a completely empty download).
        next(reader, None)

        for row in reader:
            if not row:
                continue
            if len(row) < _MIN_COLUMNS:
                # A short row cannot be converted; report it instead of
                # crashing with an IndexError.
                print('Skipping malformed row: %r' % (row,))
                continue

            # Extra check: the search may hit a name anywhere in a record
            # (e.g. the abstract); keep only rows where a name occurs in
            # column 4 (presumably the supervisors/contributors - confirm
            # against the CSV header). unidecode strips accents so that
            # e.g. accented surnames still match the plain-ASCII terms.
            contributors = unidecode(row[4].lower())
            if not any(term in contributors for term in _terms):
                continue

            name, school = _split_author(row[3])
            bibf.write(_bibtex_entry(row, name, school))