Skip to content

Commit 976292b

Browse files
committed
Fix issues with the PDF filename and the new pybliometrics syntax
1 parent 375b29a commit 976292b

File tree

3 files changed

+61
-33
lines changed

3 files changed

+61
-33
lines changed

examples/use-cases/libConvert.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import pybliometrics
12
from pybliometrics.scopus import (
23
AbstractRetrieval,
34
AuthorRetrieval,
@@ -9,6 +10,8 @@
910
from habanero import Crossref
1011
import json
1112

13+
#initialisation
14+
pybliometrics.scopus.init()
1215

1316
cr = Crossref()
1417

@@ -90,10 +93,11 @@ def buildAffiliation(affiliation):
9093

9194
def findURL(data):
    """Return the URL of the first ``text/html`` entry in ``data``.

    Args:
        data: Iterable of dict-like link entries, each read through
            ``.get("content-type")`` and ``.get("URL")``. May be ``None``
            or empty, e.g. when the caller obtained it with
            ``.get("link", None)`` and the key was absent.

    Returns:
        The ``"URL"`` value of the first entry whose ``"content-type"`` is
        ``"text/html"``, or ``None`` when ``data`` is falsy or no entry
        matches.
    """
    # Guard first: iterating over None would raise TypeError.
    if not data:
        return None
    # Stop at the first HTML entry, even if its "URL" value is missing.
    return next(
        (it.get("URL") for it in data if it.get("content-type") == "text/html"),
        None,
    )
98102

99103

@@ -177,7 +181,7 @@ def buildJSON(article, json_dir, pdf_dir=None):
177181
# "link2": "https://link2.com/ID",
178182
# "link3": "https://link3.com/ID"
179183
}
180-
publisherlink = findURL(dataCrossRef["message"]["link"])
184+
publisherlink = findURL(dataCrossRef["message"].get("link",None))
181185
if publisherlink:
182186
content["extref"]["publisher"] = publisherlink
183187
enKeywords = article.get("keyword", None)
@@ -193,7 +197,7 @@ def buildJSON(article, json_dir, pdf_dir=None):
193197
"halDomain": ["spi"]
194198
}
195199
if pdf_path:
196-
content["fileTmp"] = os.path.relpath(pdf_path, start=json_dir)
200+
content["file"] = os.path.relpath(pdf_path, start=json_dir)
197201
content["authors"] = buildAuthors(dataScopus.authors)
198202
content["structures"] = buildAffiliations(content["authors"])
199203
content["license"] = "by"

examples/use-cases/run.py

Lines changed: 48 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,33 @@
1414

1515

1616
saveDir = "data"
17+
saveJsonDir = "json"
18+
1719
pathlib.Path(saveDir).mkdir(parents=True, exist_ok=True)
20+
pathlib.Path(saveJsonDir).joinpath("pdf").mkdir(parents=True, exist_ok=True)
21+
22+
23+
def getAPIconfig(name):
    """Locate the API key file for the given provider.

    Only ``"springer"`` is recognised. Its key file, ``.api_springer``, is
    looked up first in the directory containing this script and then in
    that directory's parent; the first candidate that exists wins.

    Args:
        name: Provider identifier.

    Returns:
        The ``pathlib.Path`` of the first existing key file, or an empty
        list when the provider is not recognised or no key file exists.
        The empty list is not a usable path, so callers should check the
        result before passing it to ``open``.
    """
    if name != "springer":
        return []
    api_file_name = ".api_springer"
    script_dir = pathlib.Path(__file__).parent.absolute()
    # Search order matters: the script's own directory takes precedence.
    candidates = (
        script_dir.joinpath(api_file_name),
        script_dir.parent.joinpath(api_file_name),
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return []
37+
38+
1839

1940
if False:
2041
# get full list of papers in journal
21-
with open('.api_springer','r') as f:
42+
fileAPIspringer = getAPIconfig("springer")
43+
with open(fileAPIspringer ,'r') as f:
2244
api_key = f.read()
2345
journal_id = "40323"
2446
url_base = "http://api.springernature.com/"
@@ -62,7 +84,7 @@
6284
)
6385
)
6486

65-
if False:
87+
if True:
6688
# load pickle
6789
article_list = pickle.load(
6890
open(os.path.join(saveDir, "article_list.pck"), "rb")
@@ -79,13 +101,13 @@
79101
pickle.dump(article_list_collection, file)
80102
file.close()
81103
file = open(os.path.join(saveDir, "article_list_not_collection.pck"), "wb")
82-
pickle.dump(article_list_collection, file)
104+
pickle.dump(article_list_not_collection, file)
83105
file.close()
84106

85107
if False:
86108
# load pickle
87109
article_list_collection = pickle.load(
88-
open(os.path.join(saveDir, "article_list_collection.pck"), "rb")
110+
open(os.path.join(saveDir, "article_list_not_collection.pck"), "rb")
89111
)
90112
# check if article is in HAL
91113
article_list_in_hal = list()
@@ -106,38 +128,38 @@
106128
print("Articles in HAL: {}".format(len(article_list_in_hal)))
107129
print("Articles not in HAL: {}".format(len(article_list_notin_hal)))
108130

109-
if False:
131+
if True:
110132
# load pickle
111133
article_list = pickle.load(
112134
open(os.path.join(saveDir, "article_list_notin_hal.pck"), "rb")
113135
)
114136
# along articles
115137
for art in article_list:
116-
try:
117-
# convert to HAL
118-
json_file = libConvert.buildJSON(art,'json',os.path.join('json','pdf'))
138+
# try:
139+
# convert to HAL
140+
json_file = libConvert.buildJSON(art,'json',os.path.join('json','pdf'))
119141

120-
# push to HAL from json
121-
idHal = execHAL.runJSON2HAL(
122-
json_file,
123-
verbose=True,
124-
prod="test", # switch to prod with caution
125-
credentials=misc.load_credentials(),#
126-
completion="idext,affiliation", # or false
127-
idhal=None,
128-
)
129-
# push idhal to json
130-
data = json.loads(open(json_file).read())
131-
data['doc_idhal'] = idHal
132-
json_object = json.dumps(data, indent=4)
133-
with open(json_file, "w") as outfile:
134-
outfile.write(json_object)
135-
except:
136-
print("Error with article: {}".format(art["doi"]))
142+
# push to HAL from json
143+
idHal = execHAL.runJSON2HAL(
144+
json_file,
145+
verbose=True,
146+
prod="prod", # switch to prod with caution
147+
credentials=misc.load_credentials(),#
148+
completion="idext,affiliation", # or false
149+
idhal=None,
150+
)
151+
# push idhal to json
152+
data = json.loads(open(json_file).read())
153+
data['doc_idhal'] = idHal
154+
json_object = json.dumps(data, indent=4)
155+
with open(json_file, "w") as outfile:
156+
outfile.write(json_object)
157+
# except:
158+
# print("Error with article: {}".format(art["doi"]))
137159

138160

139161
# add pdf to HAL
140-
if True:
162+
if False:
141163
import glob,shutil
142164
jsondir = 'json'
143165
pathlib.Path(os.path.join(saveDir,"done")).mkdir(parents=True, exist_ok=True)

src/push2HAL/libHAL.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ def choose_from_results(
179179

180180
def addFileInXML(inTree, filePath, hal_id="upload"):
181181
"""Add new imported file in XML"""
182+
if hal_id == None:
183+
hal_id = "upload"
182184
newFilename = dflt.DEFAULT_UPLOAD_FILE_NAME_PDF.format(hal_id)
183185
Logger.debug("Copy original file to new one: {} -> {}".format(filePath, newFilename))
184186
shutil.copyfile(filePath, newFilename)
@@ -277,7 +279,7 @@ def preparePayload(
277279
)
278280
header["Hide-In-OAI"] = m.adaptH(options.get("hide4oai", header["Hide-In-OAI"]))
279281
header["Content-Disposition"] = m.adaptH(
280-
"attachment; filename={}".format(xmlFileName) # path inside the archive
282+
"attachment; filename={}".format(dflt.DEFAULT_UPLOAD_FILE_NAME_XML) # path inside the archive
281283
)
282284
else:
283285
header["Content-Type"] = m.adaptH("text/xml")

0 commit comments

Comments
 (0)