Fix issues with the PDF filename and adapt to the new pybliometrics syntax

luclaurent · luclaurent · commit 976292b0c50f · 2024-10-22T15:18:24.000+02:00
diff --git a/examples/use-cases/libConvert.py b/examples/use-cases/libConvert.py
@@ -1,3 +1,4 @@
+import pybliometrics
 from pybliometrics.scopus import (
     AbstractRetrieval,
     AuthorRetrieval,
@@ -9,6 +10,8 @@
 from habanero import Crossref
 import json
 
+# initialise pybliometrics before any Scopus class is used
+pybliometrics.scopus.init()
 
 cr = Crossref()
 
@@ -90,10 +93,11 @@ def buildAffiliation(affiliation):
 
 def findURL(data):
     return_value = None
-    for it in data:
-        if it.get("content-type") == "text/html":
-            return_value = it.get("URL")
-            break
+    if data:  # skip the scan when no link list is given (None or empty)
+        for it in data:
+            if it.get("content-type") == "text/html":  # first HTML entry wins
+                return_value = it.get("URL")
+                break
     return return_value
 
 
@@ -177,7 +181,7 @@ def buildJSON(article, json_dir, pdf_dir=None):
         # "link2": "https://link2.com/ID",
         # "link3": "https://link3.com/ID"
     }
-    publisherlink = findURL(dataCrossRef["message"]["link"])
+    publisherlink = findURL(dataCrossRef["message"].get("link",None))
     if publisherlink:
         content["extref"]["publisher"] = publisherlink
     enKeywords = article.get("keyword", None)
@@ -193,7 +197,7 @@ def buildJSON(article, json_dir, pdf_dir=None):
         "halDomain": ["spi"]
     }
     if pdf_path:
-        content["fileTmp"] = os.path.relpath(pdf_path, start=json_dir)
+        content["file"] = os.path.relpath(pdf_path, start=json_dir)
     content["authors"] = buildAuthors(dataScopus.authors)
     content["structures"] = buildAffiliations(content["authors"])
     content["license"] = "by"
diff --git a/examples/use-cases/run.py b/examples/use-cases/run.py
@@ -14,11 +14,33 @@
 
 
 saveDir = "data"
+saveJsonDir = "json"
+
 pathlib.Path(saveDir).mkdir(parents=True, exist_ok=True)
+pathlib.Path(saveJsonDir).joinpath("pdf").mkdir(parents=True, exist_ok=True)
+
+
+def getAPIconfig(name):
+    """Return the path of the API key file for service `name`, or an empty list if none is found."""
+    __DEF_API_FILE_SPRINGER_ = ".api_springer"
+    filePath = []  # returned as-is (empty list) for unknown names or when no file exists
+    if name == "springer":
+        listFiles = [  # candidates in priority order: this script's directory, then its parent
+            pathlib.Path(__file__).parent.absolute().joinpath(__DEF_API_FILE_SPRINGER_),
+            pathlib.Path(__file__).parent.absolute().parent.joinpath(__DEF_API_FILE_SPRINGER_)
+        ]
+        for f in listFiles:
+            if f.exists():  # keep the first candidate that exists on disk
+                filePath = f
+                break
+    return filePath
+        
+
 
 if False:
     # get full list of papers in journal
-    with open('.api_springer','r') as f: 
+    fileAPIspringer = getAPIconfig("springer")
+    with open(fileAPIspringer ,'r') as f: 
         api_key = f.read()
     journal_id = "40323"
     url_base = "http://api.springernature.com/"
@@ -62,7 +84,7 @@
                 )
             )
 
-if False:
+if True:
     # load pickle
     article_list = pickle.load(
         open(os.path.join(saveDir, "article_list.pck"), "rb")
@@ -79,13 +101,13 @@
     pickle.dump(article_list_collection, file)
     file.close()
     file = open(os.path.join(saveDir, "article_list_not_collection.pck"), "wb")
-    pickle.dump(article_list_collection, file)
+    pickle.dump(article_list_not_collection, file)
     file.close()
 
 if False:
     # load pickle
     article_list_collection = pickle.load(
-        open(os.path.join(saveDir, "article_list_collection.pck"), "rb")
+        open(os.path.join(saveDir, "article_list_not_collection.pck"), "rb")
     )
     # check if article is in HAL
     article_list_in_hal = list()
@@ -106,38 +128,38 @@
     print("Articles in HAL: {}".format(len(article_list_in_hal)))
     print("Articles not in HAL: {}".format(len(article_list_notin_hal)))
 
-if False:
+if True:
     # load pickle
     article_list = pickle.load(
         open(os.path.join(saveDir, "article_list_notin_hal.pck"), "rb")
     )
     # along articles
     for art in article_list:
-        try:
-            # convert to HAL
-            json_file = libConvert.buildJSON(art,'json',os.path.join('json','pdf'))
+        # try:
+        # convert to HAL
+        json_file = libConvert.buildJSON(art,'json',os.path.join('json','pdf'))
 
-            # push to HAL from json
-            idHal = execHAL.runJSON2HAL(
-                json_file,
-                verbose=True,
-                prod="test", # switch to prod with caution
-                credentials=misc.load_credentials(),#
-                completion="idext,affiliation", # or false
-                idhal=None,
-            )
-            # push idhal to json
-            data = json.loads(open(json_file).read())
-            data['doc_idhal'] = idHal
-            json_object = json.dumps(data, indent=4)
-            with open(json_file, "w") as outfile:
-                outfile.write(json_object)
-        except:
-            print("Error with article: {}".format(art["doi"]))
+        # push to HAL from json
+        idHal = execHAL.runJSON2HAL(
+            json_file,
+            verbose=True,
+            prod="prod", # switch to prod with caution
+            credentials=misc.load_credentials(),#
+            completion="idext,affiliation", # or false
+            idhal=None,
+        )
+        # push idhal to json
+        data = json.loads(pathlib.Path(json_file).read_text())  # read_text() closes the file, unlike a bare open().read()
+        data['doc_idhal'] = idHal
+        json_object = json.dumps(data, indent=4)
+        with open(json_file, "w") as outfile:
+            outfile.write(json_object)
+        # except:
+        #     print("Error with article: {}".format(art["doi"]))
     
     
 # add pdf to HAL      
-if True:
+if False:
     import glob,shutil
     jsondir = 'json'
     pathlib.Path(os.path.join(saveDir,"done")).mkdir(parents=True, exist_ok=True)
diff --git a/src/push2HAL/libHAL.py b/src/push2HAL/libHAL.py
@@ -179,6 +179,8 @@ def choose_from_results(
 
 def addFileInXML(inTree, filePath, hal_id="upload"):
     """Add new imported file in XML"""
+    if hal_id is None:  # an explicit None falls back to the default "upload" id
+        hal_id = "upload"
     newFilename = dflt.DEFAULT_UPLOAD_FILE_NAME_PDF.format(hal_id)
     Logger.debug("Copy original file to new one: {} -> {}".format(filePath, newFilename))
     shutil.copyfile(filePath, newFilename)
@@ -277,7 +279,7 @@ def preparePayload(
         )
         header["Hide-In-OAI"] = m.adaptH(options.get("hide4oai", header["Hide-In-OAI"]))
         header["Content-Disposition"] = m.adaptH(
-            "attachment; filename={}".format(xmlFileName)   # path inside the archive
+            "attachment; filename={}".format(dflt.DEFAULT_UPLOAD_FILE_NAME_XML)   # path inside the archive
         )
     else:
         header["Content-Type"] = m.adaptH("text/xml")