|
14 | 14 |
|
15 | 15 |
|
16 | 16 | saveDir = "data" |
| 17 | +saveJsonDir = "json" |
| 18 | + |
17 | 19 | pathlib.Path(saveDir).mkdir(parents=True, exist_ok=True) |
| 20 | +pathlib.Path(saveJsonDir).joinpath("pdf").mkdir(parents=True, exist_ok=True) |
| 21 | + |
| 22 | + |
| 23 | +def getAPIconfig(name): |
| 24 | + '''''' |
| 25 | + __DEF_API_FILE_SPRINGER_ = ".api_springer" |
| 26 | + filePath = [] |
| 27 | + if name == "springer": |
| 28 | + listFiles = [ |
| 29 | + pathlib.Path(__file__).parent.absolute().joinpath(__DEF_API_FILE_SPRINGER_), |
| 30 | + pathlib.Path(__file__).parent.absolute().parent.joinpath(__DEF_API_FILE_SPRINGER_) |
| 31 | + ] |
| 32 | + for f in listFiles: |
| 33 | + if f.exists(): |
| 34 | + filePath = f |
| 35 | + break |
| 36 | + return filePath |
| 37 | + |
| 38 | + |
18 | 39 |
|
19 | 40 | if False: |
20 | 41 | # get full list of papers in journal |
21 | | - with open('.api_springer','r') as f: |
| 42 | + fileAPIspringer = getAPIconfig("springer") |
| 43 | + with open(fileAPIspringer ,'r') as f: |
22 | 44 | api_key = f.read() |
23 | 45 | journal_id = "40323" |
24 | 46 | url_base = "http://api.springernature.com/" |
|
62 | 84 | ) |
63 | 85 | ) |
64 | 86 |
|
65 | | -if False: |
| 87 | +if True: |
66 | 88 | # load pickle |
67 | 89 | article_list = pickle.load( |
68 | 90 | open(os.path.join(saveDir, "article_list.pck"), "rb") |
|
79 | 101 | pickle.dump(article_list_collection, file) |
80 | 102 | file.close() |
81 | 103 | file = open(os.path.join(saveDir, "article_list_not_collection.pck"), "wb") |
82 | | - pickle.dump(article_list_collection, file) |
| 104 | + pickle.dump(article_list_not_collection, file) |
83 | 105 | file.close() |
84 | 106 |
|
85 | 107 | if False: |
86 | 108 | # load pickle |
87 | 109 | article_list_collection = pickle.load( |
88 | | - open(os.path.join(saveDir, "article_list_collection.pck"), "rb") |
| 110 | + open(os.path.join(saveDir, "article_list_not_collection.pck"), "rb") |
89 | 111 | ) |
90 | 112 | # check if article is in HAL |
91 | 113 | article_list_in_hal = list() |
|
106 | 128 | print("Articles in HAL: {}".format(len(article_list_in_hal))) |
107 | 129 | print("Articles not in HAL: {}".format(len(article_list_notin_hal))) |
108 | 130 |
|
109 | | -if False: |
| 131 | +if True: |
110 | 132 | # load pickle |
111 | 133 | article_list = pickle.load( |
112 | 134 | open(os.path.join(saveDir, "article_list_notin_hal.pck"), "rb") |
113 | 135 | ) |
114 | 136 | # along articles |
115 | 137 | for art in article_list: |
116 | | - try: |
117 | | - # convert to HAL |
118 | | - json_file = libConvert.buildJSON(art,'json',os.path.join('json','pdf')) |
| 138 | + # try: |
| 139 | + # convert to HAL |
| 140 | + json_file = libConvert.buildJSON(art,'json',os.path.join('json','pdf')) |
119 | 141 |
|
120 | | - # push to HAL from json |
121 | | - idHal = execHAL.runJSON2HAL( |
122 | | - json_file, |
123 | | - verbose=True, |
124 | | - prod="test", # switch to prod with caution |
125 | | - credentials=misc.load_credentials(),# |
126 | | - completion="idext,affiliation", # or false |
127 | | - idhal=None, |
128 | | - ) |
129 | | - # push idhal to json |
130 | | - data = json.loads(open(json_file).read()) |
131 | | - data['doc_idhal'] = idHal |
132 | | - json_object = json.dumps(data, indent=4) |
133 | | - with open(json_file, "w") as outfile: |
134 | | - outfile.write(json_object) |
135 | | - except: |
136 | | - print("Error with article: {}".format(art["doi"])) |
| 142 | + # push to HAL from json |
| 143 | + idHal = execHAL.runJSON2HAL( |
| 144 | + json_file, |
| 145 | + verbose=True, |
| 146 | + prod="prod", # switch to prod with caution |
| 147 | + credentials=misc.load_credentials(),# |
| 148 | + completion="idext,affiliation", # or false |
| 149 | + idhal=None, |
| 150 | + ) |
| 151 | + # push idhal to json |
| 152 | + data = json.loads(open(json_file).read()) |
| 153 | + data['doc_idhal'] = idHal |
| 154 | + json_object = json.dumps(data, indent=4) |
| 155 | + with open(json_file, "w") as outfile: |
| 156 | + outfile.write(json_object) |
| 157 | + # except: |
| 158 | + # print("Error with article: {}".format(art["doi"])) |
137 | 159 |
|
138 | 160 |
|
139 | 161 | # add pdf to HAL |
140 | | -if True: |
| 162 | +if False: |
141 | 163 | import glob,shutil |
142 | 164 | jsondir = 'json' |
143 | 165 | pathlib.Path(os.path.join(saveDir,"done")).mkdir(parents=True, exist_ok=True) |
|
0 commit comments