You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
(Optional) The path for downloading the arXiv PDFs. Defaults to "data".
166
+
(Optional) The path for downloading the arXiv metadata in HDF5 files and, optionally (if set with download-pdfs), the PDFs. Defaults to "data".
166
167
""",
167
168
)
168
169
@click.option(
@@ -239,6 +240,16 @@ def cli():
239
240
Defaults to True.
240
241
""",
241
242
)
243
+
@click.option(
244
+
"--download-pdfs",
245
+
"-dp",
246
+
is_flag=True,
247
+
default=False,
248
+
required=False,
249
+
help="""
250
+
(Optional) If True, the PDFs will be downloaded and saved in `download_path`. Defaults to False.
251
+
""",
252
+
)
242
253
def search_and_download(
243
254
download_path,
244
255
category,
@@ -248,6 +259,7 @@ def search_and_download(
248
259
start_from_filepath,
249
260
loader,
250
261
clean_text,
262
+
download_pdfs,
251
263
):
252
264
start_time=time.time()
253
265
@@ -260,7 +272,56 @@ def search_and_download(
260
272
start_from_filepath=start_from_filepath,
261
273
loader=loader,
262
274
clean_text=clean_text,
275
+
download_pdfs=download_pdfs,
263
276
)
264
277
265
278
elapsed_time=time.time() -start_time
266
279
click.echo(f"Downloaded arXiv papers in {elapsed_time:.2f} seconds\n\n")
280
+
281
+
282
+
@cli.command(
283
+
name="download_pdfs",
284
+
help="Downloads the PDFs of the arXiv papers stored in HDF5 files in a specified path.",
285
+
)
286
+
@click.option(
287
+
"--data-path",
288
+
"-path",
289
+
type=str,
290
+
default="data",
291
+
required=False,
292
+
help="""
293
+
(Optional) The path where the HDF5 files with the arXiv papers metadata exist. The downloaded PDFs will be stored in there as well. Defaults to "data".
294
+
""",
295
+
)
296
+
def download_pdfs(data_path):
297
+
start_time=time.time()
298
+
299
+
# check if `data_path` exists, and if not, raise an error
300
+
data_path=Path(data_path)
301
+
if not data_path.exists():
302
+
raise click.ClickException(f"The specified path {data_path} does not exist.")
0 commit comments