Releases: bio-guoda/preston
Releases · bio-guoda/preston
0.11.4
Features
- support discovery of data archives for anchored preston datasets published in Zenodo #356 .
Example usage:
preston ls \
--remote https://zenodo.org \
--anchor hash://md5/58de50154e330c331993fe5d0852ad84 \
--algo md5 \
| grep .zip \
| grep hasVersion \
| grep DwC \
| head -1 \
| preston dwc-stream \
--remote https://zenodo.org \
--anchor hash://md5/58de50154e330c331993fe5d0852ad84 \
--algo md5 \
| head -1 \
| jq .
to discover first dwc records encountered in
Elton, Nomer, & Preston. (2025). Versioned Archive and Review of Biotic Interactions and Taxon Names Found within globalbioticinteractions/scan hash://md5/58de50154e330c331993fe5d0852ad84. Zenodo. https://doi.org/10.5281/zenodo.16894884
as seen below.
To disable archive discovery and retrieve the entire archive to scan for requested content, use --disable-archive-discovery option.
{
"http://www.w3.org/ns/prov#wasDerivedFrom": "line:zip:hash://md5/f0116c11643a3465d94ec7511621c232!/occurrences.csv!/L2",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://rs.tdwg.org/dwc/terms/Occurrence",
"http://rs.tdwg.org/dwc/text/id": "29693343",
"http://rs.tdwg.org/dwc/terms/basisOfRecord": "Pinned Specimen",
"http://rs.tdwg.org/dwc/terms/verbatimEventDate": null,
"http://rs.tdwg.org/dwc/terms/catalogNumber": "ARTHARCH00034603",
"http://rs.tdwg.org/dwc/terms/dynamicProperties": null,
"http://rs.tdwg.org/dwc/terms/scientificNameAuthorship": "Emery, 1884",
"http://rs.tdwg.org/dwc/terms/eventDate": "2004-06-29",
"http://rs.tdwg.org/dwc/terms/taxonID": "234621",
"http://rs.tdwg.org/dwc/terms/georeferencedBy": null,
"http://rs.tdwg.org/dwc/terms/fieldNumber": null,
"http://rs.tdwg.org/dwc/terms/sex": null,
"http://rs.tdwg.org/dwc/terms/maximumDepthInMeters": null,
"http://rs.tdwg.org/dwc/terms/decimalLatitude": "26.122",
"http://rs.tdwg.org/dwc/terms/infraspecificEpithet": null,
"http://rs.tdwg.org/dwc/terms/stateProvince": "Florida",
"http://rs.tdwg.org/dwc/terms/georeferenceProtocol": null,
"http://purl.org/dc/elements/1.1/rights": "http://creativecommons.org/licenses/by-nc/4.0/",
"http://purl.org/dc/terms/accessRights": null,
"http://rs.tdwg.org/dwc/terms/lifeStage": null,
"http://rs.tdwg.org/dwc/terms/identificationRemarks": null,
"http://rs.tdwg.org/dwc/terms/recordedBy": "Mark Deyrup",
"http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters": null,
"http://rs.tdwg.org/dwc/terms/taxonRank": null,
"http://rs.tdwg.org/dwc/terms/class": "Insecta",
"http://rs.tdwg.org/dwc/terms/informationWithheld": null,
"http://purl.org/dc/terms/rightsHolder": "Archbold Biological Station",
"http://purl.org/dc/terms/language": null,
"http://rs.tdwg.org/dwc/terms/associatedTaxa": null,
"http://rs.tdwg.org/dwc/terms/collectionCode": "ARTHARCH",
"http://rs.tdwg.org/dwc/terms/individualCount": null,
"http://rs.tdwg.org/dwc/terms/verbatimElevation": null,
"http://rs.tdwg.org/dwc/terms/disposition": null,
"http://rs.tdwg.org/dwc/terms/otherCatalogNumbers": null,
"http://rs.tdwg.org/dwc/terms/typeStatus": null,
"http://rs.tdwg.org/dwc/terms/country": "United States",
"http://rs.tdwg.org/dwc/terms/verbatimDepth": null,
"http://rs.tdwg.org/dwc/terms/family": "Formicidae",
"http://rs.tdwg.org/dwc/terms/phylum": "Arthropoda",
"http://rs.tdwg.org/dwc/terms/samplingEffort": null,
"http://rs.tdwg.org/dwc/terms/geodeticDatum": null,
"http://rs.tdwg.org/dwc/terms/identifiedBy": null,
"http://symbiota.org/terms/recordEnteredBy": null,
"http://rs.tdwg.org/dwc/terms/reproductiveCondition": null,
"http://purl.org/dc/terms/references": "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=29693343",
"http://rs.tdwg.org/dwc/terms/collectionID": "f52df103-0126-47fd-8836-934641444fcd",
"http://rs.tdwg.org/dwc/terms/occurrenceRemarks": "2 specimens on pin; Specimen voucher for Ants of Florida CRC Press 2016; Images of this species available at https://www.antweb.org",
"http://rs.tdwg.org/dwc/terms/endDayOfYear": null,
"http://rs.tdwg.org/dwc/terms/habitat": null,
"http://portal.idigbio.org/terms/recordId": "urn:uuid:85dc3937-5e0b-438c-8401-f93af4d0a38f",
"http://purl.org/dc/terms/modified": "2019-06-24 12:45:21",
"http://rs.tdwg.org/dwc/terms/decimalLongitude": "-80.144",
"http://rs.tdwg.org/dwc/terms/order": "Hymenoptera",
"http://rs.tdwg.org/dwc/terms/georeferenceVerificationStatus": null,
"http://rs.tdwg.org/dwc/terms/day": "29",
"http://rs.tdwg.org/dwc/terms/minimumElevationInMeters": null,
"http://rs.tdwg.org/dwc/terms/municipality": null,
"http://rs.tdwg.org/dwc/terms/georeferenceRemarks": null,
"http://rs.tdwg.org/dwc/terms/locality": "Fort Lauderdale: Bonnet House, disturbed mesic area",
"http://rs.tdwg.org/dwc/terms/georeferenceSources": null,
"http://rs.tdwg.org/dwc/terms/occurrenceID": "85dc3937-5e0b-438c-8401-f93af4d0a38f",
"http://rs.tdwg.org/dwc/terms/specificEpithet": "mayri",
"http://rs.tdwg.org/dwc/terms/genus": "Anochetus",
"http://rs.tdwg.org/dwc/terms/associatedOccurrences": null,
"http://rs.tdwg.org/dwc/terms/ownerInstitutionCode": null,
"http://rs.tdwg.org/dwc/terms/month": "6",
"http://rs.tdwg.org/dwc/terms/locationRemarks": null,
"http://rs.tdwg.org/dwc/terms/samplingProtocol": null,
"http://rs.tdwg.org/dwc/terms/dateIdentified": null,
"http://rs.tdwg.org/dwc/terms/taxonRemarks": null,
"http://rs.tdwg.org/dwc/terms/county": "Broward County",
"http://rs.tdwg.org/dwc/terms/kingdom": "Animalia",
"http://rs.tdwg.org/dwc/terms/verbatimCoordinates": null,
"http://rs.tdwg.org/dwc/terms/recordNumber": null,
"http://rs.tdwg.org/dwc/terms/identificationReferences": null,
"http://rs.tdwg.org/dwc/terms/year": "2004",
"http://rs.tdwg.org/dwc/terms/establishmentMeans": null,
"http://rs.tdwg.org/dwc/terms/maximumElevationInMeters": null,
"http://rs.tdwg.org/dwc/terms/identificationQualifier": null,
"http://rs.tdwg.org/dwc/terms/preparations": null,
"http://rs.tdwg.org/dwc/terms/institutionCode": "ABS",
"http://rs.tdwg.org/dwc/terms/startDayOfYear": "181",
"http://rs.tdwg.org/dwc/terms/minimumDepthInMeters": null,
"http://rs.tdwg.org/dwc/terms/scientificName": "Anochetus mayri",
"http://rs.tdwg.org/dwc/terms/dataGeneralizations": null
}
Improvements
- override default stderr logging by xerces xml parser
- add support for Zotero record items without explicit attachments; in addition, support implicitly linked attachments also
Bugs
- replace file movement with copy/delete in an attempt to make process more resilient when dealing with mapped network drives.
0.11.3
Features
n/a
Improvements
- log unhandled Zenodo metadata; #361
- reduce log verbosity when handling non-Zenodo data in `preston zenodo` deposit workflow. #361
Bugs
- address out of bounds exception on empty common name in plazi treatment likely causing premature ending of plazi record stream via `preston plazi-stream`. Possibly related to jhpoelen/hmw#14 (comment).
0.11.2
Features
- allow for Zotero records to be streamed into RIS records bat-literature/bat-literature.github.io#117
- allow for GOOGLE_TOKEN to access non-public google docs #360
Improvements
n/a
Bugs
- handle "name" elements in Zotero metadata bat-literature/bat-literature.github.io#113 thanks to @arw36
0.11.1
Features
- introduce `/history` endpoint to list provenance/history of doi / dataset uuid or dwc-a urls related to #357 @Jegelewicz
Improvements
- add alias `preston snapshot` for `preston track` fyi @ajacsherman
- add alias `preston publish` for `preston zenodo`
- working towards discovering `data.zip` in Zenodo deposits see #356
Bugs
n/a
0.11.0
0.10.14
Features
n/a
Improvements
- support SciELO DOI inference #344 using
| domain | doi prefix | example url | example doi |
|---|---|---|---|
| http://www.scielo.br | 10.1590 | http://www.scielo.br/scielo.php?script=sci_arttext&pid=S2236-89062014000200010 | https://doi.org/10.1590/s2236-89062014000200010 |
| http://www.scielo.org.mx | NA | NA | NA |
| http://www.scielo.cl | 10.4067 | https://www.scielo.cl/scielo.php?script=sci_pdf&pid=S0717-65382015000100003 | https://doi.org/10.4067/s0717-65382015000100003 |
| http://www.scielo.org.co | NA | NA | NA |
| http://www.scielo.org.ar | NA | NA | NA |
- update `man preston-ris-stream` documentation:
PRESTON-RIS-STREAM(1) Preston Manual PRESTON-RIS-STREAM(1)
NAME
preston-ris-stream - translates bibliographic citations from RIS format
into Zenodo metadata in JSON lines format
SYNOPSIS
preston ris-stream [--no-cache] [--no-progress] [--reuse-doi]
[-a=<hashType>] [-d=<depth>] [--data-dir=<dataDir>] [-l=<logMode>]
[-r=<provenanceAnchor>] [--tmp-dir=<tmpDir>]
[--community=<communities>[,<communities>...]]...
[--repos=<remotes>[,<remotes>...]]...
DESCRIPTION
Stream RIS records into line-json with Zenodo metadata
OPTIONS
-a, --algo, --hash-algorithm=<hashType>
Hash algorithm used to generate primary content identifiers.
Supported values: sha256, md5, sha1.
--community, --communities=<communities>[,<communities>...]
select which Zenodo communities to submit to. If community is known
(e.g., batlit, taxodros), default metadata is included.
-d, --depth=<depth>
folder depth of data dir
--data-dir=<dataDir>
Location of local content cache
-l, --log=<logMode>
Log format. Supported values: tsv, nquads.
--no-cache, --disable-cache
Disable local content cache
--no-progress
Disable progress monitor
-r, --anchor, --provenance-root, --provenance-anchor=<provenanceAnchor>
specify the provenance root/anchor of the command. By default, any
available data graph will be traversed up to its most recent
additions. If the provenance root is set, only specified provenance
signature and their origins are included in the scope.
--repos, --remote, --remotes, --include,
--repositories=<remotes>[,<remotes>...]
Included repository dependencies (e.g.,
https://linker.bio/,https://softwareheritage.org,https://wikimedia.org,https://dataone.org,https://zenodo.org)
--reuse-doi
use existing DOI in Zenodo deposit if available
--tmp-dir=<tmpDir>
Location of local tmp dir
EXAMPLES
1.
First, append the associated bhl pdf via:
preston track https://www.biodiversitylibrary.org/partpdf/326364
Following, generate a RIS record, record.ris:
cat > record.ris <<__EOL__
TY - BOOK
TI - Faber, Helen R May 5, 1913
T2 - Walter Deane correspondence
UR - https://www.biodiversitylibrary.org/part/326364
PY - 1913-05-05
AU - Faber, Helen R.,
ER -
__EOL__
Then, track record.ris using Preston into Zenodo metadata using:
cat record.ris\
| preston track
Finally, generate Zenodo metadata record.json using:
preston head\
| preston cat\
| preston ris-stream\
> record.json
where record.json:
{
"metadata": {
"description": "(Uploaded by Plazi from the Biodiversity Heritage Library) No abstract provided.",
"communities": [],
"http://www.w3.org/ns/prov#wasDerivedFrom": "https://linker.bio/line:hash://sha256/5fd5944b52b22efc56f901d96ff53a64c42e1f2264763e2f1074ac2c589e47cf!/L1-L7",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "application/x-research-info-systems",
"title": "Faber, Helen R May 5, 1913",
"upload_type": "publication",
"publication_type": "other",
"journal_title": "Walter Deane correspondence",
"publication_date": "1913-05-05",
"referenceId": "https://www.biodiversitylibrary.org/part/326364",
"filename": "bhlpart326364.pdf",
"keywords": [
"Biodiversity",
"BHL-Corpus",
"Source: Biodiversity Heritage Library",
"Source: https://biodiversitylibrary.org",
"Source: BHL"
],
"creators": [
{
"name": "Faber, Helen R."
}
],
"related_identifiers": [
{
"relation": "isDerivedFrom",
"identifier": "https://linker.bio/line:hash://sha256/5fd5944b52b22efc56f901d96ff53a64c42e1f2264763e2f1074ac2c589e47cf!/L1-L7"
},
{
"relation": "isDerivedFrom",
"identifier": "https://www.biodiversitylibrary.org/part/326364"
},
{
"relation": "isAlternateIdentifier",
"identifier": "urn:lsid:biodiversitylibrary.org:part:326364"
},
{
"relation": "isPartOf",
"identifier": "hash://sha256/3983c9abbba981838de5d47a5dadf94c4afcea7df63486effb71d780e592ebe8"
},
{
"relation": "hasVersion",
"identifier": "hash://md5/7fddbf186c6bbddb0b49919fc340bb61"
},
{
"relation": "hasVersion",
"identifier": "hash://sha256/9b30af8f432b78e0d739b0457376dac998057a5b4b5fccd52b81560ec1f4f146"
}
]
}
}
2025-07-09 PRESTON-RIS-STREAM(1)
Bugs
n/a
0.10.13
Features
n/a
Improvements
- include man pages for sub-commands #343 ; add example sections for `man preston-track` and `man preston-cat`
- follow multi-layer redirect `A -> B -> C` via alternateOf/seeAlso #336
- allow for tracing gbif dataset DOIs to their reported dwc endpoint
- add markdown export for google docs; remove epub and rtf
Example from man preston-track
preston track \
https://doi.org/10.15468/w6hvhv\
| preston dwc-stream\
| head -1\
| jq .\
> specimen.json
Bugs
n/a
0.10.12
0.10.11
Features
n/a
Improvements
- bhl pdf endpoints cannot be inferred from bhl item part id for externally hosted pdfs; related to #339
- when encountering a sciELO resource with pdf request; check for javascript-redirects and register associated content; #336 fyi @myrmoteras
- favor alternate/seeAlso contentIds associated over content associated with original locations; related to #336
- support copy-paste tracking of GBIF dataset via their landing page https://www.gbif.org/dataset/e4d3fc77-1d94-495b-96ff-3fe8b8f7a3bd fyi @seltmann
Example:
preston track https://www.gbif.org/dataset/e4d3fc77-1d94-495b-96ff-3fe8b8f7a3bd\
| preston dwc-stream\
| head -1\
| jq .
yields
{
"http://www.w3.org/ns/prov#wasDerivedFrom": "line:zip:hash://sha256/c2e545a14943beb12878b57e02f0718d3e784151b692ffdb51ebf08ffbb73dfe!/occurrence.txt!/L2",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://rs.tdwg.org/dwc/terms/Occurrence",
"http://rs.tdwg.org/dwc/text/id": "861c9d4e-d8e1-11e2-99a2-0026552be7ea",
"http://rs.tdwg.org/dwc/terms/country": "MEXICO",
"http://rs.tdwg.org/dwc/terms/minimumDepthInMeters": null,
"http://rs.tdwg.org/dwc/terms/verbatimLongitude": "098 47 00 W",
"http://rs.tdwg.org/dwc/terms/datasetName": "AMNH Hymenoptera",
"http://rs.tdwg.org/dwc/terms/individualCount": "1",
"http://rs.tdwg.org/dwc/terms/associatedOrganisms": null,
"http://rs.tdwg.org/dwc/terms/stateProvince": "Tamaulipas",
"http://rs.tdwg.org/dwc/terms/basisOfRecord": "PreservedSpecimen",
"http://rs.tdwg.org/dwc/terms/infraspecificEpithet": null,
"http://rs.tdwg.org/dwc/terms/occurrenceID": "861c9d4e-d8e1-11e2-99a2-0026552be7ea",
"http://rs.tdwg.org/dwc/terms/municipality": null,
"http://rs.tdwg.org/dwc/terms/locality": "Padilla",
"http://rs.tdwg.org/dwc/terms/specificEpithet": "completa",
"http://rs.tdwg.org/dwc/terms/island": null,
"http://rs.tdwg.org/dwc/terms/family": "Apidae",
"http://rs.tdwg.org/dwc/terms/verbatimEventDate": "5/17/1952",
"http://rs.tdwg.org/dwc/terms/locationID": "0004be86-935b-4850-84fe-78ef6cbc2954",
"http://rs.tdwg.org/dwc/terms/minimumElevationInMeters": null,
"http://rs.tdwg.org/dwc/terms/phylum": "Arthropoda",
"http://rs.tdwg.org/dwc/terms/typeStatus": null,
"http://rs.tdwg.org/dwc/terms/class": "Insecta",
"http://purl.org/dc/terms/license": "Attribution 4.0 International https://creativecommons.org/licenses/by/4.0/",
"http://rs.tdwg.org/dwc/terms/preparations": "Pinned",
"http://rs.tdwg.org/dwc/terms/county": null,
"http://rs.tdwg.org/dwc/terms/associatedOccurrences": null,
"http://rs.tdwg.org/dwc/terms/taxonID": "562d1f89-9d1b-4f4b-83a5-29f12f9e99f0",
"http://rs.tdwg.org/dwc/terms/order": "Hymenoptera",
"http://rs.tdwg.org/dwc/terms/genus": "Anthophorula",
"http://rs.tdwg.org/dwc/terms/catalogNumber": "AMNH_BEE 00198554",
"http://rs.tdwg.org/dwc/terms/institutionCode": "AMNH",
"http://rs.tdwg.org/dwc/terms/kingdom": "Animalia",
"http://rs.tdwg.org/dwc/terms/scientificName": "Anthophorula (Anthophorula) completa Cockerell, 1935",
"http://rs.tdwg.org/dwc/terms/recordedBy": "M. A. Cazier, W. J. Gertsch & R. Schrammel",
"http://rs.tdwg.org/dwc/terms/samplingProtocol": "Netting",
"http://rs.tdwg.org/dwc/terms/verbatimLatitude": "24 01 00 N",
"http://rs.tdwg.org/dwc/terms/subgenus": "Anthophorula",
"http://rs.tdwg.org/dwc/terms/waterBody": null,
"http://purl.org/dc/terms/rightsHolder": "American Museum of Natural History",
"http://rs.tdwg.org/dwc/terms/sex": "Male",
"http://rs.tdwg.org/dwc/terms/otherCatalogNumbers": null
}