Skip to content

Commit

Permalink
Add a Software Heritage provider
Browse files Browse the repository at this point in the history
Software Heritage¹ is a non-profit multi-stakeholder initiative which collects,
preserves and shares all software that is publicly available in source code
form.

It provides a public API for retrieving any piece of source code
that has been ingested and is identified via its SWHID², including a
growing number of Jupyter notebooks.

Thanks to the recent support for SWHID added in repo2docker, this now
brings SWHID support to BinderHub as a provider.

¹ https://www.softwareheritage.org
² https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers
  • Loading branch information
douardda committed Jan 27, 2021
1 parent 81660eb commit 66e4681
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 3 deletions.
3 changes: 2 additions & 1 deletion binderhub/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from .repoproviders import (GitHubRepoProvider, GitRepoProvider,
GitLabRepoProvider, GistRepoProvider,
ZenodoProvider, FigshareProvider, HydroshareProvider,
DataverseProvider)
DataverseProvider, SWHIDProvider)
from .metrics import MetricsHandler

from .utils import ByteSpecification, url_path_join
Expand Down Expand Up @@ -436,6 +436,7 @@ def _add_slash(self, proposal):
'figshare': FigshareProvider,
'hydroshare': HydroshareProvider,
'dataverse': DataverseProvider,
'swh': SWHIDProvider,
},
config=True,
help="""
Expand Down
3 changes: 2 additions & 1 deletion binderhub/event-schemas/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"Zenodo",
"Figshare",
"Hydroshare",
"Dataverse"
"Dataverse",
"Software Heritage"
],
"description": "Provider for the repository being launched"
},
Expand Down
1 change: 1 addition & 0 deletions binderhub/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"figshare": "Figshare",
"hydroshare": "Hydroshare",
"dataverse": "Dataverse",
"swh": "Software Heritage",
}


Expand Down
37 changes: 37 additions & 0 deletions binderhub/repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,3 +886,40 @@ async def get_resolved_spec(self):

def get_build_slug(self):
return self.gist_id


class SWHIDProvider(RepoProvider):
    """Provide contents of a SWHID record.

    Users must provide a spec consisting of the SWHID of a directory or
    revision, e.g. ``swh:1:rev:c30614ec4587418fb264efb466cba58991029f16``.
    The spec is used unchanged as the resolved ref, the resolved spec, the
    "repo URL" and the resolved ref URL.
    """
    name = Unicode("SWHID")

    async def get_resolved_ref(self):
        """Return the spec if the Software Heritage archive knows this SWHID.

        The SWHID is submitted as a one-element JSON list to the archive's
        ``known`` endpoint; the reply is read as a mapping from each
        submitted SWHID to an object carrying a ``known`` flag.

        Raises:
            RuntimeError: if the archive does not report the SWHID as known
                (including when the SWHID is absent from the reply).
        """
        client = AsyncHTTPClient()
        req = HTTPRequest("https://archive.softwareheritage.org/api/1/known/",
                          # A request body is only accepted by Tornado for
                          # POST/PUT/PATCH; the default method (GET) would
                          # raise ValueError before anything is sent.
                          method="POST",
                          headers={'content-type': 'application/json'},
                          body=json.dumps([self.spec]),
                          user_agent="BinderHub")
        r = await client.fetch(req)
        r.rethrow()

        response = json.loads(r.body)
        # Treat a missing entry like an unknown SWHID so callers always see
        # the same RuntimeError instead of an incidental KeyError.
        entry = response.get(self.spec) or {}
        if entry.get("known"):
            return self.spec
        raise RuntimeError(f"Unknown SWHID {self.spec}")

    async def get_resolved_spec(self):
        """Return the spec unchanged."""
        return self.spec

    def get_repo_url(self):
        """Return the spec unchanged.

        While called repo URL, the return value of this function is passed
        as argument to repo2docker, hence we return the spec as is.
        """
        return self.spec

    async def get_resolved_ref_url(self):
        """Return the spec unchanged."""
        return self.spec

    def get_build_slug(self):
        """Return the build slug: the spec prefixed with ``swh-``."""
        return "swh-{}".format(self.spec)
7 changes: 6 additions & 1 deletion binderhub/static/js/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ function updateRepoText() {
$("#ref").prop("disabled", true);
$("label[for=ref]").prop("disabled", true);
}
else if (provider === "swh") {
text = "Software Heritage SWHID (swh:1:rev:c30614ec4587418fb264efb466cba58991029f16)";
$("#ref").prop("disabled", true);
$("label[for=ref]").prop("disabled", true);
}
$("#repository").attr('placeholder', text);
$("label[for=repository]").text(text);
$("#ref").attr('placeholder', placeholder);
Expand All @@ -123,7 +128,7 @@ function getBuildFormValues() {

var ref = $('#ref').val().trim() || $("#ref").attr("placeholder");
if (providerPrefix === 'zenodo' || providerPrefix === 'figshare' || providerPrefix === 'dataverse' ||
providerPrefix === 'hydroshare') {
providerPrefix === 'hydroshare' || providerPrefix === 'swh') {
ref = "";
}
var path = $('#filepath').val().trim();
Expand Down
1 change: 1 addition & 0 deletions binderhub/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ <h4 id="form-header" class='row'>Build and launch a repository</h4>
<li class="dropdown-item" value="figshare"><a href="#">Figshare DOI</a></li>
<li class="dropdown-item" value="hydroshare"><a href="#">Hydroshare resource</a></li>
<li class="dropdown-item" value="dataverse"><a href="#">Dataverse DOI</a></li>
<li class="dropdown-item" value="swh"><a href="#">Software Heritage SWHID</a></li>
</ul>
</div>
<input class="form-control" type="text" id="repository" data-lpignore="true" placeholder="GitHub repository name or link"/>
Expand Down
25 changes: 25 additions & 0 deletions binderhub/tests/test_repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
GitLabRepoProvider,
GitRepoProvider,
HydroshareProvider,
SWHIDProvider,
ZenodoProvider,
strip_suffix,
tokenize_spec,
Expand Down Expand Up @@ -490,3 +491,27 @@ def test_gist_secret():

provider = GistRepoProvider(spec=spec, allow_secret_gist=True)
assert IOLoop().run_sync(provider.get_resolved_ref) is not None


@pytest.mark.parametrize('spec,resolved_spec,resolved_ref,resolved_ref_url,build_slug', [
    (
        'swh:1:rev:c30614ec4587418fb264efb466cba58991029f16',
        'swh:1:rev:c30614ec4587418fb264efb466cba58991029f16',
        'swh:1:rev:c30614ec4587418fb264efb466cba58991029f16',
        'swh:1:rev:c30614ec4587418fb264efb466cba58991029f16',
        'swh-swh:1:rev:c30614ec4587418fb264efb466cba58991029f16',
    ),
])
async def test_swh(spec, resolved_spec, resolved_ref, resolved_ref_url, build_slug):
    """Check every value SWHIDProvider derives from a SWHID spec."""
    swh_provider = SWHIDProvider(spec=spec)

    # Resolving the ref must come first, before the other accessors are used.
    assert await swh_provider.get_resolved_ref() == resolved_ref

    assert swh_provider.get_build_slug() == build_slug
    assert swh_provider.get_repo_url() == spec
    assert await swh_provider.get_resolved_ref_url() == resolved_ref_url
    assert await swh_provider.get_resolved_spec() == resolved_spec
6 changes: 6 additions & 0 deletions doc/developer/repoproviders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ Currently supported providers, their prefixes and specs are:
+------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+
| Git | ``git`` | ``<url-escaped-url>/<commit-sha>`` | A generic repository provider for URLs that point directly to a git repository. |
+------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+
| SWHID | ``swh`` | ``<SWHID>`` | A `Software Heritage persistent identifier`_. |
+------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+


.. _`Software Heritage persistent identifier`: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers


Adding a new repository provider
================================
Expand Down
7 changes: 7 additions & 0 deletions doc/reference/repoproviders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,10 @@ Module: :mod:`binderhub.repoproviders`

.. autoconfigurable:: GitRepoProvider
:members:


:class:`SWHIDProvider`
---------------------------

.. autoconfigurable:: SWHIDProvider
:members:

0 comments on commit 66e4681

Please sign in to comment.