Here you can find all datasets necessary to run the example notebooks already converted to the SpatialData Zarr file format.
Scripts to convert data from several other technologies into SpatialData Zarr are available in the spatialdata sandbox; in particular:
| Technology | Sample | File Size | Filename (spatialdata-sandbox) | License |
|---|---|---|---|---|
| Visium HD | Mouse intestine 5 | ~2.4 GB | visium_hd_3.0.0_io | CC BY 4.0 |
| Visium HD | Mouse brain 6 | <200MB | visium_hd_4.0.1_io | CC BY 4.0 |
| Visium | Breast cancer 7 | ~1.5 GB | visium_associated_xenium_io | CC BY 4.0 |
| Visium | Mouse brain 8 | <100MB | visium | CC BY 4.0 |
| Xenium | Breast cancer 7 | ~2.8 GB | xenium_rep1_io | CC BY 4.0 |
| Xenium | Lung cancer 9 | ~5.4 GB | xenium_2.0.0_io | CC BY 4.0 |
| MERFISH | Mouse brain 10 | ~50 MB | merfish | CC0 1.0 |
| MIBI-TOF | Colorectal carcinoma 11 | ~25 MB | mibitof | CC BY 4.0 |
| Molecular Cartography (SPArrOW output) | Mouse Liver 12, 13 | ~70 MB | mouse_liver | CC BY 4.0 |
| SpaceM | HeLa and NIH3T3 cells 14 | ~60 MB | spacem_helanih3t3 | CC BY 4.0 |
Please select the dataset and version below to download the data. The available versions are read from a listing of the S3 bucket that is generated when the documentation is built.
<div style="margin: 1em 0 2em 0; padding: 1em; border: 1px solid #444; border-radius: 6px; max-width: 600px;">
<!-- Dataset picker. Each option value is the dataset name that the script in this widget passes to buildUrl() to form the archive URL. -->
<label for="dataset-select"><strong>Dataset:</strong></label>
<select id="dataset-select" style="margin: 0.3em 0 0.8em 0.5em; padding: 0.3em; min-width: 300px;">
  <option value="">-- Select a dataset --</option>
  <option value="visium_hd_3.0.0_io">visium_hd_3.0.0_io</option>
  <option value="visium_hd_4.0.1_io">visium_hd_4.0.1_io</option>
  <option value="visium_associated_xenium_io">visium_associated_xenium_io</option>
  <option value="visium">visium</option>
  <option value="xenium_rep1_io">xenium_rep1_io</option>
  <option value="xenium_2.0.0_io">xenium_2.0.0_io</option>
  <option value="merfish">merfish</option>
  <option value="mibitof">mibitof</option>
  <option value="mouse_liver">mouse_liver</option>
  <option value="spacem_helanih3t3">spacem_helanih3t3</option>
</select>
<br>
<!-- Slot for the newest archive's download link. Hidden until a dataset is chosen; the script replaces its contents. -->
<span id="latest-download-container" style="display: none;">
  <a id="latest-download-link" href="#">Download</a>
</span>
<div id="older-versions-section" style="display: none; margin-top: 0.5em;">
  <!-- Disclosure toggle for the version picker. It performs an in-page action
       rather than navigating, so it is a button (operable with Enter and Space)
       instead of a link to "#". The inline style keeps a link-like appearance;
       the script rewrites its text to flip the arrow glyph. -->
  <button id="toggle-older-versions" type="button" style="font: inherit; font-size: 0.9em; margin: 0; padding: 0; border: 0; background: none; color: inherit; text-decoration: underline; cursor: pointer;">▶ Download a specific version</button>
  <div id="older-versions-content" style="display: none; margin-top: 0.5em;">
    <label for="version-select"><strong>Version:</strong></label>
    <!-- Options are filled in by the script once a dataset with several versions is selected. -->
    <select id="version-select" style="margin: 0.3em 0 0.8em 0.5em; padding: 0.3em; min-width: 300px;">
    </select>
    <br>
    <!-- Receives the download link for the version currently chosen above. -->
    <span id="download-link-container"></span>
  </div>
</div>
<script>
(function() {
// Look up the widget's DOM nodes once; the functions and handlers in this
// script all refer to these shared variables.
var datasetSelect = document.getElementById('dataset-select'),
    versionSelect = document.getElementById('version-select'),
    linkContainer = document.getElementById('download-link-container'),
    latestContainer = document.getElementById('latest-download-container'),
    latestLink = document.getElementById('latest-download-link'),
    olderSection = document.getElementById('older-versions-section'),
    toggleOlder = document.getElementById('toggle-older-versions'),
    olderContent = document.getElementById('older-versions-content');
// Prefix that every archive URL is built on.
var BASE_URL = 'https://s3.embl.de/spatialdata/spatialdata-sandbox/';
// Allow-list for URL parts: letters, digits, dot, underscore, plus and
// hyphen (the empty string also matches). The Sphinx extension that writes
// datasets_data.js already validates suffixes against the same character
// set; the check is repeated in the browser as defence in depth before a
// value is placed in a URL.
var SAFE_RE = /^[A-Za-z0-9._+\-]*$/;
// Build the download URL for one archive: BASE_URL followed by the
// URI-encoded file name "<datasetId><suffix>.zip".
//
// datasetId: dataset name; must be a non-empty string matching SAFE_RE.
// suffix:    version suffix appended to the name; must be a string matching
//            SAFE_RE and may be the empty string.
// Returns the URL as a string, or null when either part fails validation.
function buildUrl(datasetId, suffix) {
  // RegExp.prototype.test() converts its argument to a string, so null or
  // undefined would be tested as "null" / "undefined", pass SAFE_RE and end
  // up inside the file name. Reject anything that is not a string first.
  if (typeof datasetId !== 'string' || typeof suffix !== 'string') return null;
  // SAFE_RE accepts '', which is fine for the suffix but would produce a
  // bare ".zip" file name for the dataset.
  if (datasetId === '') return null;
  if (!SAFE_RE.test(datasetId) || !SAFE_RE.test(suffix)) return null;
  return BASE_URL + encodeURIComponent(datasetId + suffix + '.zip');
}
// Create a detached <a> element labelled "Download" that points at `url`.
// The caller decides where to insert it.
function makeDownloadLink(url) {
  var anchor = document.createElement('a');
  anchor.textContent = 'Download';
  anchor.href = url;
  return anchor;
}
// Version data is not fetched by the browser. datasets_data.js, generated at
// docs build time by the fetch_s3_datasets Sphinx extension and included via
// app.add_js_file(), assigns it to window.SPATIALDATA_DATASETS.
//
// Returns that global, or null when it was never defined.
function getDatasets() {
  var published = window.SPATIALDATA_DATASETS;
  return typeof published === 'undefined' ? null : published;
}
// Replace the contents of the latest-download slot with a red notice saying
// that version data could not be loaded, followed by a link to the bucket
// root, and make the slot visible.
function showError() {
  // Build the fallback link first.
  var bucketLink = document.createElement('a');
  bucketLink.href = BASE_URL;
  bucketLink.textContent = 'the S3 bucket';
  // Assemble: message text, link, closing full stop.
  var notice = document.createElement('em');
  notice.style.color = '#c44';
  notice.textContent = 'Could not load version data. Download datasets directly from ';
  notice.appendChild(bucketLink);
  notice.appendChild(document.createTextNode('.'));
  // Swap it in for whatever the slot showed before.
  latestContainer.textContent = '';
  latestContainer.appendChild(notice);
  latestContainer.style.display = '';
}
// Order two version suffixes "naturally": runs of digits are compared by
// numeric value and everything else by plain string comparison, so "_0.9.0"
// sorts before "_0.10.0" (a plain Array.prototype.sort() compares character
// by character and gets this backwards). When one suffix runs out of chunks
// first, including the empty suffix, it sorts first.
function compareSuffixes(a, b) {
  var partsA = a.match(/\d+|\D+/g) || [];
  var partsB = b.match(/\d+|\D+/g) || [];
  var shared = Math.min(partsA.length, partsB.length);
  for (var i = 0; i < shared; i++) {
    var x = partsA[i];
    var y = partsB[i];
    if (x === y) continue;
    if (/^\d/.test(x) && /^\d/.test(y)) {
      var delta = parseInt(x, 10) - parseInt(y, 10);
      if (delta !== 0) return delta;
    }
    // Non-numeric or mixed chunks, or equal numbers that differ only in
    // zero padding: fall back to string order.
    return x < y ? -1 : 1;
  }
  return partsA.length - partsB.length;
}
// Refresh the widget for the dataset `datasetId`: show a download link for
// its newest version and, when several versions exist, fill the version
// picker (newest first). Shows the fallback notice when no usable version
// data is available for the dataset.
function populateVersions(datasetId) {
  // Start from a fully collapsed, empty state so nothing from the previously
  // selected dataset lingers. That includes the toggle's arrow, which would
  // otherwise keep pointing down after the panel has been hidden.
  latestContainer.style.display = 'none';
  olderSection.style.display = 'none';
  olderContent.style.display = 'none';
  toggleOlder.textContent = '\u25B6 Download a specific version';
  versionSelect.textContent = '';
  linkContainer.textContent = '';
  var data = getDatasets();
  if (!data) { showError(); return; }
  // Work on a filtered copy: the shared data is left untouched, and entries
  // that are not strings (which the label code below could not handle) are
  // dropped.
  var listed = data[datasetId];
  var suffixes = Array.isArray(listed)
    ? listed.filter(function (entry) { return typeof entry === 'string'; })
    : [];
  // No versions known for this dataset: point the user at the bucket rather
  // than leaving the widget blank.
  if (suffixes.length === 0) { showError(); return; }
  // Sort oldest to newest; the last entry is the latest.
  suffixes.sort(compareSuffixes);
  var latestSuffix = suffixes[suffixes.length - 1];
  var latestUrl = buildUrl(datasetId, latestSuffix);
  if (!latestUrl) { showError(); return; }
  // Show the latest download link.
  latestContainer.textContent = '';
  latestContainer.appendChild(makeDownloadLink(latestUrl));
  latestContainer.style.display = '';
  // Offer the version picker only when there is a choice to make.
  if (suffixes.length > 1) {
    olderSection.style.display = '';
    // List all versions, newest first; mark the latest.
    for (var i = suffixes.length - 1; i >= 0; i--) {
      var s = suffixes[i];
      var label = s === '' ? '(base)' : s.replace(/^_/, '');
      if (i === suffixes.length - 1) label += ' (latest)';
      var opt = document.createElement('option');
      opt.value = s;
      opt.textContent = label;
      versionSelect.appendChild(opt);
    }
    updateOlderDownloadLink();
  }
}
// Point the specific-version slot at the archive for the currently selected
// dataset and version. Does nothing while no dataset is selected; leaves the
// slot empty when buildUrl() rejects the selection.
function updateOlderDownloadLink() {
  var chosenDataset = datasetSelect.value;
  if (chosenDataset) {
    var target = buildUrl(chosenDataset, versionSelect.value);
    linkContainer.textContent = '';
    if (target) {
      linkContainer.appendChild(makeDownloadLink(target));
    }
  }
}
// When the dataset choice changes, rebuild the download controls for it.
datasetSelect.addEventListener('change', function () {
  var chosen = datasetSelect.value;
  if (chosen) {
    populateVersions(chosen);
    return;
  }
  // The empty placeholder option was selected: hide every download control.
  var panels = [latestContainer, olderSection, olderContent];
  for (var i = 0; i < panels.length; i++) {
    panels[i].style.display = 'none';
  }
});
// Expand or collapse the version picker and flip the arrow at the start of
// the toggle's text to match: right-pointing when collapsed, down-pointing
// when expanded.
toggleOlder.addEventListener('click', function (evt) {
  // Suppress the element's default click action; this handler is the whole
  // behaviour of the toggle.
  evt.preventDefault();
  var caption = ' Download a specific version';
  if (olderContent.style.display !== 'none') {
    olderContent.style.display = 'none';
    toggleOlder.textContent = '\u25B6' + caption;
  } else {
    olderContent.style.display = '';
    toggleOlder.textContent = '\u25BC' + caption;
  }
});
// Rebuild the specific-version link whenever a different version is picked.
versionSelect.addEventListener('change', function () {
  updateOlderDownloadLink();
});
})();
</script>
</div>
- CC0 1.0: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication
- CC BY 4.0: Creative Commons Attribution 4.0 International
- CC BY-NC 4.0: Creative Commons Attribution-NonCommercial 4.0 International
The data retains the license of the original published data.
Also, here you can find additional datasets and resources for methods developers.
If you use the datasets please cite the original sources and double-check their license.
If you notice any issues, such as a changed dataset, a removed dataset, or missing dataset information, please open a GitHub issue so we can address it. Thank you!
Footnotes
-
Schapiro, D. et al. MCMICRO: A scalable, modular image-processing pipeline for multiplexed tissue imaging. Cold Spring Harbor Laboratory 2021.03.15.435473 (2021) doi: 10.1101/2021.03.15.435473. ↩
-
Windhager, J., Bodenmiller, B. & Eling, N. An end-to-end workflow for multiplexed image processing and analysis. bioRxiv 2021.11.12.468357 (2021) doi: 10.1101/2021.11.12.468357. ↩
-
Eling, N. & Windhager, J. Example imaging mass cytometry raw data. (2022). doi: 10.5281/zenodo.5949116. ↩
-
Eling, N. & Windhager, J. steinbock results of IMC example data. (2022). doi: 10.5281/zenodo.7412972. ↩
-
From https://www.10xgenomics.com/datasets/visium-hd-cytassist-gene-expression-libraries-of-mouse-intestine ↩
-
From https://www.10xgenomics.com/datasets/visium-hd-three-prime-mouse-brain-fresh-frozen ↩
-
Janesick, A. et al. High resolution mapping of the breast cancer tumor microenvironment using integrated single cell, spatial and in situ analysis of FFPE tissue. bioRxiv 2022.10.06.510405 (2022) doi: 10.1101/2022.10.06.510405. https://www.10xgenomics.com/products/xenium-in-situ/preview-dataset-human-breast ↩ ↩2
-
Available here: https://www.ebi.ac.uk/biostudies/arrayexpress/studies/E-MTAB-11114. Linked publications: https://www.nature.com/articles/s43587-022-00246-4, https://www.nature.com/articles/s41587-021-01139-4 ↩
-
From https://www.10xgenomics.com/datasets/preview-data-ffpe-human-lung-cancer-with-xenium-multimodal-cell-segmentation-1-standard ↩
-
Moffitt, J. R. et al. Molecular, spatial, and functional single-cell profiling of the hypothalamic preoptic region. Science 362, (2018). ↩
-
Hartmann, F. J. et al. Single-cell metabolic profiling of human cytotoxic T cells. Nat. Biotechnol. (2020) doi:10.1038/s41587-020-0651-8. ↩
-
Guilliams, Martin, et al. "Spatial proteogenomics reveals distinct and evolutionarily conserved hepatic macrophage niches." Cell 185.2 (2022) doi: 10.1016/j.cell.2021.12.018 ↩
-
Pollaris, Lotte, et al. "SPArrOW: a flexible, interactive and scalable pipeline for spatial transcriptomics analysis." bioRxiv (2024) doi:10.1101/2024.07.04.601829 ↩
-
See https://github.com/giovp/spatialdata-sandbox/blob/main/spacem_helanih3t3/README.md ↩