Skip to content

Commit b05dd5c

Browse files
mvdocclaude
andcommitted
Fix upload to skip hidden files/dirs in submodules
Previously, the upload command only skipped files starting with "." which correctly skipped root hidden files (.git/, .datalad/, etc.) but not hidden files/dirs in submodules (e.g., derivatives/.git/annex/objects/ or derivatives/.datalad/config). This could cause the CLI to upload duplicate annex objects or internal datalad files when uploading datasets containing submodules. The fix adds a check for any hidden path component (starting with .) anywhere in the relative path, while still correctly following symlinks that point to .git/annex/objects/. Also adds unit tests for the new shouldIncludeFile() function. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b81ca88 commit b05dd5c

File tree

2 files changed

+89
-4
lines changed

2 files changed

+89
-4
lines changed

cli/src/commands/upload.test.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import { assertEquals } from "@std/assert/equals"
2+
import { hiddenPathPattern, shouldIncludeFile } from "./upload.ts"
3+
4+
Deno.test("hiddenPathPattern matches hidden files/dirs in subpaths", () => {
5+
assertEquals(hiddenPathPattern.test("derivatives/.git/config"), true)
6+
assertEquals(hiddenPathPattern.test("derivatives/.git/annex/objects/xx/yy/key"), true)
7+
assertEquals(hiddenPathPattern.test("derivatives/.datalad/config"), true)
8+
assertEquals(hiddenPathPattern.test("a/.git/b"), true)
9+
assertEquals(hiddenPathPattern.test("sub/.gitignore"), true)
10+
// Should not match dotfiles at root (no leading slash)
11+
assertEquals(hiddenPathPattern.test(".git/config"), false)
12+
assertEquals(hiddenPathPattern.test(".bidsignore"), false)
13+
// Should not match files that just contain a dot in name
14+
assertEquals(hiddenPathPattern.test("my.gitconfig"), false)
15+
assertEquals(hiddenPathPattern.test("file.nii.gz"), false)
16+
})
17+
18+
Deno.test("shouldIncludeFile() includes regular dataset files", () => {
19+
assertEquals(shouldIncludeFile("dataset_description.json"), true)
20+
assertEquals(shouldIncludeFile("sub-01/anat/sub-01_T1w.nii.gz"), true)
21+
assertEquals(shouldIncludeFile("derivatives/sub-01/func/bold.nii.gz"), true)
22+
})
23+
24+
Deno.test("shouldIncludeFile() includes .bidsignore", () => {
25+
assertEquals(shouldIncludeFile(".bidsignore"), true)
26+
})
27+
28+
Deno.test("shouldIncludeFile() skips root hidden files and directories", () => {
29+
assertEquals(shouldIncludeFile(".git/config"), false)
30+
assertEquals(shouldIncludeFile(".git/annex/objects/abc/def/SHA256--xyz"), false)
31+
assertEquals(shouldIncludeFile(".datalad/config"), false)
32+
assertEquals(shouldIncludeFile(".gitattributes"), false)
33+
})
34+
35+
Deno.test("shouldIncludeFile() skips hidden files/dirs in submodules", () => {
36+
// .git directories
37+
assertEquals(
38+
shouldIncludeFile("derivatives/.git/annex/objects/abc/def/SHA256--xyz"),
39+
false,
40+
)
41+
assertEquals(shouldIncludeFile("derivatives/.git/config"), false)
42+
assertEquals(shouldIncludeFile("sourcedata/.git/HEAD"), false)
43+
assertEquals(
44+
shouldIncludeFile("derivatives/preprocessing/.git/annex/objects/xx/yy/key"),
45+
false,
46+
)
47+
// Deeper nested paths
48+
assertEquals(
49+
shouldIncludeFile("derivatives/qa/.git/annex/objects/Xk/Mx/SHA256--abc"),
50+
false,
51+
)
52+
assertEquals(
53+
shouldIncludeFile("derivatives/fmriprep/sub-01/.datalad/config"),
54+
false,
55+
)
56+
// .datalad directories
57+
assertEquals(shouldIncludeFile("derivatives/.datalad/config"), false)
58+
assertEquals(shouldIncludeFile("sourcedata/.datalad/metadata/aggregate_v1.json"), false)
59+
// Other hidden files in submodules
60+
assertEquals(shouldIncludeFile("derivatives/.gitattributes"), false)
61+
assertEquals(shouldIncludeFile("sourcedata/.gitignore"), false)
62+
})
63+
64+
Deno.test("shouldIncludeFile() includes symlinks in submodules", () => {
65+
// Symlinks themselves don't have .git in their path
66+
// They point to .git/annex/objects but the path we check is the symlink location
67+
assertEquals(shouldIncludeFile("derivatives/sub-01/anat/sub-01_T1w.nii.gz"), true)
68+
})

cli/src/commands/upload.ts

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,26 @@ import { createDatasetAffirmed } from "./create-dataset.ts"
1313
import { CreateDatasetAffirmedError } from "../error.ts"
1414
import validatorConfig from "../validator-config.json" with { type: "json" }
1515

16+
/**
17+
* Check if a path contains a hidden directory or file (starting with .)
18+
* Matches paths like "derivatives/.git/..." or "derivatives/.datalad/..." in submodules
19+
*/
20+
export const hiddenPathPattern = /[\/\\]\./
21+
22+
/**
23+
* Determine if a file should be included in the upload based on its relative path
24+
* @param relativePath Path relative to the dataset root
25+
* @returns true if the file should be uploaded
26+
*/
27+
export function shouldIncludeFile(relativePath: string): boolean {
28+
// Skip hidden files/dirs in submodules (e.g., derivatives/.git/, derivatives/.datalad/)
29+
const hasHiddenComponent = hiddenPathPattern.test(relativePath)
30+
return (
31+
relativePath === ".bidsignore" ||
32+
(!relativePath.startsWith(".") && !hasHiddenComponent)
33+
)
34+
}
35+
1636
async function getRepoDir(url: URL): Promise<string> {
1737
const LOCAL_STORAGE_KEY = `openneuro_cli_${url.hostname}_`
1838
const repoDir = localStorage.getItem(LOCAL_STORAGE_KEY)
@@ -42,10 +62,7 @@ export async function addGitFiles(
4262
})
4363
) {
4464
const relativePath = relative(dataset_directory_abs, walkEntry.path)
45-
if (
46-
relativePath === ".bidsignore" ||
47-
!relativePath.startsWith(".")
48-
) {
65+
if (shouldIncludeFile(relativePath)) {
4966
worker.postMessage({
5067
"command": "add",
5168
"path": walkEntry.path,

0 commit comments

Comments
 (0)