Skip to content

Commit 7b43e73

Browse files
authored
fix: fetch large files via blob request (#23)
1 parent b77a933 commit 7b43e73

File tree

9 files changed

+193
-74
lines changed

9 files changed

+193
-74
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
44

55
## [unreleased]
66

7+
## [0.8.1] - 2025-04-14
8+
9+
### Changed
10+
11+
- Improved GitHub file fetching by adding a fallback to the `blobs` API when the file content is too large
12+
713
## [0.8.0] - 2025-04-08
814

915
### Added

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@open-resource-discovery/provider-server",
3-
"version": "0.8.0",
3+
"version": "0.8.1",
44
"description": "A CLI application or server that takes multiple ORD documents and other metadata files and exposes them as an ORD Provider implementation (ORD Document API)",
55
"engines": {
66
"node": ">=22.8.0",

src/constant.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import { config } from "dotenv";
2+
3+
config();
4+
15
// URL path constants
26
export const PATH_CONSTANTS = {
37
// Base paths

src/repositories/githubDocumentRepository.ts

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { ORDDocument } from "@open-resource-discovery/specification";
22
import { DocumentRepository } from "./interfaces/documentRepository.js";
3-
import { GithubOpts, GitHubFileResponse, GitHubInstance } from "../model/github.js";
3+
import { GithubOpts, GitHubInstance } from "../model/github.js";
44
import { fetchGitHubFile, getDirectoryHash, getGithubDirectoryContents } from "../util/github.js";
55
import { normalizePath, joinFilePaths } from "../util/pathUtils.js";
66
import { PATH_CONSTANTS } from "../constant.js";
@@ -34,8 +34,7 @@ export class GithubDocumentRepository implements DocumentRepository {
3434
public async getDocument(relativePath: string): Promise<ORDDocument | null> {
3535
const githubPath = this.getFullGithubPath(relativePath);
3636
try {
37-
const response = await fetchGitHubFile<GitHubFileResponse>(this.githubInstance, githubPath, this.githubToken);
38-
const content = Buffer.from(response.content, "base64").toString("utf-8");
37+
const content = await fetchGitHubFile(this.githubInstance, githubPath, this.githubToken);
3938
const jsonData = JSON.parse(content);
4039

4140
// Basic validation to ensure it's an ORD document
@@ -119,8 +118,7 @@ export class GithubDocumentRepository implements DocumentRepository {
119118
public async getFileContent(relativePath: string): Promise<string | Buffer | null> {
120119
const githubPath = this.getFullGithubPath(relativePath);
121120
try {
122-
const response = await fetchGitHubFile<GitHubFileResponse>(this.githubInstance, githubPath, this.githubToken);
123-
return Buffer.from(response.content, "base64").toString("utf-8");
121+
return await fetchGitHubFile(this.githubInstance, githubPath, this.githubToken);
124122
} catch (error) {
125123
log.error(`Error fetching file content from GitHub path ${githubPath}: ${error}`);
126124
return null;

src/util/__tests__/github.test.ts

Lines changed: 108 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import { describe, it, expect, jest, beforeAll, afterAll, beforeEach } from "@jest/globals";
2-
import { fetchGitHubFile } from "src/util/github.js";
3-
import { GitHubFileResponse, GitHubInstance } from "src/model/github.js";
2+
import { fetchGitHubFile, GitHubContentItem } from "src/util/github.js";
3+
import { GitHubInstance } from "src/model/github.js";
44
import { GitHubFileNotFoundError, GitHubNetworkError, GitHubAccessError } from "src/model/error/GithubErrors.js";
5+
import { Buffer } from "buffer";
56

6-
describe("GitHub", () => {
7+
describe("GitHub Util", () => {
78
beforeAll(() => {
89
const mockFetch = jest.fn() as jest.MockedFunction<typeof fetch>;
910
global.fetch = mockFetch;
@@ -23,114 +24,172 @@ describe("GitHub", () => {
2324
branch: "main",
2425
};
2526

26-
it("should fetch GitHub file successfully", async () => {
27-
const mockFileContent: GitHubFileResponse = {
27+
const mockToken = "test-token";
28+
const fileContent = "Decoded file content";
29+
const fileContentBase64 = Buffer.from(fileContent).toString("base64");
30+
const filePath = "path/to/test.json";
31+
const fileSha = "abc123def456";
32+
const contentsUrl = `https://api.github.com/repos/owner/repo/contents/${filePath}?ref=main`;
33+
const blobUrl = `https://api.github.com/repos/owner/repo/git/blobs/${fileSha}`;
34+
35+
it("should fetch small GitHub file successfully (content in first response)", async () => {
36+
const mockMetadataResponse: GitHubContentItem = {
2837
name: "test.json",
29-
path: "test.json",
30-
sha: "abc123",
38+
path: filePath,
39+
sha: fileSha,
3140
size: 100,
32-
url: "https://api.github.com/repos/owner/repo/contents/test.json",
33-
html_url: "https://github.com/owner/repo/blob/main/test.json",
34-
git_url: "https://api.github.com/repos/owner/repo/git/blobs/abc123",
35-
download_url: "https://raw.githubusercontent.com/owner/repo/main/test.json",
3641
type: "file",
37-
content: "base64encodedcontent",
42+
content: fileContentBase64,
3843
encoding: "base64",
3944
};
40-
const mockResponse: Partial<Response> = {
45+
const mockFetchResponse: Partial<Response> = {
4146
ok: true,
4247
status: 200,
43-
statusText: "OK",
44-
json: jest.fn<() => Promise<unknown>>().mockResolvedValue(mockFileContent),
48+
json: jest.fn<() => Promise<unknown>>().mockResolvedValue(mockMetadataResponse),
49+
};
50+
51+
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
52+
mockFetch.mockResolvedValueOnce(mockFetchResponse as Response);
53+
54+
const result = await fetchGitHubFile(mockInstance, filePath, mockToken);
55+
56+
expect(result).toEqual(fileContent);
57+
expect(mockFetch).toHaveBeenCalledTimes(1);
58+
expect(mockFetch).toHaveBeenCalledWith(contentsUrl, {
59+
headers: { Authorization: `Token ${mockToken}` },
60+
});
61+
});
62+
63+
it("should fetch large GitHub file successfully (content via blob API)", async () => {
64+
const mockMetadataResponse: GitHubContentItem = {
65+
name: "test.json",
66+
path: filePath,
67+
sha: fileSha,
68+
size: 2 * 1024 * 1024,
69+
type: "file",
70+
};
71+
const mockBlobResponseData = {
72+
sha: fileSha,
73+
size: 2 * 1024 * 1024,
74+
content: fileContentBase64,
75+
encoding: "base64",
76+
};
77+
78+
const mockFetchMetadataResponse: Partial<Response> = {
79+
ok: true,
80+
status: 200,
81+
json: jest.fn<() => Promise<unknown>>().mockResolvedValue(mockMetadataResponse),
82+
};
83+
const mockFetchBlobResponse: Partial<Response> = {
84+
ok: true,
85+
status: 200,
86+
json: jest.fn<() => Promise<unknown>>().mockResolvedValue(mockBlobResponseData),
4587
};
4688

4789
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
48-
mockFetch.mockResolvedValue(mockResponse as Response);
90+
mockFetch
91+
.mockResolvedValueOnce(mockFetchMetadataResponse as Response)
92+
.mockResolvedValueOnce(mockFetchBlobResponse as Response);
4993

50-
const result = await fetchGitHubFile<GitHubFileResponse>(mockInstance, "test.json", "token");
94+
const result = await fetchGitHubFile(mockInstance, filePath, mockToken);
5195

52-
expect(result).toEqual(mockFileContent);
53-
expect(mockFetch).toHaveBeenCalledWith("https://api.github.com/repos/owner/repo/contents/test.json?ref=main", {
54-
headers: { Authorization: "Token token" },
96+
expect(result).toEqual(fileContent);
97+
expect(mockFetch).toHaveBeenCalledTimes(2);
98+
expect(mockFetch).toHaveBeenNthCalledWith(1, contentsUrl, {
99+
headers: { Authorization: `Token ${mockToken}` },
100+
});
101+
expect(mockFetch).toHaveBeenNthCalledWith(2, blobUrl, {
102+
headers: { Authorization: `Token ${mockToken}` },
55103
});
56104
});
57105

58-
it("should throw GitHubFileNotFoundError when file is not found", async () => {
106+
it("should throw GitHubFileNotFoundError when file is not found (404 on metadata fetch)", async () => {
59107
const mockResponse: Partial<Response> = {
60108
ok: false,
61109
status: 404,
62110
statusText: "Not Found",
63111
};
64112
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
65-
mockFetch.mockResolvedValue(mockResponse as Response);
66-
await expect(fetchGitHubFile<GitHubFileResponse>(mockInstance, "nonexistent.json", "token")).rejects.toThrow(
67-
GitHubFileNotFoundError,
113+
mockFetch.mockResolvedValueOnce(mockResponse as Response);
114+
115+
await expect(fetchGitHubFile(mockInstance, "nonexistent.json", mockToken)).rejects.toThrow(GitHubFileNotFoundError);
116+
expect(mockFetch).toHaveBeenCalledTimes(1);
117+
expect(mockFetch).toHaveBeenCalledWith(
118+
"https://api.github.com/repos/owner/repo/contents/nonexistent.json?ref=main",
119+
expect.any(Object),
68120
);
69121
});
70122

71-
it("should throw GitHubNetworkError on network errors", async () => {
123+
it("should throw GitHubNetworkError on network errors during metadata fetch", async () => {
72124
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
73-
mockFetch.mockRejectedValue(new TypeError("Failed to fetch"));
74-
await expect(fetchGitHubFile<GitHubFileResponse>(mockInstance, "test.json", "token")).rejects.toThrow(
75-
GitHubNetworkError,
76-
);
125+
mockFetch.mockRejectedValueOnce(new TypeError("Failed to fetch"));
126+
127+
await expect(fetchGitHubFile(mockInstance, filePath, mockToken)).rejects.toThrow(GitHubNetworkError);
128+
expect(mockFetch).toHaveBeenCalledTimes(1);
129+
expect(mockFetch).toHaveBeenCalledWith(contentsUrl, expect.any(Object));
77130
});
78131

79-
it("should throw GitHubAccessError on invalid JSON response", async () => {
132+
it("should throw GitHubAccessError on invalid JSON response during metadata fetch", async () => {
80133
const mockResponse: Partial<Response> = {
81134
ok: true,
82135
status: 200,
83136
statusText: "OK",
84-
json: jest.fn<() => Promise<unknown>>().mockRejectedValue(new Error("Invalid JSON")),
137+
json: jest.fn<() => Promise<unknown>>().mockRejectedValue(new SyntaxError("Invalid JSON")),
85138
};
86139
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
87-
mockFetch.mockResolvedValue(mockResponse as Response);
88-
await expect(fetchGitHubFile<GitHubFileResponse>(mockInstance, "test.json", "token")).rejects.toThrow(
89-
GitHubAccessError,
90-
);
140+
mockFetch.mockResolvedValueOnce(mockResponse as Response);
141+
142+
await expect(fetchGitHubFile(mockInstance, filePath, mockToken)).rejects.toThrow(GitHubAccessError);
143+
expect(mockFetch).toHaveBeenCalledTimes(1);
144+
expect(mockFetch).toHaveBeenCalledWith(contentsUrl, expect.any(Object));
91145
});
92146

93-
it("should throw GitHubAccessError on empty response", async () => {
147+
it("should throw GitHubAccessError on empty response during metadata fetch", async () => {
94148
const mockResponse: Partial<Response> = {
95149
ok: true,
96150
status: 200,
97151
statusText: "OK",
98152
json: jest.fn<() => Promise<unknown>>().mockResolvedValue(null),
99153
};
100154
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
101-
mockFetch.mockResolvedValue(mockResponse as Response);
102-
await expect(fetchGitHubFile<GitHubFileResponse>(mockInstance, "test.json", "token")).rejects.toThrow(
103-
GitHubAccessError,
104-
);
155+
mockFetch.mockResolvedValueOnce(mockResponse as Response);
156+
157+
await expect(fetchGitHubFile(mockInstance, filePath, mockToken)).rejects.toThrow(GitHubAccessError);
158+
expect(mockFetch).toHaveBeenCalledTimes(1);
159+
expect(mockFetch).toHaveBeenCalledWith(contentsUrl, expect.any(Object));
105160
});
106161

107-
it("should throw GitHubAccessError on unauthorized access", async () => {
162+
it("should throw GitHubAccessError on unauthorized access during metadata fetch", async () => {
108163
const mockResponse: Partial<Response> = {
109164
ok: false,
110165
status: 401,
111166
statusText: "Unauthorized",
112167
};
113168
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
114-
mockFetch.mockResolvedValue(mockResponse as Response);
115-
await expect(fetchGitHubFile<GitHubFileResponse>(mockInstance, "test.json", "token")).rejects.toThrow(
116-
GitHubAccessError,
117-
);
169+
mockFetch.mockResolvedValueOnce(mockResponse as Response);
170+
171+
await expect(fetchGitHubFile(mockInstance, filePath, mockToken)).rejects.toThrow(GitHubAccessError);
172+
expect(mockFetch).toHaveBeenCalledTimes(1);
173+
expect(mockFetch).toHaveBeenCalledWith(contentsUrl, expect.any(Object));
118174
});
119175

120-
it("should include proper error details in GitHubAccessError", async () => {
176+
it("should include proper error details in GitHubAccessError from metadata fetch", async () => {
121177
const mockResponse: Partial<Response> = {
122178
ok: false,
123179
status: 401,
124180
statusText: "Unauthorized",
125181
};
126182
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
127-
mockFetch.mockResolvedValue(mockResponse as Response);
183+
mockFetch.mockResolvedValueOnce(mockResponse as Response);
184+
128185
try {
129-
await fetchGitHubFile<GitHubFileResponse>(mockInstance, "test.json", "token");
186+
await fetchGitHubFile(mockInstance, filePath, mockToken);
187+
188+
expect(true).toBe(false);
130189
} catch (error) {
131190
expect(error).toBeInstanceOf(GitHubAccessError);
132191
if (error instanceof GitHubAccessError) {
133-
expect(error.errorItem.target).toBe("test.json");
192+
expect(error.errorItem.target).toBe(filePath);
134193
expect(error.errorItem.details).toBeDefined();
135194
expect(error.errorItem.details![0].code).toBe(`HTTP_401`);
136195
}

src/util/__tests__/optsValidation.test.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,22 +137,25 @@ describe("Options Validation", () => {
137137
]),
138138
};
139139

140-
const mockResponseFile: Partial<Response> = {
140+
const mockResponseFileMetadata: Partial<Response> = {
141141
ok: true,
142142
status: 200,
143143
statusText: "OK",
144144
json: jest.fn<() => Promise<unknown>>().mockResolvedValue({
145145
name: "test.json",
146146
path: "documents/test.json",
147147
type: "file",
148+
sha: "dummySha123",
149+
size: 123,
148150
content: Buffer.from(JSON.stringify({ openResourceDiscovery: {} })).toString("base64"),
151+
encoding: "base64",
149152
}),
150153
};
151154

152155
const mockFetch = global.fetch as jest.MockedFunction<typeof fetch>;
153156
mockFetch
154157
.mockResolvedValueOnce(mockResponseDirectory as Response)
155-
.mockResolvedValueOnce(mockResponseFile as Response);
158+
.mockResolvedValueOnce(mockResponseFileMetadata as Response);
156159

157160
const options = {
158161
sourceType: OptSourceType.Github,

0 commit comments

Comments
 (0)