Skip to content

Commit 14a624c

Browse files
authored
Improve binary file handling (#187)
* Additional changes and experiments for docker * Fix comment * Detect binary file read and redirect to processing * Add test * Remove docker files
1 parent 7baeeae commit 14a624c

File tree

7 files changed

+352
-110
lines changed

7 files changed

+352
-110
lines changed

docker-mcp.yaml

Lines changed: 0 additions & 99 deletions
This file was deleted.

package-lock.json

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
"cross-fetch": "^4.1.0",
7272
"fastest-levenshtein": "^1.0.16",
7373
"glob": "^10.3.10",
74+
"isbinaryfile": "^5.0.4",
7475
"zod": "^3.24.1",
7576
"zod-to-json-schema": "^3.23.5"
7677
},

src/server.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,9 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
414414
• start_process("wc -l /path/file.csv") → Line counting
415415
• start_process("head -10 /path/file.csv") → File preview
416416
417+
BINARY FILE SUPPORT:
418+
For PDF, Excel, Word, archives, databases, and other binary formats, use process tools with appropriate libraries or command-line utilities.
419+
417420
INTERACTIVE PROCESSES FOR DATA ANALYSIS:
418421
1. start_process("python3 -i") - Start Python REPL for data work
419422
2. start_process("node -i") - Start Node.js REPL for JSON/JS
@@ -487,6 +490,9 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
487490
4. Analyze: interact_with_process(pid, "print(df.describe())")
488491
5. Continue: interact_with_process(pid, "df.groupby('column').size()")
489492
493+
BINARY FILE PROCESSING WORKFLOWS:
494+
Use appropriate Python libraries (PyPDF2, pandas, docx2txt, etc.) or command-line tools for binary file analysis.
495+
490496
SMART DETECTION:
491497
- Automatically waits for REPL prompt (>>>, >, etc.)
492498
- Detects errors and completion states

src/tools/filesystem.ts

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import os from 'os';
44
import fetch from 'cross-fetch';
55
import { createReadStream } from 'fs';
66
import { createInterface } from 'readline';
7+
import { isBinaryFile } from 'isbinaryfile';
78
import {capture} from '../utils/capture.js';
89
import {withTimeout} from '../utils/withTimeout.js';
910
import {configManager} from '../config-manager.js';
@@ -87,6 +88,22 @@ async function getDefaultReadLength(): Promise<number> {
8788
return config.fileReadLineLimit ?? 1000; // Default to 1000 lines if not set
8889
}
8990

91+
/**
92+
* Generate instructions for handling binary files
93+
* @param filePath Path to the binary file
94+
* @param mimeType MIME type of the file
95+
* @returns Instruction message for the LLM
96+
*/
97+
function getBinaryFileInstructions(filePath: string, mimeType: string): string {
98+
const fileName = path.basename(filePath);
99+
100+
return `Cannot read binary file as text: ${fileName} (${mimeType})
101+
102+
Use start_process + interact_with_process to analyze binary files with appropriate tools (Node.js or Python libraries, command-line utilities, etc.).
103+
104+
The read_file tool only handles text files and images.`;
105+
}
106+
90107
// Initialize allowed directories from configuration
91108
async function getAllowedDirs(): Promise<string[]> {
92109
try {
@@ -383,6 +400,17 @@ async function readFileWithSmartPositioning(filePath: string, offset: number, le
383400
const stats = await fs.stat(filePath);
384401
const fileSize = stats.size;
385402

403+
// Check if the file is binary (but allow images to pass through)
404+
const { isImage } = await getMimeTypeInfo(filePath);
405+
if (!isImage) {
406+
const isBinary = await isBinaryFile(filePath);
407+
if (isBinary) {
408+
// Return instructions instead of trying to read binary content
409+
const instructions = getBinaryFileInstructions(filePath, mimeType);
410+
throw new Error(instructions);
411+
}
412+
}
413+
386414
// Get total line count for enhanced status messages (only for smaller files)
387415
const totalLines = await getFileLineCount(filePath);
388416

@@ -664,11 +692,20 @@ export async function readFileFromDisk(filePath: string, offset: number = 0, len
664692
try {
665693
return await readFileWithSmartPositioning(validPath, offset, length, mimeType, true);
666694
} catch (error) {
667-
// If UTF-8 reading fails, treat as binary and return base64 but still as text
668-
const buffer = await fs.readFile(validPath);
669-
const content = `Binary file content (base64 encoded):\n${buffer.toString('base64')}`;
670-
671-
return { content, mimeType: 'text/plain', isImage: false };
695+
// If it's our binary file instruction error, return it as content
696+
if (error instanceof Error && error.message.includes('Cannot read binary file as text:')) {
697+
return { content: error.message, mimeType: 'text/plain', isImage: false };
698+
}
699+
700+
// If UTF-8 reading fails for other reasons, also check if it's binary
701+
const isBinary = await isBinaryFile(validPath);
702+
if (isBinary) {
703+
const instructions = getBinaryFileInstructions(validPath, mimeType);
704+
return { content: instructions, mimeType: 'text/plain', isImage: false };
705+
}
706+
707+
// Only if it's truly not binary, then we have a real UTF-8 reading error
708+
throw error;
672709
}
673710
}
674711
};

0 commit comments

Comments
 (0)