-
-
Notifications
You must be signed in to change notification settings - Fork 558
feat: PDF v3 - page filtering, image extraction, performance optimization #283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 32 commits
648c054
8ae8a21
638ba0d
407f682
27bf915
ffdb55b
fa49964
f2f9a60
111dcf2
86a35b2
73d7c39
a4a7698
3d0dff1
84e796d
3f0ab32
72bca17
3fad97a
ef9dbbc
8f93730
0906011
24e6ef1
134047f
0988d9f
877109c
307d302
b259461
6883994
7180eba
cd2f0e9
1286099
f0687ca
dcd643c
ba083c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,14 +6,15 @@ import { | |
| listDirectory, | ||
| moveFile, | ||
| getFileInfo, | ||
| writePdf, | ||
| type FileResult, | ||
| type MultiFileResult | ||
| } from '../tools/filesystem.js'; | ||
|
|
||
| import {ServerResult} from '../types.js'; | ||
| import {withTimeout} from '../utils/withTimeout.js'; | ||
| import {createErrorResponse} from '../error-handlers.js'; | ||
| import {configManager} from '../config-manager.js'; | ||
| import { ServerResult } from '../types.js'; | ||
| import { withTimeout } from '../utils/withTimeout.js'; | ||
| import { createErrorResponse } from '../error-handlers.js'; | ||
| import { configManager } from '../config-manager.js'; | ||
|
|
||
| import { | ||
| ReadFileArgsSchema, | ||
|
|
@@ -22,7 +23,8 @@ import { | |
| CreateDirectoryArgsSchema, | ||
| ListDirectoryArgsSchema, | ||
| MoveFileArgsSchema, | ||
| GetFileInfoArgsSchema | ||
| GetFileInfoArgsSchema, | ||
| WritePdfArgsSchema | ||
| } from '../tools/schemas.js'; | ||
|
|
||
| /** | ||
|
|
@@ -62,16 +64,47 @@ export async function handleReadFile(args: unknown): Promise<ServerResult> { | |
| // Use the provided limits or defaults | ||
| const offset = parsed.offset ?? 0; | ||
| const length = parsed.length ?? defaultLimit; | ||
|
|
||
| const fileResult = await readFile(parsed.path, parsed.isUrl, offset, length); | ||
|
|
||
| if (fileResult.isPdf) { | ||
| const meta = fileResult.payload?.metadata; | ||
| const author = meta?.author ? `, Author: ${meta?.author}` : ""; | ||
| const title = meta?.title ? `, Title: ${meta?.title}` : ""; | ||
| // Use the provided limits or defaults. | ||
| // If the caller did not supply an explicit length, fall back to the configured default. | ||
| const rawArgs = args as { offset?: number; length?: number } | undefined; | ||
| const offset = rawArgs && 'offset' in rawArgs ? parsed.offset : 0; | ||
| const length = rawArgs && 'length' in rawArgs ? parsed.length : defaultLimit; | ||
|
|
||
| const content = fileResult.payload?.pages?.flatMap(p => [ | ||
| ...(p.images?.map((image, i) => ({ | ||
| type: "image", | ||
| data: image.data, | ||
| mimeType: image.mimeType | ||
| })) ?? []), | ||
| { | ||
| type: "text", | ||
| text: `<!-- Page: ${p.pageNumber} -->\n${p.text}`, | ||
| }, | ||
| ]) ?? []; | ||
|
|
||
| return { | ||
| content: [ | ||
| { | ||
| type: "text", | ||
| text: `PDF file: ${parsed.path}${author}${title} (${meta?.totalPages} pages) \n` | ||
| }, | ||
| ...content | ||
| ] | ||
| }; | ||
| } | ||
| if (fileResult.isImage) { | ||
| // For image files, return as an image content type | ||
| return { | ||
| content: [ | ||
| { | ||
| type: "text", | ||
| text: `Image file: ${parsed.path} (${fileResult.mimeType})\n` | ||
| { | ||
| type: "text", | ||
| text: `Image file: ${parsed.path} (${fileResult.mimeType})\n` | ||
| }, | ||
| { | ||
| type: "image", | ||
|
|
@@ -87,7 +120,7 @@ export async function handleReadFile(args: unknown): Promise<ServerResult> { | |
| }; | ||
| } | ||
| }; | ||
|
|
||
| // Execute with timeout at the handler level | ||
| const result = await withTimeout( | ||
| readFileOperation(), | ||
|
|
@@ -108,28 +141,44 @@ export async function handleReadFile(args: unknown): Promise<ServerResult> { | |
| export async function handleReadMultipleFiles(args: unknown): Promise<ServerResult> { | ||
| const parsed = ReadMultipleFilesArgsSchema.parse(args); | ||
| const fileResults = await readMultipleFiles(parsed.paths); | ||
|
|
||
| // Create a text summary of all files | ||
| const textSummary = fileResults.map(result => { | ||
| if (result.error) { | ||
| return `${result.path}: Error - ${result.error}`; | ||
| } else if (result.isPdf) { | ||
| return `${result.path}: PDF file with ${result.payload?.pages?.length} pages`; | ||
| } else if (result.mimeType) { | ||
| return `${result.path}: ${result.mimeType} ${result.isImage ? '(image)' : '(text)'}`; | ||
| } else { | ||
| return `${result.path}: Unknown type`; | ||
| } | ||
| }).join("\n"); | ||
|
|
||
| // Create content items for each file | ||
| const contentItems: Array<{type: string, text?: string, data?: string, mimeType?: string}> = []; | ||
| const contentItems: Array<{ type: string, text?: string, data?: string, mimeType?: string }> = []; | ||
|
|
||
| // Add the text summary | ||
| contentItems.push({ type: "text", text: textSummary }); | ||
|
|
||
| // Add each file content | ||
| for (const result of fileResults) { | ||
| if (!result.error && result.content !== undefined) { | ||
| if (result.isImage && result.mimeType) { | ||
| if (result.isPdf) { | ||
| result.payload?.pages.forEach((page, i) => { | ||
| page.images.forEach((image, i) => { | ||
| contentItems.push({ | ||
| type: "image", | ||
| data: image.data, | ||
| mimeType: image.mimeType | ||
| }); | ||
| }); | ||
| contentItems.push({ | ||
| type: "text", | ||
| text: page.text, | ||
| }); | ||
| }); | ||
| } else if (result.isImage && result.mimeType) { | ||
|
Comment on lines
112
to
+181
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: cat -n src/handlers/filesystem-handlers.ts | head -200 | tail -70Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 3028 🏁 Script executed: # Check file size first
wc -l src/handlers/filesystem-handlers.tsRepository: wonderwhy-er/DesktopCommanderMCP Length of output: 115 🏁 Script executed: # Look for type definitions
fd -e ts | xargs grep -l "interface.*Result\|type.*Result" | head -5Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 198 🏁 Script executed: # Search for where payload is assigned/used
rg "payload\s*=" src/handlers/filesystem-handlers.ts -B 2 -A 2Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 58 🏁 Script executed: # Check type definitions
cat -n src/types.tsRepository: wonderwhy-er/DesktopCommanderMCP Length of output: 2365 🏁 Script executed: # Find readMultipleFiles function definition
rg "function readMultipleFiles|const readMultipleFiles" src/ -B 2 -A 15Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 1448 🏁 Script executed: # Find MultiFileResult type definition
rg "interface MultiFileResult|type MultiFileResult" src/ -B 2 -A 10Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 1521 🏁 Script executed: # Look for payload structure definition
rg "payload.*=" src/tools/filesystem.ts -B 5 -A 5Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 777 🏁 Script executed: # Find FileResultPayloads type definition
rg "interface FileResultPayloads|type FileResultPayloads" src/ -B 2 -A 15Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 860 🏁 Script executed: # Search for where payload is created (PDF parsing)
rg "pages:\s*\[|images:\s*\[" src/ -B 5 -A 2 | head -60Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 1264 🏁 Script executed: # Find PdfPayload type definition
rg "interface PdfPayload|type PdfPayload" src/ -B 2 -A 10Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 602 🏁 Script executed: # Search for where PdfPayload is created/returned
rg "PdfPayload\s*[:{]|pages:\s*\[\|pages:" src/tools/ -B 3 -A 3 | head -80Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 58 🏁 Script executed: # Find PdfPageItem type definition
rg "interface PdfPageItem|type PdfPageItem" src/ -B 2 -A 10Repository: wonderwhy-er/DesktopCommanderMCP Length of output: 633 🏁 Script executed: # Verify the exact behavior at line 163 - what happens with optional chaining
# Optional chaining: result.payload?.pages will return undefined if payload is undefined
# Then calling .forEach() on undefined should error
python3 << 'EOF'
# Demonstrate the issue
code = "result.payload?.pages.forEach(...)"
print("JavaScript optional chaining behavior:")
print(f"Code: {code}")
print("If result.payload is undefined:")
print(" result.payload?.pages evaluates to undefined")
print(" undefined.forEach(...) throws TypeError")
print("\nThis is the actual bug - optional chaining on property doesn't protect the method call")
EOFRepository: wonderwhy-er/DesktopCommanderMCP Length of output: 162 Add optional chaining to PDF page and image iteration to prevent runtime errors. The current code has a subtle but real bug at line 163: result.payload?.pages.forEach((page, i) => {
page.images.forEach((image, i) => {Optional chaining ( Additionally, while the type system guarantees The fix is straightforward—add optional chaining to both - for (const result of fileResults) {
- if (!result.error && result.content !== undefined) {
- if (result.isPdf) {
- result.payload?.pages.forEach((page, i) => {
- page.images.forEach((image, i) => {
+ for (const result of fileResults) {
+ if (!result.error) {
+ if (result.isPdf) {
+ result.payload?.pages?.forEach((page) => {
+ page.images?.forEach((image) => {
contentItems.push({
type: "image",
data: image.data,
mimeType: image.mimeType
});
});
contentItems.push({
type: "text",
text: page.text,
});
});
- } else if (result.isImage && result.mimeType) {
+ } else if (result.isImage && result.mimeType && result.content !== undefined) {
// For image files, add an image content item
contentItems.push({
type: "image",
data: result.content,
mimeType: result.mimeType
});
- } else {
+ } else if (result.content !== undefined) {
// For text files, add a text summary
contentItems.push({
type: "text",
text: `\n--- ${result.path} contents: ---\n${result.content}`
});
}
}
}This removes the global content guard (PDFs don't need 🤖 Prompt for AI Agents |
||
| // For image files, add an image content item | ||
| contentItems.push({ | ||
| type: "image", | ||
|
|
@@ -145,7 +194,7 @@ export async function handleReadMultipleFiles(args: unknown): Promise<ServerResu | |
| } | ||
| } | ||
| } | ||
|
|
||
| return { content: contentItems }; | ||
| } | ||
|
|
||
|
|
@@ -159,7 +208,7 @@ export async function handleWriteFile(args: unknown): Promise<ServerResult> { | |
| // Get the line limit from configuration | ||
| const config = await configManager.getConfig(); | ||
| const MAX_LINES = config.fileWriteLineLimit ?? 50; // Default to 50 if not set | ||
|
|
||
| // Strictly enforce line count limit | ||
| const lines = parsed.content.split('\n'); | ||
| const lineCount = lines.length; | ||
|
|
@@ -172,13 +221,13 @@ export async function handleWriteFile(args: unknown): Promise<ServerResult> { | |
|
|
||
| // Pass the mode parameter to writeFile | ||
| await writeFile(parsed.path, parsed.content, parsed.mode); | ||
|
|
||
| // Provide more informative message based on mode | ||
| const modeMessage = parsed.mode === 'append' ? 'appended to' : 'wrote to'; | ||
|
|
||
| return { | ||
| content: [{ | ||
| type: "text", | ||
| content: [{ | ||
| type: "text", | ||
| text: `Successfully ${modeMessage} ${parsed.path} (${lineCount} lines) ${errorMessage}` | ||
| }], | ||
| }; | ||
|
|
@@ -249,11 +298,11 @@ export async function handleGetFileInfo(args: unknown): Promise<ServerResult> { | |
| const parsed = GetFileInfoArgsSchema.parse(args); | ||
| const info = await getFileInfo(parsed.path); | ||
| return { | ||
| content: [{ | ||
| type: "text", | ||
| content: [{ | ||
| type: "text", | ||
| text: Object.entries(info) | ||
| .map(([key, value]) => `${key}: ${value}`) | ||
| .join('\n') | ||
| .join('\n') | ||
| }], | ||
| }; | ||
| } catch (error) { | ||
|
|
@@ -262,5 +311,21 @@ export async function handleGetFileInfo(args: unknown): Promise<ServerResult> { | |
| } | ||
| } | ||
|
|
||
| // The listAllowedDirectories function has been removed | ||
| // Use get_config to retrieve the allowedDirectories configuration | ||
|
|
||
| /** | ||
| * Handle write_pdf command | ||
| */ | ||
| export async function handleWritePdf(args: unknown): Promise<ServerResult> { | ||
| try { | ||
| const parsed = WritePdfArgsSchema.parse(args); | ||
| await writePdf(parsed.path, parsed.content, parsed.outputPath, parsed.options); | ||
| const targetPath = parsed.outputPath || parsed.path; | ||
| return { | ||
| content: [{ type: "text", text: `Successfully wrote PDF to ${targetPath}${parsed.outputPath ? `\nOriginal file: ${parsed.path}` : ''}` }], | ||
| }; | ||
| } catch (error) { | ||
| const errorMessage = error instanceof Error ? error.message : String(error); | ||
| return createErrorResponse(errorMessage); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.