Skip to content

Commit 5bc2e0c

Browse files
🦅
1 parent 4d257a0 commit 5bc2e0c

File tree

3 files changed

+31
-10
lines changed

3 files changed

+31
-10
lines changed

src/extract-kindle-book.ts

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,14 @@ import sharp from 'sharp'
1212

1313
import type { BookMetadata, TocItem } from './types'
1414
import { parsePageNav, parseTocItems } from './playwright-utils'
15-
import { assert, getEnv, normalizeAuthors, parseJsonpResponse } from './utils'
15+
import {
16+
assert,
17+
// extractTar,
18+
getEnv,
19+
// hashObject,
20+
normalizeAuthors,
21+
parseJsonpResponse
22+
} from './utils'
1623

1724
// Block amazon analytics requests
1825
// (not strictly necessary, but adblockers do this by default anyway and it
@@ -148,8 +155,20 @@ async function main() {
148155
result.info = body
149156
} else if (url.pathname === '/renderer/render') {
150157
// TODO: these TAR files have some useful metadata that we could use...
158+
// const params = Object.fromEntries(url.searchParams.entries())
159+
// const hash = hashObject(params)
160+
// const renderDir = path.join(userDataDir, 'render', hash)
161+
// await fs.mkdir(renderDir, { recursive: true })
151162
// const body = await response.body()
152-
// const tempDir = await extractTarToTemp(body)
163+
// const tempDir = await extractTar(body, { cwd: renderDir })
164+
// const { startingPosition, skipPageCount, numPage } = params
165+
// console.log('RENDER TAR', tempDir, {
166+
// startingPosition,
167+
// skipPageCount,
168+
// numPage
169+
// })
170+
// TODO: if `location_map.json` exists, record `navigationUnit` map of positions to pages
171+
// TODO: `page_data_0_5.json` has start/end/words for each page in this render batch
153172
// const toc = JSON.parse(
154173
// await fs.readFile(path.join(tempDir, 'toc.json'), 'utf8')
155174
// )

src/utils.ts

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,25 +137,27 @@ export function ffmpegOnProgress(
137137
* Decompress a TAR (optionally .tar.gz/.tgz) Buffer into `cwd` (defaults to a fresh temp directory).
138138
* Returns the path of the directory the archive was extracted into (`cwd`).
139139
*/
140-
export async function extractTarToTemp(
140+
export async function extractTar(
141141
buf: Buffer,
142-
opts: { strip?: number } = {}
142+
{
143+
strip = 0,
144+
cwd = temporaryDirectory()
145+
}: { strip?: number; cwd?: string } = {}
143146
): Promise<string> {
144-
const dir = temporaryDirectory()
145147
const isGzip = buf.length >= 2 && buf[0] === 0x1f && buf[1] === 0x8b
146148

147149
try {
148150
const extractor = extract({
149-
cwd: dir,
151+
cwd,
150152
gzip: isGzip,
151-
strip: opts.strip ?? 0 // remove leading path segments if desired
153+
strip
152154
})
153155

154156
await pipeline(Readable.from(buf), extractor)
155-
return dir
157+
return cwd
156158
} catch (err) {
157159
// Clean up the extraction dir if extraction fails (NOTE: this recursively removes `cwd`, even when the caller supplied it)
158-
await fs.rm(dir, { recursive: true, force: true }).catch(() => {})
160+
await fs.rm(cwd, { recursive: true, force: true }).catch(() => {})
159161
throw err
160162
}
161163
}

todo.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
- extraction
1+
- `extract-kindle-book`
22
- extract raw images (product image, etc)
33
- special-case handling of pages with only an image child
44
- handle rich-text / markdown

0 commit comments

Comments
 (0)