Skip to content

Checkpoint not reduce WAL to 0 #1990

Open
@unrealhoang

Description

@unrealhoang

What happens?

I'm trying to build the feature to allow user to download their own DB file, and re-open them, but despite after checkpointing & close the db / connection, if I use that same DB file without the attached WAL, the DB will be put in read-only mode. Is this intentional, or should the WAL be empty? As from the docs:

If DuckDB exits normally, the WAL file is deleted upon exit. If DuckDB crashes, the WAL file is required to recover data.

Following is a minimal reproduce script:

To Reproduce

// app_load_db.js

// Import the Wasm file URIs
import * as duckdb from 'https://cdn.jsdelivr.net/npm/@duckdb/[email protected]/+esm';

// === Configuration ===
const OPFS_DB_PATH = 'opfs://testduck.db';
const DB_FILE = 'testduck.db'; 

let db = null; // AsyncDuckDB instance
let conn = null; // Active connection to the OPFS database
let workerScriptText;
let bundle;

async function removeFiles() {
  const opfsRoot = await navigator.storage.getDirectory();

  await opfsRoot.removeEntry(DB_FILE).catch((e) => {
    console.error(e);
  });
  await opfsRoot.removeEntry(DB_FILE + '.wal').catch((e) => {
    console.error(e);
  });
  await opfsRoot.removeEntry('new.db').catch((e) => {
    console.error(e);
  });
  await opfsRoot.removeEntry('new.db.wal').catch((e) => {
    console.error(e);
  });
}

async function setup() {
  await removeFiles();
  let blobUrl = null;

  try {
    const bundles = duckdb.getJsDelivrBundles();
    bundle = await duckdb.selectBundle(bundles);
    console.log('Using bundle:', bundle);
    if (!bundle.mainWorker) throw new Error('Could not determine worker URL.');

    const workerResponse = await fetch(bundle.mainWorker);
    if (!workerResponse.ok) throw new Error(`Failed to fetch worker: ${workerResponse.statusText}`);
    workerScriptText = await workerResponse.text();
    const blob = new Blob([workerScriptText], { type: 'application/javascript' });
    blobUrl = URL.createObjectURL(blob);

    const worker = new Worker(blobUrl);
    const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING);
    db = new duckdb.AsyncDuckDB(logger, worker);
    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);

    await db.open({
      path: OPFS_DB_PATH,
      accessMode: duckdb.DuckDBAccessMode.READ_WRITE
    });
    conn = await db.connect();

    document.db = db; // Make accessible in console for debugging
    if (blobUrl) URL.revokeObjectURL(blobUrl);
  } catch (error) {
    if (blobUrl) URL.revokeObjectURL(blobUrl);
    console.error(error);
  }

  await conn.send(`CREATE TABLE tmp AS SELECT unnest(generate_series(1, 100)) as count`);
  await conn.send(`CHECKPOINT;`);
}

async function copyOpfsFile(opfsRoot, src, dst) {
  let srcFileHandle = await opfsRoot.getFileHandle(src, { create: false })
  let srcFile = await srcFileHandle.getFile();
  let dstFileHandle = await opfsRoot.getFileHandle(dst, { create: true });
  let writableDstFile = await dstFileHandle.createWritable();
  await srcFile.stream().pipeTo(writableDstFile);
}

async function test() {
  await conn.close();
  await db.reset();
  await db.terminate();

  const opfsRoot = await navigator.storage.getDirectory();
  copyOpfsFile(opfsRoot, DB_FILE, 'new.db')
  copyOpfsFile(opfsRoot, DB_FILE + '.wal', 'new.db.wal') // <-- comment out this line will cause the last create table to fail

  const blob = new Blob([workerScriptText], { type: 'application/javascript' });
  const blobUrl = URL.createObjectURL(blob);

  const worker = new Worker(blobUrl);
  const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING);
  db = new duckdb.AsyncDuckDB(logger, worker);
  await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
  await db.open({
    path: 'opfs://new.db',
    accessMode: duckdb.DuckDBAccessMode.READ_WRITE
  });
  conn = await db.connect();
  if (blobUrl) URL.revokeObjectURL(blobUrl);

  await query(`SELECT count(*)::INTEGER as cnt FROM tmp;`);
  await query('create table new as select 1 as b');
};

async function query(q) {
  const result = await conn.query(q);
  console.log(result.toArray().map(r => r.toJSON()));
}

await setup();
await test();

Browser/Environment:

Version 134.0.6998.166 (Official Build) (arm64)

Device:

Macbook M4 Pro

DuckDB-Wasm Version:

[email protected]

DuckDB-Wasm Deployment:

localhost

Full Name:

Hoang Luu

Affiliation:

H2Corporation

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions