Skip to content

Commit

Permalink
fix: ANY compositions work on selections
Browse files Browse the repository at this point in the history
  • Loading branch information
bmschmidt committed Apr 2, 2024
1 parent 58c5595 commit c32adee
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 17 deletions.
6 changes: 0 additions & 6 deletions src/Dataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,7 @@ export class Dataset {
this.promise = preProcessRootTile.then(async () => {
const batch = await this.root_tile.get_arrow(null);
const schema = batch.schema;
console.log('HERE');
await this.root_tile.loadManifestInfoFromTileMetadata();
console.log(this.root_tile.max_ix);
console.log('BHERE');

console.log(this.extent);
if (schema.metadata.has('sidecars')) {
const cars = schema.metadata.get('sidecars');
const parsed = JSON.parse(cars as string) as Record<string, string>;
Expand Down Expand Up @@ -602,7 +597,6 @@ export class Dataset {
* @returns A list of [tile, point] pairs that match the index.
*/
findPointRaw(ix: number): [Tile, StructRowProxy, number][] {
console.log({ ix });
const matches: [Tile, StructRowProxy, number][] = [];
this.visit((tile: Tile) => {
if (!(tile.record_batch && tile.min_ix <= ix && tile.max_ix >= ix)) {
Expand Down
28 changes: 17 additions & 11 deletions src/selection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,13 @@ function isCompositeSelectParam(
return params.composition !== undefined;
}

/**
 * Type guard: reports whether a value has the shape of a composition, i.e.
 * an array whose first element is one of the recognised operator names.
 *
 * NOTE(review): the operator guards elsewhere in this file also list 'NONE'
 * and 'NAND'; they are not accepted here — confirm whether that is intended.
 *
 * @param elems - Any value; typically one argument of a composition.
 * @returns True when `elems` is an array starting with a known operator.
 * @throws Error when `elems` is `undefined`.
 */
function isComposition(elems: unknown): elems is Composition {
  if (elems === undefined) throw new Error('Undefined composition');
  // Array.isArray also rejects null and every other non-array value.
  if (!Array.isArray(elems)) return false;
  const op: unknown = elems[0];
  if (typeof op !== 'string') return false;
  // Membership test: comparing indexOf(...) against 0 would accept only the
  // first entry ('AND') and reject every other operator.
  return ['AND', 'OR', 'XOR', 'NOT', 'ANY', 'ALL'].includes(op);
}

Expand All @@ -113,7 +115,7 @@ async function applyCompositeFunctionToTile(
if (args[0] === 'NOT') {
const bitmask = await extractBitmask(tile, args[1]);
return bitmask.not();
} else if (isBinarySelectParam(operator)) {
} else if (isBinarySelectOperation(operator)) {
const [op, arg1, arg2] = args;
const bitmask1 = await extractBitmask(tile, arg1);
const bitmask2 = await extractBitmask(tile, arg2);
Expand All @@ -126,7 +128,7 @@ async function applyCompositeFunctionToTile(
} else {
throw new Error('Unknown binary operation');
}
} else if (isPluralSelectParam(operator)) {
} else if (isPluralSelectOperator(operator)) {
const op = args[0];
const bitmasks = await Promise.all(
args.slice(1).map((arg) => extractBitmask(tile, arg)),
Expand Down Expand Up @@ -157,18 +159,18 @@ export interface FunctionSelectParams extends SelectParams {
tileFunction: (t: Tile) => Promise<Vector<Bool>>;
}

/**
 * Type guard for the plural (n-ary) operators.
 *
 * The visible call site passes the operator token itself, so membership is
 * tested on `params` directly; indexing it (`params[0]`) would compare only
 * the first character of the token and never match.
 *
 * @param params - The operator token to classify.
 * @returns True when the token is 'ANY', 'ALL' or 'NONE'.
 */
function isPluralSelectOperator(
  params: PluralOperation | BinaryOperation | UnaryOperation,
): params is PluralOperation {
  const things = new Set(['ANY', 'ALL', 'NONE']);
  return things.has(params);
}

/**
 * Type guard for the binary (two-argument) operators.
 *
 * The visible call site passes the operator token itself, so membership is
 * tested on `params` directly; indexing it (`params[0]`) would compare only
 * the first character of the token and never match.
 *
 * @param params - The operator token to classify.
 * @returns True when the token is 'AND', 'OR', 'XOR' or 'NAND'.
 */
function isBinarySelectOperation(
  params: PluralOperation | BinaryOperation | UnaryOperation,
): params is BinaryOperation {
  const things = new Set(['AND', 'OR', 'XOR', 'NAND']);
  return things.has(params);
}

function isFunctionSelectParam(
Expand Down Expand Up @@ -343,6 +345,9 @@ export class DataSelection {
| CompositeSelectParams,
) {
this.dataset = dataset;
if (dataset === undefined) {
throw new Error("Can't create a selection without a dataset");
}
this.name = params.name;
let markReady = function () {};
this.ready = new Promise((resolve) => {
Expand Down Expand Up @@ -405,6 +410,7 @@ export class DataSelection {
applyToAllLoadedTiles(): Promise<void> {
return Promise.all(
this.dataset.map((tile) => {
console.log('TILE', tile.key);
// triggers creation of the dataset column as a side-effect.
return tile.get_column(this.name);
}),
Expand Down Expand Up @@ -721,7 +727,7 @@ export class DataSelection {
let currentOffset = 0;
let relevantTile: Tile = undefined;
let current_tile_ix = 0;
for (let match_length of this.match_count) {
for (const match_length of this.match_count) {
if (i < currentOffset + match_length) {
relevantTile = this.tiles[current_tile_ix];
break;
Expand Down Expand Up @@ -750,7 +756,7 @@ export class DataSelection {

// Iterate over the points in raw order.
*[Symbol.iterator]() {
for (let tile of this.tiles) {
for (const tile of this.tiles) {
const column = tile.record_batch.getChild(this.name) as Vector<Bool>;
for (let i = 0; i < column.length; i++) {
if (column.get(i)) {
Expand Down
1 change: 1 addition & 0 deletions tests/basic_display.spec.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export {};
67 changes: 67 additions & 0 deletions tests/dataset.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { Dataset, DataSelection } from '../dist/deepscatter.js';
import { Table, vectorFromArray, Utf8 } from 'apache-arrow';
import { test } from 'uvu';
import * as assert from 'uvu/assert';
import {
createIntegerDataset,
selectFunctionForFactorsOf,
} from './datasetHelpers.js';

// Smoke test: the synthetic integer dataset exposes its columns through the
// root tile.
test('Dataset can be created', async () => {
  const rootTile = createIntegerDataset().root_tile;

  // The root tile is expected to hold 4096 rows.
  const xColumn = await rootTile.get_column('x');
  assert.is(xColumn.length, 4096);

  // Row 10 of the `integers` column is expected to hold the value 10.
  const integerValues = (await rootTile.get_column('integers')).toArray();
  assert.is(integerValues[10], 10);
});

// Checks that composite selections ('ALL' and 'ANY') built from two
// function-based selections end up with the expected relative sizes.
test('Test composition of selections', async () => {
  const dataset = createIntegerDataset();
  await dataset.root_tile.preprocessRootTileInfo();

  // Rows whose `integers` value is a multiple of two.
  const selectEvens = new DataSelection(dataset, {
    name: 'twos',
    tileFunction: selectFunctionForFactorsOf(2),
  });

  await selectEvens.ready;
  await selectEvens.applyToAllLoadedTiles();

  // Rows whose `integers` value is a multiple of three.
  // NOTE(review): this selection is never awaited or applied on its own;
  // presumably the composite selections below materialize it on demand —
  // confirm that this is the behavior under test.
  const selectThree = new DataSelection(dataset, {
    name: 'threes',
    tileFunction: selectFunctionForFactorsOf(3),
  });

  // 'ALL' of threes and evens: expected to match the multiples of six.
  const selectSix = new DataSelection(dataset, {
    name: 'six',
    composition: ['ALL', selectThree, selectEvens],
  });

  await selectSix.ready;
  await selectSix.applyToAllLoadedTiles();

  // Sizes are compared through the log of their ratio, so the check is a
  // relative tolerance (about 1%) rather than an exact count.
  assert.ok(
    Math.abs(
      Math.log(selectSix.selectionSize / (selectEvens.selectionSize / 3)),
    ) < 0.01,
    'sixes are the same size as evens over three',
  );

  // 'ANY' of threes and evens.
  const selectTwoThree = new DataSelection(dataset, {
    name: 'sixTwo',
    composition: ['ANY', selectThree, selectEvens],
  });
  await selectTwoThree.ready;
  await selectTwoThree.applyToAllLoadedTiles();

  // 1/2 + 1/3 - 1/6 = 2/3 of the rows, i.e. four times the 1/6 that are sixes.
  assert.ok(
    Math.abs(
      Math.log(selectTwoThree.selectionSize / (selectSix.selectionSize * 4)),
    ) < 0.01,
    'twos-or-threes are 4x as big as sixes',
  );
});

// Run the uvu tests registered above.
test.run();
72 changes: 72 additions & 0 deletions tests/datasetHelpers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { Table, vectorFromArray, Utf8 } from 'apache-arrow';
import { Dataset, Bitmask } from '../dist/deepscatter.js';

// Creates a tile transformation for factors of n.
export function selectFunctionForFactorsOf(n) {
return async (tile) => {
const ints = await tile.get_column('integers');
const mask = new Bitmask(tile.record_batch.numRows);
for (let i = 0; i < tile.record_batch.numRows; i++) {
if (ints.toArray()[i] % n === 0) {
mask.set(i);
}
}
return mask.to_arrow();
};
}

function make_batch(start = 0, length = 65536, batch_number_here = 0) {
let x = new Float32Array(length);
let y = new Float32Array(length);
let integers = new Int32Array(length);
let ix = new Uint32Array(length);
let batch_id = new Float32Array(length).fill(batch_number_here);
for (let i = start; i < start + length; i++) {
ix[i - start] = i;
let x_ = 0;
let y_ = 0;
const binary = i.toString(2).split('').reverse();
for (let j = 0; j < binary.length; j++) {
const bit = binary[j];
if (bit == 1) {
if (j % 2 == 0) {
x_ += 2 ** (j / 2);
} else {
y_ += 2 ** ((j - 1) / 2);
}
}
}
x[i - start] = x_;
y[i - start] = y_;
integers[i - start] = i;
}

function num_to_string(num) {
return num.toString();
}
const vs = [...ix].map(num_to_string);
return new Table({
x: vectorFromArray(x),
y: vectorFromArray(y),
_id: vectorFromArray(vs, new Utf8()),
integers: vectorFromArray(integers),
batch_id: vectorFromArray(batch_id),
});
}

function createTable(n_batches) {
const batches = [];
const SIZE = 65536 / 4 / 4;
for (let i = 0; i < n_batches; i++) {
const batch = make_batch(i * SIZE, SIZE, i);
batches.push(batch);
}
const table = new Table([batches]);
return table;
}

export function createIntegerDataset() {
const num_batches = 4;
const table = createTable(num_batches);
return Dataset.from_arrow_table(table);
}

0 comments on commit c32adee

Please sign in to comment.