-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: ANY compositions work on selections
- Loading branch information
Showing
5 changed files
with
157 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
export {}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import { Dataset, DataSelection } from '../dist/deepscatter.js'; | ||
import { Table, vectorFromArray, Utf8 } from 'apache-arrow'; | ||
import { test } from 'uvu'; | ||
import * as assert from 'uvu/assert'; | ||
import { | ||
createIntegerDataset, | ||
selectFunctionForFactorsOf, | ||
} from './datasetHelpers.js'; | ||
|
||
// Smoke test: the fixture dataset exposes its columns through the root tile.
test('Dataset can be created', async () => {
  const fixture = createIntegerDataset();

  // The root tile of the fixture is expected to hold 4096 rows.
  const xColumn = await fixture.root_tile.get_column('x');
  assert.is(xColumn.length, 4096);

  // The `integers` column stores each row's own index.
  const integerColumn = await fixture.root_tile.get_column('integers');
  const integerValues = integerColumn.toArray();
  assert.is(integerValues[10], 10);
});
|
||
// Checks that ALL (intersection) and ANY (union) compositions of two
// selections produce selections of the expected relative sizes.
test('Test composition of selections', async () => {
  // True when two sizes agree to within ~1% (|ln(a / b)| < 0.01).
  const isCloseRatio = (actual, expected) =>
    Math.abs(Math.log(actual / expected)) < 0.01;

  const dataset = createIntegerDataset();
  await dataset.root_tile.preprocessRootTileInfo();

  const selectEvens = new DataSelection(dataset, {
    name: 'twos',
    tileFunction: selectFunctionForFactorsOf(2),
  });
  await selectEvens.ready;
  await selectEvens.applyToAllLoadedTiles();

  // NOTE(review): selectThree is never awaited or applied on its own; the
  // compositions below appear to rely on it being evaluated on demand.
  // Confirm this is the intended coverage.
  const selectThree = new DataSelection(dataset, {
    name: 'threes',
    tileFunction: selectFunctionForFactorsOf(3),
  });

  // ALL: multiples of both 2 and 3, i.e. multiples of 6.
  const selectSix = new DataSelection(dataset, {
    name: 'six',
    composition: ['ALL', selectThree, selectEvens],
  });
  await selectSix.ready;
  await selectSix.applyToAllLoadedTiles();

  assert.ok(
    isCloseRatio(selectSix.selectionSize, selectEvens.selectionSize / 3),
    'sixes are the same size as evens over three',
  );

  // ANY: multiples of 2 or 3 cover 1/2 + 1/3 - 1/6 = 4/6 of the integers,
  // which is four times the share covered by multiples of 6.
  const selectTwoThree = new DataSelection(dataset, {
    name: 'sixTwo',
    composition: ['ANY', selectThree, selectEvens],
  });
  await selectTwoThree.ready;
  await selectTwoThree.applyToAllLoadedTiles();

  assert.ok(
    isCloseRatio(selectTwoThree.selectionSize, selectSix.selectionSize * 4),
    'twos-or-threes are 4x as big as sixes',
  );
});
|
||
test.run(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import { Table, vectorFromArray, Utf8 } from 'apache-arrow'; | ||
import { Dataset, Bitmask } from '../dist/deepscatter.js'; | ||
|
||
/**
 * Creates a tile transformation that flags every row whose `integers` value
 * is an exact multiple of `n`.
 *
 * @param {number} n - Divisor each integer is tested against.
 * @returns {(tile: object) => Promise<*>} Async function that reads the
 *   tile's `integers` column, sets one bit in a `Bitmask` per matching row,
 *   and resolves to the result of the mask's `to_arrow()`.
 */
export function selectFunctionForFactorsOf(n) {
  return async (tile) => {
    const ints = await tile.get_column('integers');
    // Materialize the column once rather than on every loop iteration.
    const values = ints.toArray();
    const numRows = tile.record_batch.numRows;
    const mask = new Bitmask(numRows);
    for (let i = 0; i < numRows; i++) {
      if (values[i] % n === 0) {
        mask.set(i);
      }
    }
    return mask.to_arrow();
  };
}
|
||
/**
 * Builds one Arrow table of synthetic points for the integers
 * `start .. start + length - 1`.
 *
 * Each integer's binary digits are de-interleaved into coordinates: even bit
 * positions (counting from the least significant bit) accumulate into `x`,
 * odd bit positions into `y`.
 *
 * @param {number} [start=0] - First integer in the batch.
 * @param {number} [length=65536] - Number of rows to generate.
 * @param {number} [batch_number_here=0] - Value written to every row of the
 *   `batch_id` column.
 * @returns {Table} Table with columns `x`, `y`, `_id` (the integer as a
 *   Utf8 string), `integers`, and `batch_id`.
 */
function make_batch(start = 0, length = 65536, batch_number_here = 0) {
  const x = new Float32Array(length);
  const y = new Float32Array(length);
  const integers = new Int32Array(length);
  const ix = new Uint32Array(length);
  const batch_id = new Float32Array(length).fill(batch_number_here);

  for (let i = start; i < start + length; i++) {
    const row = i - start;
    const binary = i.toString(2);
    let x_ = 0;
    let y_ = 0;
    // Walk the digits from least to most significant; j is the bit position.
    for (let j = 0; j < binary.length; j++) {
      if (binary[binary.length - 1 - j] === '1') {
        // Bits 2k and 2k + 1 both contribute 2 ** k, to x and y respectively.
        const weight = 2 ** (j >> 1);
        if (j % 2 === 0) {
          x_ += weight;
        } else {
          y_ += weight;
        }
      }
    }
    ix[row] = i;
    x[row] = x_;
    y[row] = y_;
    integers[row] = i;
  }

  // Ids are the decimal string form of each row's (uint32) index.
  const vs = Array.from(ix, String);
  return new Table({
    x: vectorFromArray(x),
    y: vectorFromArray(y),
    _id: vectorFromArray(vs, new Utf8()),
    integers: vectorFromArray(integers),
    batch_id: vectorFromArray(batch_id),
  });
}
|
||
/**
 * Assembles a single table from `n_batches` consecutive batches of
 * 65536 / 4 / 4 rows each, where batch `k` starts at integer `k * rows`.
 *
 * @param {number} n_batches - Number of batches to generate.
 * @returns {Table} Table built from all generated batches.
 */
function createTable(n_batches) {
  const rowsPerBatch = 65536 / 4 / 4;
  const pieces = [];
  let batchIndex = 0;
  while (batchIndex < n_batches) {
    pieces.push(make_batch(batchIndex * rowsPerBatch, rowsPerBatch, batchIndex));
    batchIndex += 1;
  }
  return new Table([pieces]);
}
|
||
/**
 * Creates the dataset fixture used by the tests: a four-batch integer table
 * handed to `Dataset.from_arrow_table`.
 *
 * @returns {*} Whatever `Dataset.from_arrow_table` returns for that table.
 */
export function createIntegerDataset() {
  const batchCount = 4;
  return Dataset.from_arrow_table(createTable(batchCount));
}