Skip to content

Commit 6758d0d

Browse files
authored
Merge pull request #278 from TileDB-Inc/sa/support-empty-enumerations
Support empty enumerations and extensions
2 parents 1b1b761 + 4dd7811 commit 6758d0d

File tree

7 files changed

+309
-6
lines changed

7 files changed

+309
-6
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
set -e -x
2-
curl --location -o tiledb.tar.gz https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-linux-x86_64-2.17.0-93c173d.tar.gz \
2+
curl --location -o tiledb.tar.gz https://github.com/TileDB-Inc/TileDB/releases/download/2.17.3/tiledb-linux-x86_64-2.17.3-0c2de58.tar.gz \
33
&& sudo tar -C /usr/local -xf tiledb.tar.gz
44
sudo ldconfig /usr/local/lib

.github/scripts/install_tiledb_linux_debug.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set -e -x
2-
git clone https://github.com/TileDB-Inc/TileDB.git -b 2.17.0
2+
git clone https://github.com/TileDB-Inc/TileDB.git -b 2.17.3
33
cd TileDB
44
mkdir build && cd build
55
cmake -DTILEDB_WERROR=OFF -DTILEDB_VCPKG=ON -DSANITIZER=leak -DTILEDB_VERBOSE=OFF -DTILEDB_S3=ON -DTILEDB_SERIALIZATION=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=/usr/local ..
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
set -e -x
2-
curl --location -o tiledb.tar.gz https://github.com/TileDB-Inc/TileDB/releases/download/2.17.0/tiledb-macos-x86_64-2.17.0-93c173d.tar.gz \
2+
curl --location -o tiledb.tar.gz https://github.com/TileDB-Inc/TileDB/releases/download/2.17.3/tiledb-macos-x86_64-2.17.3-0c2de58.tar.gz \
33
&& sudo tar -C /usr/local -xf tiledb.tar.gz

.github/scripts/install_tiledb_source_linux.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set -e -x
2-
git clone https://github.com/TileDB-Inc/TileDB.git -b 2.17.0
2+
git clone https://github.com/TileDB-Inc/TileDB.git -b 2.17.3
33
cd TileDB
44
mkdir build && cd build
55
cmake -DTILEDB_WERROR=OFF -DTILEDB_VCPKG=ON -DTILEDB_VERBOSE=OFF -DTILEDB_S3=ON -DTILEDB_SERIALIZATION=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local ..

.github/scripts/install_tiledb_source_macos.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set -e -x
2-
git clone https://github.com/TileDB-Inc/TileDB.git -b 2.17.0
2+
git clone https://github.com/TileDB-Inc/TileDB.git -b 2.17.3
33
cd TileDB
44
mkdir build && cd build
55
cmake -DTILEDB_WERROR=OFF -DTILEDB_VCPKG=ON -DTILEDB_VERBOSE=OFF -DTILEDB_S3=ON -DTILEDB_SERIALIZATION=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local ..

enumeration.go

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,15 @@ func newEnumeration[T EnumerationType](tdbCtx *Context, name string, ordered boo
8888
var cDataLen C.uint64_t
8989
var cOffsets unsafe.Pointer
9090
var cOffsetsLen C.uint64_t
91-
if tiledbType == TILEDB_STRING_ASCII {
91+
92+
// for empty enumerations, TileDB accepts only nils, not empty slices
93+
if len(values) == 0 {
94+
if tiledbType == TILEDB_STRING_ASCII {
95+
cCellNum = C.uint32_t(TILEDB_VAR_NUM)
96+
} else {
97+
cCellNum = C.uint32_t(1)
98+
}
99+
} else if tiledbType == TILEDB_STRING_ASCII {
92100
var dataSize int
93101
for _, v := range values {
94102
dataSize += reflect.ValueOf(v).Len()
@@ -302,6 +310,73 @@ func (e *Enumeration) Values() (interface{}, error) {
302310
return strs, nil
303311
}
304312

313+
// ExtendEnumeration extends an existing enumeration to add more values. The returned value should be
314+
// used with ArraySchemaEvolution.ApplyExtendedEnumeration to make changes persistent.
315+
func ExtendEnumeration[T EnumerationType](tdbCtx *Context, e *Enumeration, values []T) (*Enumeration, error) {
316+
if len(values) == 0 {
317+
return nil, fmt.Errorf("Error extending enumeration: empty values")
318+
}
319+
320+
eName, err := e.Name()
321+
if err != nil {
322+
return nil, fmt.Errorf("Error extending enumeration: failed to get name of enumeration: %s", tdbCtx.LastError())
323+
}
324+
325+
eType, err := e.Type()
326+
if err != nil {
327+
return nil, fmt.Errorf("Error extending enumeration: failed to get type of enumeration %s: %s", eName, tdbCtx.LastError())
328+
}
329+
330+
tiledbType := enumerationTypeToTileDB[T]()
331+
if eType != tiledbType {
332+
return nil, fmt.Errorf("Error extending enumeration: type mismatch: enumeration type %v, values type %v", eType, tiledbType)
333+
}
334+
335+
var cData unsafe.Pointer
336+
var cDataLen C.uint64_t
337+
var cOffsets unsafe.Pointer
338+
var cOffsetsLen C.uint64_t
339+
340+
if tiledbType == TILEDB_STRING_ASCII {
341+
var dataSize int
342+
for _, v := range values {
343+
dataSize += reflect.ValueOf(v).Len()
344+
}
345+
data := make([]byte, 0, dataSize)
346+
offsets := make([]uint64, 0, len(values))
347+
defer runtime.KeepAlive(data)
348+
defer runtime.KeepAlive(offsets)
349+
var currOffset uint64
350+
for _, v := range values {
351+
data = append(data, reflect.ValueOf(v).String()...)
352+
offsets = append(offsets, currOffset)
353+
currOffset += uint64(reflect.ValueOf(v).Len())
354+
}
355+
cData = reflect.ValueOf(data).UnsafePointer()
356+
cDataLen = C.uint64_t(dataSize)
357+
cOffsets = reflect.ValueOf(offsets).UnsafePointer()
358+
cOffsetsLen = C.uint64_t(len(values) * int(reflect.TypeOf(uint64(0)).Size()))
359+
} else {
360+
var zz T
361+
cData = reflect.ValueOf(values).UnsafePointer()
362+
cDataLen = C.uint64_t(len(values) * int(reflect.TypeOf(zz).Size()))
363+
}
364+
365+
var extEnum *C.tiledb_enumeration_t
366+
367+
ret := C.tiledb_enumeration_extend(tdbCtx.tiledbContext, e.tiledbEnum, cData, cDataLen, cOffsets, cOffsetsLen, &extEnum)
368+
if ret != C.TILEDB_OK {
369+
return nil, fmt.Errorf("Error extending enumeration: %s", tdbCtx.LastError())
370+
}
371+
372+
ext := &Enumeration{context: tdbCtx, tiledbEnum: extEnum}
373+
freeOnGC(ext)
374+
375+
runtime.KeepAlive(values)
376+
377+
return ext, nil
378+
}
379+
305380
// AddEnumeration adds the Enumeration to the schema. It must be added before we add it to an attribute.
306381
func (a *ArraySchema) AddEnumeration(e *Enumeration) error {
307382
ret := C.tiledb_array_schema_add_enumeration(a.context.tiledbContext, a.tiledbArraySchema, e.tiledbEnum)
@@ -413,6 +488,16 @@ func (ase *ArraySchemaEvolution) DropEnumeration(name string) error {
413488
return nil
414489
}
415490

491+
// ApplyExtendedEnumeration applies to the schema evolution the result of ExtendEnumeration
492+
func (ase *ArraySchemaEvolution) ApplyExtendedEnumeration(e *Enumeration) error {
493+
ret := C.tiledb_array_schema_evolution_extend_enumeration(ase.context.tiledbContext, ase.tiledbArraySchemaEvolution, e.tiledbEnum)
494+
if ret != C.TILEDB_OK {
495+
return fmt.Errorf("Error applying extended enumeration to arraySchemaEvolution: %s", ase.context.LastError())
496+
}
497+
498+
return nil
499+
}
500+
416501
// DeserializeLoadEnumerationsRequest deserializes a LoadEnumerationsRequests. This is used by TileDB-Cloud
417502
func DeserializeLoadEnumerationsRequest(array *Array, serializationType SerializationType, request *Buffer) (*Buffer, error) {
418503
response, err := NewBuffer(array.context)

enumeration_test.go

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,25 @@ func TestEnumeration(t *testing.T) {
9898
assert.Contains(t, contents, "Name: romanNumerals")
9999
assert.Contains(t, contents, "Element Count: 16")
100100
})
101+
102+
t.Run("Extend", func(t *testing.T) {
103+
startValues, err := romanNumerals.Values()
104+
require.NoError(t, err)
105+
106+
additionalValues := []string{"xvii", "xviii", "xix"}
107+
extendedRomanNumerals, err := ExtendEnumeration(tdbCtx, romanNumerals, additionalValues)
108+
require.NoError(t, err)
109+
extendedValues, err := extendedRomanNumerals.Values()
110+
require.NoError(t, err)
111+
112+
require.Equal(t, append(startValues.([]string), additionalValues...), extendedValues)
113+
})
114+
115+
t.Run("ExtendTypeConformance", func(t *testing.T) {
116+
_, err := ExtendEnumeration(tdbCtx, romanNumerals, []int32{10})
117+
require.Error(t, err)
118+
require.Contains(t, err.Error(), "type mismatch")
119+
})
101120
}
102121

103122
func TestEnumerationAndSchema(t *testing.T) {
@@ -246,6 +265,163 @@ func TestEnumerationQueryCondition(t *testing.T) {
246265
assert.Equal(t, []uint8{1, 5, 0}, greekBuffer[0:3])
247266
assert.Equal(t, []uint8{1, 5, 0}, romanBuffer[0:3])
248267
})
268+
269+
t.Run("LabelNotExists", func(t *testing.T) {
270+
array, err := NewArray(tdbCtx, arrayPath)
271+
require.NoError(t, err)
272+
require.NoError(t, array.Open(TILEDB_READ))
273+
rQuery, err := NewQuery(tdbCtx, array)
274+
require.NoError(t, err)
275+
qcR, err := NewQueryCondition(tdbCtx, "roman", TILEDB_QUERY_CONDITION_EQ, "C")
276+
require.NoError(t, err)
277+
require.NoError(t, rQuery.SetQueryCondition(qcR))
278+
279+
rowsBuffer := make([]uint8, 16)
280+
_, err = rQuery.SetDataBuffer("rows", rowsBuffer)
281+
require.NoError(t, err)
282+
colsBuffer := make([]uint8, 16)
283+
_, err = rQuery.SetDataBuffer("cols", colsBuffer)
284+
require.NoError(t, err)
285+
err = rQuery.Submit()
286+
require.Error(t, err)
287+
require.Contains(t, err.Error(), "Enumeration value not found")
288+
require.NoError(t, array.Close())
289+
})
290+
}
291+
292+
func TestEnumerationEmpty(t *testing.T) {
293+
schema := arraySchemaWithEmptyEnumerations(t)
294+
295+
config, err := NewConfig()
296+
require.NoError(t, err)
297+
tdbCtx, err := NewContext(config)
298+
require.NoError(t, err)
299+
300+
arrayPath := t.TempDir()
301+
array, err := NewArray(tdbCtx, arrayPath)
302+
require.NoError(t, err)
303+
require.NoError(t, array.Create(schema))
304+
}
305+
306+
func TestEnumerationEvolution(t *testing.T) {
307+
schema := arraySchemaWithEnumerations(t)
308+
309+
config, err := NewConfig()
310+
require.NoError(t, err)
311+
tdbCtx, err := NewContext(config)
312+
require.NoError(t, err)
313+
314+
arrayPath := t.TempDir()
315+
array, err := NewArray(tdbCtx, arrayPath)
316+
require.NoError(t, err)
317+
require.NoError(t, array.Create(schema))
318+
319+
//=====
320+
// write to the array. Each cell gets the row order rank + 10
321+
// The array will look like
322+
// 10 11 12 13
323+
// 14 15 16 17
324+
// 18 19 20 21
325+
// 22 23 24 25
326+
327+
array, err = NewArray(tdbCtx, arrayPath)
328+
require.NoError(t, err)
329+
require.NoError(t, array.Open(TILEDB_WRITE))
330+
wQuery, err := NewQuery(tdbCtx, array)
331+
require.NoError(t, err)
332+
_, err = wQuery.SetDataBuffer("rows", []uint8{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4})
333+
require.NoError(t, err)
334+
_, err = wQuery.SetDataBuffer("cols", []uint8{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4})
335+
require.NoError(t, err)
336+
_, err = wQuery.SetDataBuffer("greek", []uint8{10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25})
337+
require.NoError(t, err)
338+
_, err = wQuery.SetDataBuffer("roman", []uint8{10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25})
339+
require.NoError(t, err)
340+
require.NoError(t, wQuery.Submit())
341+
require.NoError(t, array.Close())
342+
343+
// the enumerations currently can handle values [1, 16]. We extend them to [1, 25]
344+
array, err = NewArray(tdbCtx, arrayPath)
345+
require.NoError(t, err)
346+
require.NoError(t, array.Open(TILEDB_READ))
347+
romanEnum, err := array.GetEnumeration("romanNumerals")
348+
require.NoError(t, err)
349+
romanEnumExt, err := ExtendEnumeration(tdbCtx, romanEnum, []string{"xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii", "xxiv", "xxv"})
350+
require.NoError(t, err)
351+
greekEnum, err := array.GetEnumeration("greekNumerals")
352+
require.NoError(t, err)
353+
greekEnumExt, err := ExtendEnumeration(tdbCtx, greekEnum, []string{"ιζ", "ιη", "ιθ", "κ", "κα", "κβ", "κγ", "κδ", "κε"})
354+
require.NoError(t, err)
355+
require.NoError(t, array.Close())
356+
357+
// apply the schema evolution
358+
ase, err := NewArraySchemaEvolution(tdbCtx)
359+
require.NoError(t, err)
360+
ase.ApplyExtendedEnumeration(romanEnumExt)
361+
require.NoError(t, err)
362+
ase.ApplyExtendedEnumeration(greekEnumExt)
363+
require.NoError(t, err)
364+
err = ase.Evolve(arrayPath)
365+
require.NoError(t, err)
366+
367+
// now query the array to verify the new values can be applied
368+
array, err = NewArray(tdbCtx, arrayPath)
369+
require.NoError(t, err)
370+
require.NoError(t, array.Open(TILEDB_READ))
371+
rQuery, err := NewQuery(tdbCtx, array)
372+
require.NoError(t, err)
373+
qcR, err := NewQueryCondition(tdbCtx, "roman", TILEDB_QUERY_CONDITION_EQ, "xxv")
374+
require.NoError(t, err)
375+
qcG, err := NewQueryCondition(tdbCtx, "greek", TILEDB_QUERY_CONDITION_EQ, "κ")
376+
require.NoError(t, err)
377+
qc, err := NewQueryConditionCombination(tdbCtx, qcR, TILEDB_QUERY_CONDITION_OR, qcG)
378+
require.NoError(t, err)
379+
require.NoError(t, rQuery.SetQueryCondition(qc))
380+
381+
rowsBuffer := make([]uint8, 16)
382+
_, err = rQuery.SetDataBuffer("rows", rowsBuffer)
383+
require.NoError(t, err)
384+
colsBuffer := make([]uint8, 16)
385+
_, err = rQuery.SetDataBuffer("cols", colsBuffer)
386+
require.NoError(t, err)
387+
greekBuffer := make([]uint8, 16)
388+
_, err = rQuery.SetDataBuffer("greek", greekBuffer)
389+
require.NoError(t, err)
390+
romanBuffer := make([]uint8, 16)
391+
_, err = rQuery.SetDataBuffer("roman", romanBuffer)
392+
require.NoError(t, err)
393+
394+
require.NoError(t, rQuery.Submit())
395+
require.NoError(t, array.Close())
396+
397+
assert.Equal(t, []uint8{3, 4, 0}, rowsBuffer[0:3])
398+
assert.Equal(t, []uint8{2, 3, 0}, colsBuffer[0:3])
399+
assert.Equal(t, []uint8{19, 24, 0}, greekBuffer[0:3])
400+
assert.Equal(t, []uint8{19, 24, 0}, romanBuffer[0:3])
401+
}
402+
403+
// bogus is a string-based type whose String method intentionally returns something
// other than its underlying value.
type bogus string

// String returns the underlying text prefixed with "i am ".
func (b bogus) String() string {
	const prefix = "i am "
	return prefix + string(b)
}
408+
409+
func TestEnumerationDerivedValues(t *testing.T) {
410+
// This tests that we use reflection correctly and we operate on the original
411+
// values of types based on strings. Check newEnumeration and ExtendEnumeration
412+
// which use reflect.ValueOf(v).String()
413+
414+
config, err := NewConfig()
415+
require.NoError(t, err)
416+
tdbCtx, err := NewContext(config)
417+
require.NoError(t, err)
418+
419+
bogusEnum, err := NewOrderedEnumeration(tdbCtx, "bogusEnum", []bogus{"bogus1", "bogus2"})
420+
require.NoError(t, err)
421+
422+
bogusValues, err := bogusEnum.Values()
423+
require.NoError(t, err)
424+
assert.EqualValues(t, bogusValues, []string{"bogus1", "bogus2"})
249425
}
250426

251427
func arraySchemaWithEnumerations(t *testing.T) *ArraySchema {
@@ -291,3 +467,45 @@ func arraySchemaWithEnumerations(t *testing.T) *ArraySchema {
291467

292468
return schema
293469
}
470+
471+
func arraySchemaWithEmptyEnumerations(t *testing.T) *ArraySchema {
472+
config, err := NewConfig()
473+
require.NoError(t, err)
474+
tdbCtx, err := NewContext(config)
475+
require.NoError(t, err)
476+
477+
//=====
478+
// create a sparse array [1,4]x[1,4]
479+
//
480+
481+
schema, err := NewArraySchema(tdbCtx, TILEDB_SPARSE)
482+
require.NoError(t, err)
483+
require.NoError(t, schema.SetCellOrder(TILEDB_ROW_MAJOR))
484+
require.NoError(t, schema.SetTileOrder(TILEDB_ROW_MAJOR))
485+
486+
domain, err := NewDomain(tdbCtx)
487+
require.NoError(t, err)
488+
dimRows, err := NewDimension(tdbCtx, "rows", TILEDB_UINT8, []uint8{1, 4}, uint8(2))
489+
require.NoError(t, err)
490+
dimCols, err := NewDimension(tdbCtx, "cols", TILEDB_UINT8, []uint8{1, 4}, uint8(2))
491+
require.NoError(t, err)
492+
require.NoError(t, domain.AddDimensions(dimRows, dimCols))
493+
require.NoError(t, schema.SetDomain(domain))
494+
495+
greekNumerals, err := NewOrderedEnumeration[string](tdbCtx, "greekNumerals", nil)
496+
require.NoError(t, err)
497+
require.NoError(t, schema.AddEnumeration(greekNumerals))
498+
romanNumerals, err := NewOrderedEnumeration[string](tdbCtx, "romanNumerals", nil)
499+
require.NoError(t, err)
500+
require.NoError(t, schema.AddEnumeration(romanNumerals))
501+
502+
greekAttr, err := NewAttribute(tdbCtx, "greek", TILEDB_UINT8)
503+
require.NoError(t, err)
504+
require.NoError(t, greekAttr.SetEnumerationName("greekNumerals"))
505+
romanAttr, err := NewAttribute(tdbCtx, "roman", TILEDB_UINT8)
506+
require.NoError(t, err)
507+
require.NoError(t, romanAttr.SetEnumerationName("romanNumerals"))
508+
require.NoError(t, schema.AddAttributes(greekAttr, romanAttr))
509+
510+
return schema
511+
}

0 commit comments

Comments
 (0)