Skip to content

Commit 05ad44f

Browse files
authored
Merge pull request #4 from vulcanize/ian/tests_for_upstream
GetMany blockstore method + tests for upstream
2 parents 6602207 + e5b1548 commit 05ad44f

File tree

7 files changed

+436
-623
lines changed

7 files changed

+436
-623
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ The following emojis are used to highlight certain changes:
1515
## [Unreleased]
1616

1717
### Added
18+
* [GetMany blockstore implementation](https://github.com/vulcanize/boxo/pull/1)
19+
* Requires https://github.com/vulcanize/go-datastore/releases/tag/v0.6.1-internal
1820

1921
* `boxo/gateway`:
2022
* A new `WithResolver(...)` option can be used with `NewBlocksBackend(...)` allowing the user to pass their custom `Resolver` implementation.

blockstore/blockstore.go

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package blockstore
55
import (
66
"context"
77
"errors"
8+
"fmt"
89
"sync"
910
"sync/atomic"
1011

@@ -64,6 +65,12 @@ type Blockstore interface {
6465
HashOnRead(enabled bool)
6566
}
6667

68+
// GetManyBlockstore is a blockstore interface that supports a GetMany method
69+
type GetManyBlockstore interface {
70+
Blockstore
71+
GetMany(context.Context, []cid.Cid) ([]blocks.Block, []cid.Cid, error)
72+
}
73+
6774
// Viewer can be implemented by blockstores that offer zero-copy access to
6875
// values.
6976
//
@@ -310,6 +317,227 @@ func (bs *blockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
310317
return output, nil
311318
}
312319

320+
// GetManyOption is a getManyBlockStore option implementation
321+
type GetManyOption struct {
322+
f func(bs *getManyBlockStore)
323+
}
324+
325+
// NewGetManyBlockstore returns a default GetManyBlockstore implementation
326+
// using the provided datastore.TxnDatastore backend.
327+
func NewGetManyBlockstore(d ds.TxnDatastore, opts ...GetManyOption) GetManyBlockstore {
328+
bs := &getManyBlockStore{
329+
datastore: d,
330+
}
331+
332+
for _, o := range opts {
333+
o.f(bs)
334+
}
335+
336+
if !bs.noPrefix {
337+
bs.datastore = dsns.WrapTxnDatastore(bs.datastore, BlockPrefix)
338+
}
339+
return bs
340+
}
341+
342+
type getManyBlockStore struct {
343+
datastore ds.TxnDatastore
344+
345+
rehash atomic.Bool
346+
writeThrough bool
347+
noPrefix bool
348+
}
349+
350+
func (bs *getManyBlockStore) HashOnRead(enabled bool) {
351+
bs.rehash.Store(enabled)
352+
}
353+
354+
func (bs *getManyBlockStore) Get(ctx context.Context, k cid.Cid) (blocks.Block, error) {
355+
if !k.Defined() {
356+
logger.Error("undefined cid in blockstore")
357+
return nil, ipld.ErrNotFound{Cid: k}
358+
}
359+
bdata, err := bs.datastore.Get(ctx, dshelp.MultihashToDsKey(k.Hash()))
360+
if err == ds.ErrNotFound {
361+
return nil, ipld.ErrNotFound{Cid: k}
362+
}
363+
if err != nil {
364+
return nil, err
365+
}
366+
if bs.rehash.Load() {
367+
rbcid, err := k.Prefix().Sum(bdata)
368+
if err != nil {
369+
return nil, err
370+
}
371+
372+
if !rbcid.Equals(k) {
373+
return nil, ErrHashMismatch
374+
}
375+
376+
return blocks.NewBlockWithCid(bdata, rbcid)
377+
}
378+
return blocks.NewBlockWithCid(bdata, k)
379+
}
380+
381+
func (bs *getManyBlockStore) GetMany(ctx context.Context, cs []cid.Cid) ([]blocks.Block, []cid.Cid, error) {
382+
if len(cs) == 1 {
383+
// performance fast-path
384+
block, err := bs.Get(ctx, cs[0])
385+
return []blocks.Block{block}, nil, err
386+
}
387+
388+
t, err := bs.datastore.NewTransaction(ctx, false)
389+
if err != nil {
390+
return nil, nil, err
391+
}
392+
blks := make([]blocks.Block, 0, len(cs))
393+
missingCIDs := make([]cid.Cid, 0, len(cs))
394+
for _, c := range cs {
395+
if !c.Defined() {
396+
logger.Error("undefined cid in blockstore")
397+
return nil, nil, ipld.ErrNotFound{Cid: c}
398+
}
399+
bdata, err := t.Get(ctx, dshelp.MultihashToDsKey(c.Hash()))
400+
if err != nil {
401+
if err == ds.ErrNotFound {
402+
missingCIDs = append(missingCIDs, c)
403+
} else {
404+
return nil, nil, err
405+
}
406+
} else {
407+
if bs.rehash.Load() {
408+
rbcid, err := c.Prefix().Sum(bdata)
409+
if err != nil {
410+
return nil, nil, err
411+
}
412+
413+
if !rbcid.Equals(c) {
414+
return nil, nil, fmt.Errorf("block in storage has different hash (%x) than requested (%x)", rbcid.Hash(), c.Hash())
415+
}
416+
417+
blk, err := blocks.NewBlockWithCid(bdata, rbcid)
418+
if err != nil {
419+
return nil, nil, err
420+
}
421+
422+
blks = append(blks, blk)
423+
} else {
424+
blk, err := blocks.NewBlockWithCid(bdata, c)
425+
if err != nil {
426+
return nil, nil, err
427+
}
428+
429+
blks = append(blks, blk)
430+
}
431+
}
432+
}
433+
return blks, missingCIDs, t.Commit(ctx)
434+
}
435+
436+
func (bs *getManyBlockStore) Put(ctx context.Context, block blocks.Block) error {
437+
k := dshelp.MultihashToDsKey(block.Cid().Hash())
438+
439+
// Has is cheaper than Put, so see if we already have it
440+
if !bs.writeThrough {
441+
exists, err := bs.datastore.Has(ctx, k)
442+
if err == nil && exists {
443+
return nil // already stored.
444+
}
445+
}
446+
return bs.datastore.Put(ctx, k, block.RawData())
447+
}
448+
449+
func (bs *getManyBlockStore) PutMany(ctx context.Context, blocks []blocks.Block) error {
450+
if len(blocks) == 1 {
451+
// performance fast-path
452+
return bs.Put(ctx, blocks[0])
453+
}
454+
455+
t, err := bs.datastore.NewTransaction(ctx, false)
456+
if err != nil {
457+
return err
458+
}
459+
for _, b := range blocks {
460+
k := dshelp.MultihashToDsKey(b.Cid().Hash())
461+
462+
if !bs.writeThrough {
463+
exists, err := bs.datastore.Has(ctx, k)
464+
if err == nil && exists {
465+
continue
466+
}
467+
}
468+
469+
err = t.Put(ctx, k, b.RawData())
470+
if err != nil {
471+
return err
472+
}
473+
}
474+
return t.Commit(ctx)
475+
}
476+
477+
func (bs *getManyBlockStore) Has(ctx context.Context, k cid.Cid) (bool, error) {
478+
return bs.datastore.Has(ctx, dshelp.MultihashToDsKey(k.Hash()))
479+
}
480+
481+
func (bs *getManyBlockStore) GetSize(ctx context.Context, k cid.Cid) (int, error) {
482+
size, err := bs.datastore.GetSize(ctx, dshelp.MultihashToDsKey(k.Hash()))
483+
if err == ds.ErrNotFound {
484+
return -1, ipld.ErrNotFound{Cid: k}
485+
}
486+
return size, err
487+
}
488+
489+
func (bs *getManyBlockStore) DeleteBlock(ctx context.Context, k cid.Cid) error {
490+
return bs.datastore.Delete(ctx, dshelp.MultihashToDsKey(k.Hash()))
491+
}
492+
493+
// AllKeysChan runs a query for keys from the blockstore.
494+
// this is very simplistic, in the future, take dsq.Query as a param?
495+
//
496+
// AllKeysChan respects context.
497+
func (bs *getManyBlockStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
498+
499+
// KeysOnly, because that would be _a lot_ of data.
500+
q := dsq.Query{KeysOnly: true}
501+
res, err := bs.datastore.Query(ctx, q)
502+
if err != nil {
503+
return nil, err
504+
}
505+
506+
output := make(chan cid.Cid, dsq.KeysOnlyBufSize)
507+
go func() {
508+
defer func() {
509+
res.Close() // ensure exit (signals early exit, too)
510+
close(output)
511+
}()
512+
513+
for {
514+
e, ok := res.NextSync()
515+
if !ok {
516+
return
517+
}
518+
if e.Error != nil {
519+
logger.Errorf("blockstore.AllKeysChan got err: %s", e.Error)
520+
return
521+
}
522+
523+
// need to convert to key.Key using key.KeyFromDsKey.
524+
bk, err := dshelp.BinaryFromDsKey(ds.RawKey(e.Key))
525+
if err != nil {
526+
logger.Warnf("error parsing key from binary: %s", err)
527+
continue
528+
}
529+
k := cid.NewCidV1(cid.Raw, bk)
530+
select {
531+
case <-ctx.Done():
532+
return
533+
case output <- k:
534+
}
535+
}
536+
}()
537+
538+
return output, nil
539+
}
540+
313541
// NewGCLocker returns a default implementation of
314542
// GCLocker using standard [RW] mutexes.
315543
func NewGCLocker() GCLocker {

blockstore/blockstore_test.go

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import (
66
"fmt"
77
"testing"
88

9+
dstest "github.com/ipfs/go-datastore/test"
10+
911
u "github.com/ipfs/boxo/util"
1012
blocks "github.com/ipfs/go-block-format"
1113
cid "github.com/ipfs/go-cid"
@@ -72,6 +74,108 @@ func TestCidv0v1(t *testing.T) {
7274
}
7375
}
7476

77+
func TestGetManyWhenKeyNotPresent(t *testing.T) {
78+
bs := NewGetManyBlockstore(dstest.NewTestTxnDatastore(ds.NewMapDatastore(), false))
79+
c1 := cid.NewCidV0(u.Hash([]byte("stuff")))
80+
c2 := cid.NewCidV0(u.Hash([]byte("stuff2")))
81+
82+
blks, missingCIDs, err := bs.GetMany(bg, []cid.Cid{c1, c2})
83+
84+
if len(blks) != 0 {
85+
t.Error("no blocks expected")
86+
}
87+
if len(missingCIDs) != 2 {
88+
t.Error("2 missing cids expected")
89+
}
90+
if err != nil {
91+
t.Error("no error expected")
92+
}
93+
}
94+
95+
func TestGetManyWhenKeyIsNil(t *testing.T) {
96+
bs := NewGetManyBlockstore(dstest.NewTestTxnDatastore(ds.NewMapDatastore(), false))
97+
_, _, err := bs.GetMany(bg, []cid.Cid{{}, {}})
98+
if !ipld.IsNotFound(err) {
99+
t.Fail()
100+
}
101+
}
102+
103+
func TestPutsThenGetManyBlock(t *testing.T) {
104+
bs := NewGetManyBlockstore(dstest.NewTestTxnDatastore(ds.NewMapDatastore(), false))
105+
block1 := blocks.NewBlock([]byte("some data1"))
106+
block2 := blocks.NewBlock([]byte("some data2"))
107+
block3 := blocks.NewBlock([]byte("some data3"))
108+
block4 := blocks.NewBlock([]byte("some data4"))
109+
110+
err := bs.PutMany(bg, []blocks.Block{block1, block2, block4})
111+
if err != nil {
112+
t.Fatal(err)
113+
}
114+
115+
blocksFromBlockstore, missingCIDs, err := bs.GetMany(bg, []cid.Cid{block1.Cid(), block2.Cid(), block3.Cid(), block4.Cid()})
116+
if err != nil {
117+
t.Fatal(err)
118+
}
119+
if len(blocksFromBlockstore) != 3 {
120+
t.Fatal("unexpected number of blocks")
121+
}
122+
if len(missingCIDs) != 1 {
123+
t.Fatal("unexpected number of missing CIDs")
124+
}
125+
if !bytes.Equal(blocksFromBlockstore[0].RawData(), block1.RawData()) {
126+
t.Fail()
127+
}
128+
if !bytes.Equal(blocksFromBlockstore[1].RawData(), block2.RawData()) {
129+
t.Fail()
130+
}
131+
if !bytes.Equal(blocksFromBlockstore[2].RawData(), block4.RawData()) {
132+
t.Fail()
133+
}
134+
if !bytes.Equal(missingCIDs[0].Bytes(), block3.Cid().Bytes()) {
135+
t.Fail()
136+
}
137+
}
138+
139+
func TestCidv0v1Many(t *testing.T) {
140+
bs := NewGetManyBlockstore(dstest.NewTestTxnDatastore(ds.NewMapDatastore(), false))
141+
block1 := blocks.NewBlock([]byte("some data1"))
142+
block2 := blocks.NewBlock([]byte("some data2"))
143+
block3 := blocks.NewBlock([]byte("some data3"))
144+
block4 := blocks.NewBlock([]byte("some data4"))
145+
146+
err := bs.PutMany(bg, []blocks.Block{block1, block2, block4})
147+
if err != nil {
148+
t.Fatal(err)
149+
}
150+
151+
blocksFromBlockstore, missingCIDs, err := bs.GetMany(bg,
152+
[]cid.Cid{cid.NewCidV1(cid.DagProtobuf, block1.Cid().Hash()),
153+
cid.NewCidV1(cid.DagProtobuf, block2.Cid().Hash()),
154+
cid.NewCidV1(cid.DagProtobuf, block3.Cid().Hash()),
155+
cid.NewCidV1(cid.DagProtobuf, block4.Cid().Hash())})
156+
if err != nil {
157+
t.Fatal(err)
158+
}
159+
if len(blocksFromBlockstore) != 3 {
160+
t.Fatal("unexpected number of blocks")
161+
}
162+
if len(missingCIDs) != 1 {
163+
t.Fatal("unexpected number of missing CIDs")
164+
}
165+
if !bytes.Equal(blocksFromBlockstore[0].RawData(), block1.RawData()) {
166+
t.Fail()
167+
}
168+
if !bytes.Equal(blocksFromBlockstore[1].RawData(), block2.RawData()) {
169+
t.Fail()
170+
}
171+
if !bytes.Equal(blocksFromBlockstore[2].RawData(), block4.RawData()) {
172+
t.Fail()
173+
}
174+
if !bytes.Equal(missingCIDs[0].Bytes(), cid.NewCidV1(cid.DagProtobuf, block3.Cid().Hash()).Bytes()) {
175+
t.Fail()
176+
}
177+
}
178+
75179
func TestPutThenGetSizeBlock(t *testing.T) {
76180
bs := NewBlockstore(ds_sync.MutexWrap(ds.NewMapDatastore()))
77181
block := blocks.NewBlock([]byte("some data"))

0 commit comments

Comments
 (0)