From ff06bd129e0bc07debd572cfff0fb6476b6d5b24 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Fri, 17 Jan 2025 12:06:56 +0100 Subject: [PATCH] HTTP Block-retrieval support for rainbow This adds http retrieval support based on latest boxo. It needs explicit enabling with an "--http-retrieval-enable" flag. Allowlist and number of workers can be configured with additional flags. A provider-ignore flag has been added as well. Example: - http-retrieval-allowlist: [dag.w3s.link] - routing-ignore-providers: [QmQzqxhK82kAmKvARFZSkUVS6fo9sySaiogAnx5EnZ6ZmC] (w3s elastic ipfs peer) --- go.mod | 2 +- go.sum | 32 ++++++++++++++++++++++++-- main.go | 59 ++++++++++++++++++++++++++++++++++++++++++++---- setup.go | 8 +++++++ setup_bitswap.go | 26 ++++++++++++++++----- 5 files changed, 114 insertions(+), 13 deletions(-) diff --git a/go.mod b/go.mod index 26f1e88..4eb17cf 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/felixge/httpsnoop v1.0.4 github.com/ipfs-shipyard/nopfs v0.0.14 github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 - github.com/ipfs/boxo v0.27.4 + github.com/ipfs/boxo v0.27.3-0.20250210095233-70538fd219ba github.com/ipfs/go-block-format v0.2.0 github.com/ipfs/go-cid v0.4.1 github.com/ipfs/go-datastore v0.6.0 diff --git a/go.sum b/go.sum index 6a3876b..6daca9a 100644 --- a/go.sum +++ b/go.sum @@ -260,8 +260,36 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.27.4 h1:6nC8lY5GnR6whAbW88hFz6L13wZUj2vr5BRe3iTvYBI= -github.com/ipfs/boxo v0.27.4/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250127164459-08419db21bba h1:UjZ4+buzBhiWww0rJ47eQdN8AgBaeRe0h9ZqYsk/Ew8= +github.com/ipfs/boxo 
v0.27.3-0.20250127164459-08419db21bba/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250130090023-3bc8ebd6d9b8 h1:G9JtVssDc6WaSmxEwoe4hoH2wUnMqYViMw4sK4pZrcg= +github.com/ipfs/boxo v0.27.3-0.20250130090023-3bc8ebd6d9b8/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250204114410-7f3b0e1f32a9 h1:eEDINc2P42Bctfr3gOMPl36JMY0my7oCAHAmKxLvPCo= +github.com/ipfs/boxo v0.27.3-0.20250204114410-7f3b0e1f32a9/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250204134905-aa1f7113c76b h1:yrL/B2ocwJJMUbKMmtZ7PHwv4QKODnrDcxTBvoxRjfw= +github.com/ipfs/boxo v0.27.3-0.20250204134905-aa1f7113c76b/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250204170555-5a118b05f9b3 h1:ad4it5di9Z6+2+JQHv0seAIz1AKE1BUClTCPHhLMfzE= +github.com/ipfs/boxo v0.27.3-0.20250204170555-5a118b05f9b3/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250204173852-a925bceda7a0 h1:Vw1kTaT2gars24F4vOlOVK48BzAb+miqH0R82WbzUiE= +github.com/ipfs/boxo v0.27.3-0.20250204173852-a925bceda7a0/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250204175459-0ad5b9335f7a h1:mwmwDB+v2BRJ0dAFKXJq2XH06/LziHccXj+c+JrIH/Q= +github.com/ipfs/boxo v0.27.3-0.20250204175459-0ad5b9335f7a/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250204212206-0a68017230f6 h1:DvTDhpGEiCzBkMQMowILxPlly4gZjOZczWHEimcZWAk= +github.com/ipfs/boxo v0.27.3-0.20250204212206-0a68017230f6/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250205141539-def0f2bd33d5 h1:yzeq9HZ6fL9C1yAHJyet2HFwN7rzkE1WuYE5o5ywGiI= +github.com/ipfs/boxo v0.27.3-0.20250205141539-def0f2bd33d5/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250205144801-3a75730dbb1b h1:fEUP/QSfKd5S4V8oMljsffTV1PuQqtdnOpnPQHdwSxE= 
+github.com/ipfs/boxo v0.27.3-0.20250205144801-3a75730dbb1b/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250206094658-6890482eafce h1:GUTqmwowN0CueuQHYRQs5i3Dx3rZL8SGkm2K5s4EV2M= +github.com/ipfs/boxo v0.27.3-0.20250206094658-6890482eafce/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250206095634-38fa962d384d h1:XqcbePxVsuGu6dBB+166l7nGD2MAW/MVFltf/9VO0Fw= +github.com/ipfs/boxo v0.27.3-0.20250206095634-38fa962d384d/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250206223725-67bcdaba19c5 h1:HCbIV6rFALrQzpTLCqZRVTAFJbiQnaqURT+xkyGcmfs= +github.com/ipfs/boxo v0.27.3-0.20250206223725-67bcdaba19c5/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250206224112-c780690065d0 h1:14Rdj3eTm8+JIRQq3zw2E5iU8/kSInian6DE71foPDE= +github.com/ipfs/boxo v0.27.3-0.20250206224112-c780690065d0/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= +github.com/ipfs/boxo v0.27.3-0.20250210095233-70538fd219ba h1:Nf4bQytkNbiUvLRWqaBlBG+PwB10EW+mG65Xx8B3sQs= +github.com/ipfs/boxo v0.27.3-0.20250210095233-70538fd219ba/go.mod h1:qEIRrGNr0bitDedTCzyzBHxzNWqYmyuHgK8LG9Q83EM= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= diff --git a/main.go b/main.go index fc7bd26..ec81ddd 100644 --- a/main.go +++ b/main.go @@ -401,6 +401,30 @@ Generate an identity seed and launch a gateway: EnvVars: []string{"ROUTING_MAX_TIMEOUT"}, Usage: "Maximum time for routing to find the maximum number of providers", }, + &cli.StringSliceFlag{ + Name: "routing-ignore-providers", + EnvVars: []string{"ROUTING_IGNORE_PROVIDERS"}, + Usage: "Ignore provider records from the given peer IDs", + }, + &cli.BoolFlag{ + Name: "http-retrieval-enable", + Value: 
false, + EnvVars: []string{"RAINBOW_HTTP_RETRIEVAL_ENABLE"}, + Usage: "Enable HTTP-retrieval of blocks.", + }, + &cli.StringSliceFlag{ + Name: "http-retrieval-allowlist", + Value: cli.NewStringSlice(), + EnvVars: []string{"RAINBOW_HTTP_RETRIEVAL_ALLOWLIST"}, + Usage: "When HTTP retrieval is enabled, allow it only to the given hosts. Empty means 'everyone'", + }, + &cli.IntFlag{ + Name: "http-retrieval-workers", + Value: 32, + EnvVars: []string{"RAINBOW_HTTP_RETRIEVAL_WORKERS"}, + Usage: "Number of workers to use for HTTP retrieval", + }, + &cli.StringSliceFlag{ Name: "dnslink-resolvers", Value: cli.NewStringSlice(extraDNSLinkResolvers...), @@ -527,6 +551,27 @@ share the same seed as long as the indexes are different. return err } + var routingIgnoreProviders []peer.ID + for _, pstr := range cctx.StringSlice("routing-ignore-providers") { + pid, err := peer.Decode(pstr) + if err != nil { + return fmt.Errorf("error parsing peer in routing-ignore-providers: %w", err) + } + routingIgnoreProviders = append(routingIgnoreProviders, pid) + } + + routerFilterProtocols := cctx.StringSlice("http-routers-filter-protocols") + httpRetrievalEnable := cctx.Bool("http-retrieval-enable") + httpRetrievalWorkers := cctx.Int("http-retrieval-workers") + httpRetrievalAllowlist := cctx.StringSlice("http-retrieval-allowlist") + if httpRetrievalEnable { + routerFilterProtocols = append(routerFilterProtocols, httpRouterGatewayProtocol) + fmt.Printf("HTTP block-retrievals enabled. Workers: %d. Allowlist set: %t\n", + httpRetrievalWorkers, + len(httpRetrievalAllowlist) > 0, + ) + } + cfg := Config{ DataDir: ddir, BlockstoreType: cctx.String("blockstore"), @@ -540,7 +585,7 @@ share the same seed as long as the indexes are different. 
MaxFD: cctx.Int("libp2p-max-fd"), InMemBlockCache: cctx.Int64("inmem-block-cache"), RoutingV1Endpoints: cctx.StringSlice("http-routers"), - RoutingV1FilterProtocols: cctx.StringSlice("http-routers-filter-protocols"), + RoutingV1FilterProtocols: routerFilterProtocols, DHTRouting: dhtRouting, DHTSharedHost: cctx.Bool("dht-shared-host"), Bitswap: bitswap, @@ -575,9 +620,15 @@ share the same seed as long as the indexes are different. WALMinSyncInterval: time.Second * time.Duration(cctx.Int("pebble-wal-min-sync-interval-sec")), // Routing ProviderQueryManager config - RoutingMaxRequests: cctx.Int("routing-max-requests"), - RoutingMaxProviders: cctx.Int("routing-max-providers"), - RoutingMaxTimeout: cctx.Duration("routing-max-timeout"), + RoutingMaxRequests: cctx.Int("routing-max-requests"), + RoutingMaxProviders: cctx.Int("routing-max-providers"), + RoutingMaxTimeout: cctx.Duration("routing-max-timeout"), + RoutingIgnoreProviders: routingIgnoreProviders, + + // HTTP Retrieval config + HTTPRetrievalEnable: httpRetrievalEnable, + HTTPRetrievalAllowlist: httpRetrievalAllowlist, + HTTPRetrievalWorkers: httpRetrievalWorkers, } var gnd *Node diff --git a/setup.go b/setup.go index 5361ad5..0a4c0ed 100644 --- a/setup.go +++ b/setup.go @@ -52,6 +52,8 @@ func init() { const cidContactEndpoint = "https://cid.contact" +const httpRouterGatewayProtocol = "transport-ipfs-gateway-http" + var httpRoutersFilterProtocols = []string{"unknown", "transport-bitswap"} // IPIP-484 var extraDNSLinkResolvers = []string{ @@ -113,6 +115,7 @@ type Config struct { TrustlessGatewayDomains []string RoutingV1Endpoints []string RoutingV1FilterProtocols []string + RoutingIgnoreProviders []peer.ID DHTRouting DHTRouting DHTSharedHost bool IpnsMaxCacheTTL time.Duration @@ -163,6 +166,11 @@ type Config struct { RoutingMaxRequests int RoutingMaxProviders int RoutingMaxTimeout time.Duration + + // HTTP Retrieval configuration + HTTPRetrievalEnable bool + HTTPRetrievalAllowlist []string + HTTPRetrievalWorkers 
int } func SetupNoLibp2p(ctx context.Context, cfg Config, dnsCache *cachedDNS) (*Node, error) { diff --git a/setup_bitswap.go b/setup_bitswap.go index 59b953a..764d4fe 100644 --- a/setup_bitswap.go +++ b/setup_bitswap.go @@ -8,7 +8,9 @@ import ( "github.com/ipfs/boxo/bitswap" bsclient "github.com/ipfs/boxo/bitswap/client" - bsnet "github.com/ipfs/boxo/bitswap/network" + "github.com/ipfs/boxo/bitswap/network" + bsnet "github.com/ipfs/boxo/bitswap/network/bsnet" + "github.com/ipfs/boxo/bitswap/network/httpnet" bsserver "github.com/ipfs/boxo/bitswap/server" "github.com/ipfs/boxo/blockstore" "github.com/ipfs/boxo/exchange" @@ -24,14 +26,26 @@ import ( func setupBitswapExchange(ctx context.Context, cfg Config, h host.Host, cr routing.ContentRouting, bstore blockstore.Blockstore) exchange.Interface { bsctx := metri.CtxScope(ctx, "ipfs_bitswap") + var exnet network.BitSwapNetwork bn := bsnet.NewFromIpfsHost(h) + if cfg.HTTPRetrievalEnable { + htnet := httpnet.New(h, + httpnet.WithHTTPWorkers(cfg.HTTPRetrievalWorkers), + httpnet.WithAllowlist(cfg.HTTPRetrievalAllowlist), + ) + exnet = network.New(h.Peerstore(), bn, htnet) + } else { + exnet = bn + } + // Custom query manager with the content router and the host // and our custom options to overwrite the default. - pqm, err := providerquerymanager.New(h, cr, + pqm, err := providerquerymanager.New(exnet, cr, providerquerymanager.WithMaxInProcessRequests(cfg.RoutingMaxRequests), providerquerymanager.WithMaxProviders(cfg.RoutingMaxProviders), providerquerymanager.WithMaxTimeout(cfg.RoutingMaxTimeout), + providerquerymanager.WithIgnoreProviders(cfg.RoutingIgnoreProviders...), ) if err != nil { panic(err) @@ -88,14 +102,14 @@ func setupBitswapExchange(ctx context.Context, cfg Config, h host.Host, cr routi ) // Initialize client+server - bswap := bitswap.New(bsctx, bn, pqm, bstore, opts...) - bn.Start(bswap) + bswap := bitswap.New(bsctx, exnet, pqm, bstore, opts...) 
+ exnet.Start(bswap) return &noNotifyExchange{bswap} } // By default, rainbow runs with bitswap client alone - bswap := bsclient.New(bsctx, bn, pqm, bstore, clientOpts...) - bn.Start(bswap) + bswap := bsclient.New(bsctx, exnet, pqm, bstore, clientOpts...) + exnet.Start(bswap) return bswap }