diff --git a/fixtures/t0118-multiblock-file-in-directory.car b/fixtures/t0118-multiblock-file-in-directory.car new file mode 100644 index 000000000..f32f10fd8 Binary files /dev/null and b/fixtures/t0118-multiblock-file-in-directory.car differ diff --git a/fixtures/t0118-one-layer-hamt.car b/fixtures/t0118-one-layer-hamt.car new file mode 100644 index 000000000..bc2ae7554 Binary files /dev/null and b/fixtures/t0118-one-layer-hamt.car differ diff --git a/go.mod b/go.mod index 47500e3c6..f83e45bd2 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.20 require ( github.com/ipfs/boxo v0.8.2-0.20230510114019-33e3f0cd052b github.com/ipfs/go-cid v0.4.1 + github.com/ipfs/go-unixfsnode v1.6.0 github.com/ipld/go-ipld-prime v0.20.0 github.com/libp2p/go-libp2p v0.26.3 github.com/stretchr/testify v1.8.2 @@ -33,7 +34,7 @@ require ( require ( github.com/go-logr/logr v1.2.4 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/gogo/protobuf v1.3.2 + github.com/gogo/protobuf v1.3.2 // indirect github.com/google/uuid v1.3.0 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/ipfs/bbloom v0.0.4 // indirect diff --git a/go.sum b/go.sum index cf7a061c8..c897a087c 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,7 @@ github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJ github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk= github.com/ipfs/go-block-format v0.1.2 h1:GAjkfhVx1f4YTODS6Esrj1wt2HhrtwTnhEr+DyPUaJo= github.com/ipfs/go-block-format v0.1.2/go.mod h1:mACVcrxarQKstUU3Yf/RdwbC4DzPV6++rO2a3d+a/KE= +github.com/ipfs/go-blockservice v0.5.0 h1:B2mwhhhVQl2ntW2EIpaWPwSCxSuqr5fFA93Ms4bYLEY= github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.2/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.3/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= @@ -71,6 +72,8 @@ 
github.com/ipfs/go-ipfs-blocksutil v0.0.1 h1:Eh/H4pc1hsvhzsQoMEP3Bke/aW5P5rVM1IW github.com/ipfs/go-ipfs-chunker v0.0.5 h1:ojCf7HV/m+uS2vhUGWcogIIxiO5ubl5O57Q7NapWLY8= github.com/ipfs/go-ipfs-delay v0.0.1 h1:r/UXYyRcddO6thwOnhiznIAiSvxMECGgtv35Xs1IeRQ= github.com/ipfs/go-ipfs-ds-help v1.1.0 h1:yLE2w9RAsl31LtfMt91tRZcrx+e61O5mDxFRR994w4Q= +github.com/ipfs/go-ipfs-exchange-interface v0.2.0 h1:8lMSJmKogZYNo2jjhUs0izT+dck05pqUw4mWNW9Pw6Y= +github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uYokgWRFidfvEkuA= github.com/ipfs/go-ipfs-pq v0.0.3 h1:YpoHVJB+jzK15mr/xsWC574tyDLkezVrDNeaalQBsTE= github.com/ipfs/go-ipfs-util v0.0.1/go.mod h1:spsl5z8KUnrve+73pOhSVZND1SIxPW5RyBCNzQxlJBc= github.com/ipfs/go-ipfs-util v0.0.2 h1:59Sswnk1MFaiq+VcaknX7aYEyGyGDAA73ilhEK2POp8= @@ -88,10 +91,15 @@ github.com/ipfs/go-log v1.0.5/go.mod h1:j0b8ZoR+7+R99LD9jZ6+AJsrzkPbSXbZfGakb5JP github.com/ipfs/go-log/v2 v2.1.3/go.mod h1:/8d0SH3Su5Ooc31QlL1WysJhvyOTDCjcCZ9Axpmri6g= github.com/ipfs/go-log/v2 v2.5.1 h1:1XdUzF7048prq4aBjDQQ4SL5RxftpRGdXhNRwKSAlcY= github.com/ipfs/go-log/v2 v2.5.1/go.mod h1:prSpmC1Gpllc9UYWxDiZDreBYw7zp4Iqp1kOLU9U5UI= +github.com/ipfs/go-merkledag v0.10.0 h1:IUQhj/kzTZfam4e+LnaEpoiZ9vZF6ldimVlby+6OXL4= github.com/ipfs/go-metrics-interface v0.0.1 h1:j+cpbjYvu4R8zbleSs36gvB7jR+wsL2fGD6n0jO4kdg= github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j/b/tL7HTWtJ4VPgWY= github.com/ipfs/go-peertaskqueue v0.8.1 h1:YhxAs1+wxb5jk7RvS0LHdyiILpNmRIRnZVztekOF0pg= +github.com/ipfs/go-unixfs v0.4.5 h1:wj8JhxvV1G6CD7swACwSKYa+NgtdWC1RUit+gFnymDU= github.com/ipfs/go-unixfsnode v1.6.0 h1:JOSA02yaLylRNi2rlB4ldPr5VcZhcnaIVj5zNLcOjDo= +github.com/ipfs/go-unixfsnode v1.6.0/go.mod h1:PVfoyZkX1B34qzT3vJO4nsLUpRCyhnMuHBznRcXirlk= +github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= +github.com/ipld/go-car/v2 v2.9.1-0.20230325062757-fff0e4397a3d h1:22g+x1tgWSXK34i25qjs+afr7basaneEkHaglBshd2g= 
github.com/ipld/go-codec-dagpb v1.6.0 h1:9nYazfyu9B1p3NAgfVdpRco3Fs2nFC72DqVsMj6rOcc= github.com/ipld/go-codec-dagpb v1.6.0/go.mod h1:ANzFhfP2uMJxRBr8CE+WQWs5UsNa0pYtmKZ+agnUw9s= github.com/ipld/go-ipld-prime v0.9.1-0.20210324083106-dc342a9917db/go.mod h1:KvBLMr4PX1gWptgkzRjVZCrLmSGcZCb/jioOQwCqZN8= diff --git a/tests/t0118_gateway_car_test.go b/tests/t0118_gateway_car_test.go index 9ab4aaac6..4fcca3976 100644 --- a/tests/t0118_gateway_car_test.go +++ b/tests/t0118_gateway_car_test.go @@ -11,6 +11,7 @@ import ( func TestGatewayCar(t *testing.T) { fixture := car.MustOpenUnixfsCar("t0118-test-dag.car") + oneLayerHAMTFixture := car.MustOpenUnixfsCar("t0118-one-layer-hamt.car") tests := SugarTests{ { @@ -67,6 +68,30 @@ func TestGatewayCar(t *testing.T) { InThatOrder(), ), }, + { + Name: "GET CAR with dag-scope=block pathing through a sharded directory", + Hint: ` + dag-scope=block should return a CAR file with only the root block and a + block for each optional path component. Pathing through a sharded directory should return + the blocks needed for the traversal, not the entire HAMT and not skipping all intermediate nodes + `, + Request: Request(). + Path("ipfs/{{cid}}/1.txt", oneLayerHAMTFixture.MustGetCid()). + Query("format", "car"). + Query("dag-scope", "block"), + Response: Expect(). + Status(200). + Body( + IsCar(). + HasRoot(oneLayerHAMTFixture.MustGetCid()). + HasBlocks(flattenStrings(t, + oneLayerHAMTFixture.MustGetCid(), + oneLayerHAMTFixture.MustGetCIDsInHAMTTraversal(nil, "1.txt"))..., + ). + Exactly(). + InThatOrder(), + ), + }, { Name: "GET CAR with dag-scope=entity", Hint: ` @@ -90,6 +115,30 @@ func TestGatewayCar(t *testing.T) { InThatOrder(), ), }, + { + Name: "GET CAR with dag-scope=entity for a sharded directory", + Hint: ` + dag-scope=entity for a sharded directory should return a CAR file with all of the path blocks as well + as all of the blocks in the HAMT, but not any of blocks below the HAMT. + `, + Request: Request(). 
+ Path("ipfs/{{cid}}", oneLayerHAMTFixture.MustGetCid()). + Query("format", "car"). + Query("dag-scope", "entity"), + Response: Expect(). + Status(200). + Body( + IsCar(). + HasRoot(oneLayerHAMTFixture.MustGetCid()). + HasBlocks( + flattenStrings(t, + oneLayerHAMTFixture.MustGetCid(), + oneLayerHAMTFixture.MustGetCidsInHAMT())..., + ). + Exactly(). + InThatOrder(), + ), + }, { Name: "GET CAR with dag-scope=all", Hint: ` @@ -122,7 +171,7 @@ func TestGatewayCar(t *testing.T) { func TestGatewayCarEntityBytes(t *testing.T) { multiBlockFileInDirFixture := car.MustOpenUnixfsCar("t0118-multiblock-file-in-directory.car") - fixture := multiBlockFileInDirFixture + oneLayerHAMTFixture := car.MustOpenUnixfsCar("t0118-one-layer-hamt.car") tests := SugarTests{ { @@ -132,7 +181,7 @@ func TestGatewayCarEntityBytes(t *testing.T) { the full UnixFS file at the end of the specified path `, Request: Request(). - Path("ipfs/{{cid}}", multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). + Path("ipfs/{{cid}}/multiblock.txt", multiBlockFileInDirFixture.MustGetCid()). Query("format", "car"). Query("dag-scope", "entity"). Query("entity-bytes", "0:*"), @@ -144,7 +193,7 @@ func TestGatewayCarEntityBytes(t *testing.T) { HasBlocks(flattenStrings(t, multiBlockFileInDirFixture.MustGetCid(), multiBlockFileInDirFixture.MustGetCid("multiblock.txt"), - multiBlockFileInDirFixture.MustGetChildrenCids("multiblock.txt"), + multiBlockFileInDirFixture.MustGetIPLDChildrenCids("multiblock.txt"), )...). Exactly(). InThatOrder(), @@ -158,16 +207,19 @@ func TestGatewayCarEntityBytes(t *testing.T) { (i.e. entity-bytes is effectively optional if the entity is not a file) `, Request: Request(). - Path("ipfs/{{cid}}", fixture.MustGetCid()). + Path("ipfs/{{cid}}", oneLayerHAMTFixture.MustGetCid()). Query("format", "car"). - Query("dag-scope", "entity"), + Query("dag-scope", "entity"). + Query("entity-bytes", "0:*"), Response: Expect(). Status(200). Body( IsCar(). - HasRoot(fixture.MustGetCid()). 
+ HasRoot(oneLayerHAMTFixture.MustGetCid()). HasBlocks( - fixture.MustGetCid(), + flattenStrings(t, + oneLayerHAMTFixture.MustGetCid(), + oneLayerHAMTFixture.MustGetCidsInHAMT())..., ). Exactly(). InThatOrder(), @@ -179,58 +231,67 @@ func TestGatewayCarEntityBytes(t *testing.T) { The response MUST contain only the minimal set of blocks necessary for fulfilling the range request `, Request: Request(). - Path("ipfs/{{cid}}", fixture.MustGetCid()). + Path("ipfs/{{cid}}", multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). Query("format", "car"). - Query("dag-scope", "entity"), + Query("dag-scope", "entity"). + Query("entity-bytes", "512:*"), Response: Expect(). Status(200). Body( IsCar(). - HasRoot(fixture.MustGetCid()). + HasRoot(multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). HasBlocks( - fixture.MustGetCid(), + flattenStrings(t, + multiBlockFileInDirFixture.MustGetCid("multiblock.txt"), + multiBlockFileInDirFixture.MustGetIPLDChildrenCids("multiblock.txt")[2:])..., ). Exactly(). InThatOrder(), ), }, { - Name: "GET CAR with entity-bytes equivalent to a HTTP Range Request for the middle of a large file", + Name: "GET CAR with entity-bytes equivalent to a HTTP Range Request for the middle of a file", Hint: ` The response MUST contain only the minimal set of blocks necessary for fulfilling the range request `, Request: Request(). - Path("ipfs/{{cid}}", fixture.MustGetCid()). + Path("ipfs/{{cid}}", multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). Query("format", "car"). - Query("dag-scope", "entity"), + Query("dag-scope", "entity"). + Query("entity-bytes", "512:1024"), Response: Expect(). Status(200). Body( IsCar(). - HasRoot(fixture.MustGetCid()). + HasRoot(multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). HasBlocks( - fixture.MustGetCid(), + flattenStrings(t, + multiBlockFileInDirFixture.MustGetCid("multiblock.txt"), + multiBlockFileInDirFixture.MustGetIPLDChildrenCids("multiblock.txt")[2:4])..., ). Exactly(). 
InThatOrder(), ), }, { - Name: "GET CAR with entity-bytes equivalent to HTTP Suffix Range Request for part of a small file", + Name: "GET CAR with entity-bytes equivalent to HTTP Suffix Range Request for part of a file", Hint: ` The response MUST contain only the minimal set of blocks necessary for fulfilling the range request `, Request: Request(). - Path("ipfs/{{cid}}", fixture.MustGetCid()). + Path("ipfs/{{cid}}", multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). Query("format", "car"). - Query("dag-scope", "entity"), + Query("dag-scope", "entity"). + Query("entity-bytes", "-5:*"), Response: Expect(). Status(200). Body( IsCar(). - HasRoot(fixture.MustGetCid()). + HasRoot(multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). HasBlocks( - fixture.MustGetCid(), + flattenStrings(t, + multiBlockFileInDirFixture.MustGetCid("multiblock.txt"), + multiBlockFileInDirFixture.MustGetIPLDChildrenCids("multiblock.txt")[3:])..., ). Exactly(). InThatOrder(), @@ -242,16 +303,19 @@ func TestGatewayCarEntityBytes(t *testing.T) { The response MUST contain only the minimal set of blocks necessary for fulfilling the range request `, Request: Request(). - Path("ipfs/{{cid}}", fixture.MustGetCid()). + Path("ipfs/{{cid}}", multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). Query("format", "car"). - Query("dag-scope", "entity"), + Query("dag-scope", "entity"). + Query("entity-bytes", "-999999:-3"), Response: Expect(). Status(200). Body( IsCar(). - HasRoot(fixture.MustGetCid()). + HasRoot(multiBlockFileInDirFixture.MustGetCid("multiblock.txt")). HasBlocks( - fixture.MustGetCid(), + flattenStrings(t, + multiBlockFileInDirFixture.MustGetCid("multiblock.txt"), + multiBlockFileInDirFixture.MustGetIPLDChildrenCids("multiblock.txt")[:5])..., ). Exactly(). 
InThatOrder(),
diff --git a/tooling/car/unixfs.go b/tooling/car/unixfs.go
index 90437f1ca..5bdf0fa1a 100644
--- a/tooling/car/unixfs.go
+++ b/tooling/car/unixfs.go
@@ -7,25 +7,34 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"io"
 	"os"
 	"path"
 	"sort"
 	"strings"
+	"sync"
 
 	"github.com/ipfs/boxo/blockservice"
 	"github.com/ipfs/boxo/ipld/car/v2/blockstore"
 	"github.com/ipfs/boxo/ipld/merkledag"
-	"github.com/ipfs/boxo/ipld/unixfs/io"
-	"github.com/ipfs/gateway-conformance/tooling/fixtures"
+	"github.com/ipfs/boxo/ipld/unixfs/hamt"
+	uio "github.com/ipfs/boxo/ipld/unixfs/io"
 	"github.com/ipfs/go-cid"
 	format "github.com/ipfs/go-ipld-format"
+	"github.com/ipfs/go-unixfsnode"
+	dagpb "github.com/ipld/go-codec-dagpb"
 	"github.com/ipld/go-ipld-prime"
 	_ "github.com/ipld/go-ipld-prime/codec/cbor"
 	_ "github.com/ipld/go-ipld-prime/codec/dagcbor"
 	_ "github.com/ipld/go-ipld-prime/codec/dagjson"
 	_ "github.com/ipld/go-ipld-prime/codec/json"
+	"github.com/ipld/go-ipld-prime/datamodel"
+	"github.com/ipld/go-ipld-prime/linking"
+	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
 	"github.com/ipld/go-ipld-prime/multicodec"
 	mc "github.com/multiformats/go-multicodec"
+
+	"github.com/ipfs/gateway-conformance/tooling/fixtures"
 )
 
 type UnixfsDag struct {
@@ -54,7 +63,7 @@ func newUnixfsDagFromCar(file string) (*UnixfsDag, error) {
 func (d *UnixfsDag) loadLinks(node format.Node) (map[string]*UnixfsDag, error) {
 	result := make(map[string]*UnixfsDag)
 
-	dir, err := io.NewDirectoryFromNode(d.dsvc, node)
+	dir, err := uio.NewDirectoryFromNode(d.dsvc, node)
 	if err != nil {
 		return nil, err
 	}
@@ -178,6 +187,91 @@ func (d *UnixfsDag) MustGetChildrenCids(names ...string) []string {
 	return cids
 }
 
+// MustGetIPLDChildrenCids returns the CID strings of the raw IPLD links of the
+// node found at the given path, in link order. It relies on MustGetNode to
+// resolve the path.
+func (d *UnixfsDag) MustGetIPLDChildrenCids(names ...string) []string {
+	node := d.MustGetNode(names...)
+	var cids []string
+	for _, link := range node.node.Links() {
+		cids = append(cids, link.Cid.String())
+	}
+	return cids
+}
+
+// MustGetCidsInHAMT returns the cids in the HAMT at the given path. Does not
+// include the CID of the HAMT root.
+//
+// The CIDs are collected by running the "unixfs-preload" reifier over the node
+// with a link system whose reads go through a tracking DAG service, and are
+// returned in the order they were fetched. It panics if the node cannot be
+// decoded as dag-pb or the reifier fails.
+func (d *UnixfsDag) MustGetCidsInHAMT(names ...string) []string {
+	node := d.MustGetNode(names...)
+	tracker := dservTrackingWrapper{
+		DAGService: node.dsvc,
+	}
+
+	lsys := cidlink.DefaultLinkSystem()
+	unixfsnode.AddUnixFSReificationToLinkSystem(&lsys)
+	lsys.StorageReadOpener = func(linkContext linking.LinkContext, link datamodel.Link) (io.Reader, error) {
+		cl, ok := link.(cidlink.Link)
+		if !ok {
+			// Report a readable error instead of a bare type-assertion panic.
+			return nil, fmt.Errorf("unsupported link type %T", link)
+		}
+		ctx := linkContext.Ctx
+		if ctx == nil {
+			// The link context may carry no context; never hand a nil one down.
+			ctx = context.Background()
+		}
+		nd, err := tracker.Get(ctx, cl.Cid)
+		if err != nil {
+			return nil, err
+		}
+		return bytes.NewReader(nd.RawData()), nil
+	}
+
+	primeNodeBuilder := dagpb.Type.PBNode.NewBuilder()
+	if err := dagpb.DecodeBytes(primeNodeBuilder, node.node.RawData()); err != nil {
+		panic(err)
+	}
+
+	preload, ok := lsys.KnownReifiers["unixfs-preload"]
+	if !ok {
+		panic(`link system has no "unixfs-preload" reifier`)
+	}
+	lnkCtx := linking.LinkContext{Ctx: context.Background()}
+	if _, err := preload(lnkCtx, primeNodeBuilder.Build(), &lsys); err != nil {
+		panic(err)
+	}
+
+	return tracker.requested()
+}
+
+// MustGetCIDsInHAMTTraversal returns the cids needed for a given HAMT
+// traversal. Does not include the HAMT root.
+//
+// path locates the HAMT node and child is the entry looked up inside it; the
+// CIDs are those fetched while resolving child. It panics if the HAMT cannot
+// be built from the node or the lookup fails.
+func (d *UnixfsDag) MustGetCIDsInHAMTTraversal(path []string, child string) []string {
+	node := d.MustGetNode(path...)
+	tracker := dservTrackingWrapper{
+		DAGService: node.dsvc,
+	}
+
+	shard, err := hamt.NewHamtFromDag(&tracker, node.node)
+	if err != nil {
+		panic(err)
+	}
+	if _, err := shard.Find(context.Background(), child); err != nil {
+		panic(err)
+	}
+
+	return tracker.requested()
+}
+
 func (d *UnixfsDag) MustGetRoot() *FixtureNode {
 	return d.MustGetNode()
 }
@@ -231,3 +325,69 @@ func MustOpenUnixfsCar(file string) *UnixfsDag {
 	}
 	return dag
 }
+
+// dservTrackingWrapper wraps a format.DAGService and records, in fetch order,
+// the CID of every node successfully returned by Get or GetMany.
+type dservTrackingWrapper struct {
+	format.DAGService
+	reqMx         sync.Mutex // guards requestedCids
+	requestedCids []cid.Cid
+}
+
+// record appends c to the list of requested CIDs.
+func (d *dservTrackingWrapper) record(c cid.Cid) {
+	d.reqMx.Lock()
+	defer d.reqMx.Unlock()
+	d.requestedCids = append(d.requestedCids, c)
+}
+
+// requested returns the recorded CIDs as strings in the order they were
+// recorded, or nil when nothing was recorded. It holds the lock while reading
+// so it cannot race with a GetMany forwarding goroutine that is still running.
+func (d *dservTrackingWrapper) requested() []string {
+	d.reqMx.Lock()
+	defer d.reqMx.Unlock()
+	if len(d.requestedCids) == 0 {
+		return nil
+	}
+	out := make([]string, 0, len(d.requestedCids))
+	for _, c := range d.requestedCids {
+		out = append(out, c.String())
+	}
+	return out
+}
+
+// Get fetches c from the wrapped service and records it on success.
+func (d *dservTrackingWrapper) Get(ctx context.Context, c cid.Cid) (format.Node, error) {
+	nd, err := d.DAGService.Get(ctx, c)
+	if err != nil {
+		return nil, err
+	}
+	d.record(c)
+	return nd, nil
+}
+
+// GetMany forwards the wrapped service's results, recording the CID of every
+// node that arrives without an error.
+//
+// The forwarding goroutine exits when the inner channel closes or when ctx is
+// cancelled, so a consumer that stops reading after cancelling ctx does not
+// leave it blocked on the send forever.
+func (d *dservTrackingWrapper) GetMany(ctx context.Context, cids []cid.Cid) <-chan *format.NodeOption {
+	innerCh := d.DAGService.GetMany(ctx, cids)
+	outCh := make(chan *format.NodeOption, 1)
+	go func() {
+		defer close(outCh)
+		for opt := range innerCh {
+			if opt.Err == nil {
+				d.record(opt.Node.Cid())
+			}
+			select {
+			case outCh <- opt:
+			case <-ctx.Done():
+				return
+			}
+		}
+	}()
+	return outCh
+}