diff --git a/src/Console/UnpackRepoCommand.php b/src/Console/UnpackRepoCommand.php index 4c08a5a..407cb32 100644 --- a/src/Console/UnpackRepoCommand.php +++ b/src/Console/UnpackRepoCommand.php @@ -62,6 +62,39 @@ public function handle(): int return 1; } + // Unpack only record data for all collections + foreach (CAR::blockMap(Utils::streamFor(Storage::readStream($file))) as $key => $record) { + [$collection, $rkey] = explode('/', $key); + $block = data_get($record, 'value'); + $cid = data_get($record, 'cid'); + + if (CID::verify(CBOR::encode($block), $cid)) { + $this->info('Verified'); + } else { + $this->error('Verify failed'); + } + + $path = collect(['bluesky', 'download', $name, 'repo', $collection, $rkey.'.json']) + ->implode(DIRECTORY_SEPARATOR); + + Storage::put($path, json_encode($record, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES)); + + $this->line('Unpack: '.Storage::path($path)); + } + + // $this->blockIterator($name, $file); + + $this->info('Unpack successful'); + + return 0; + } + + /** + * A different version using CAR::blockIterator(). + * Use this if you need _mst or _commit. + */ + private function blockIterator($name, $file): void + { foreach (CAR::blockIterator(Utils::streamFor(Storage::readStream($file))) as $cid => $block) { //dump($cid, $block); @@ -77,7 +110,7 @@ public function handle(): int } if (Arr::exists($block, '$type')) { - if (CID::verify(CBOR::encode($block), $cid, codec: CID::DAG_CBOR)) { + if (CID::verify(CBOR::encode($block), $cid)) { $this->info('Verified'); } else { $this->error('Verify failed'); @@ -91,9 +124,5 @@ public function handle(): int $this->line('Unpack: '.Storage::path($path)); } - - $this->info('Unpack successful'); - - return 0; } } diff --git a/src/Support/CAR.php b/src/Support/CAR.php index c19d084..7985ad9 100644 --- a/src/Support/CAR.php +++ b/src/Support/CAR.php @@ -5,6 +5,7 @@ namespace Revolution\Bluesky\Support; use GuzzleHttp\Psr7\Utils; +use Illuminate\Support\Str; use InvalidArgumentException; use Psr\Http\Message\StreamInterface; use YOCLIB\Multiformats\Multibase\Multibase; @@ -13,6 +14,7 @@ * @link https://ipld.io/specs/transport/car/carv1/ * @link https://github.com/ipld/go-car * @link https://github.com/ipld/js-car + * @link https://github.com/mary-ext/atcute/blob/trunk/packages/utilities/car/lib/atproto-repo.ts */ final class CAR { @@ -194,4 +196,62 @@ private static function decodeBlockV0(StreamInterface $data): array return [$cid, $block]; } + + /** + * Unlike {@link CAR::blockIterator()}, this is an iterator for `/` key and record array. + * + * ``` + * foreach (CAR::blockMap($data) as $key => $record) { + * [$collection, $rkey] = explode('/', $key); + * $block = data_get($record, 'value'); + * $cid = data_get($record, 'cid'); + * + * } + * ``` + * + * @return iterable + */ + public static function blockMap(StreamInterface|string $data): iterable + { + $data = Utils::streamFor($data); + + $roots = self::decodeRoots($data); + $blockmap = iterator_to_array(self::blockIterator($data)); + + $commit = data_get($blockmap, $roots[0]); + $did = data_get($commit, 'did'); + + yield from self::walkEntries($blockmap, data_get($commit, 'data./'), $did); + } + + private static function walkEntries(array $blockmap, string $pointer, string $did): iterable + { + $data = data_get($blockmap, $pointer); + $entries = data_get($data, 'e'); + + $lastKey = ''; + + if (filled(data_get($data, 'l./'))) { + yield from self::walkEntries($blockmap, data_get($data, 'l./'), $did); + } + + foreach ($entries as $entry) { + $key_str = data_get($entry, 'k'); + $key = substr($lastKey, 0, data_get($entry, 'p')).$key_str; + + $lastKey = $key; + + [$collection, $rkey] = explode('/', $key); + $uri = (string) AtUri::make(repo: $did, collection: $collection, rkey: $rkey); + $cid = data_get($entry, 'v./'); + $value = data_get($blockmap, $cid); + + // Match the format of getRecord and listRecords. + yield $key => compact('uri', 'cid', 'value'); + + if (filled(data_get($entry, 't./'))) { + yield from self::walkEntries($blockmap, data_get($entry, 't./'), $did); + } + } + } } diff --git a/tests/Feature/Support/SupportTest.php b/tests/Feature/Support/SupportTest.php index 8b3fea8..21051e4 100644 --- a/tests/Feature/Support/SupportTest.php +++ b/tests/Feature/Support/SupportTest.php @@ -565,4 +565,12 @@ public function test_car_download_file_stream() $hash_len = Varint::decode($data->read(1)); $this->assertSame(32, $hash_len); } + + public function test_car_block_map() + { + $data = Utils::streamFor(utils::tryFopen(__DIR__.'/fixture/bsky-app.car', 'rb')); + + $this->assertCount(604, iterator_to_array(CAR::blockMap($data))); + $this->assertCount(744, iterator_to_array(CAR::blockIterator($data))); + } }