Skip to content

Commit

Permalink
CAR::blockMap
Browse files Browse the repository at this point in the history
  • Loading branch information
kawax committed Dec 17, 2024
1 parent 5932a5e commit 5e4b66c
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 5 deletions.
39 changes: 34 additions & 5 deletions src/Console/UnpackRepoCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,39 @@ public function handle(): int
return 1;
}

// Unpack only record data for all collections
foreach (CAR::blockMap(Utils::streamFor(Storage::readStream($file))) as $key => $record) {
[$collection, $rkey] = explode('/', $key);
$block = data_get($record, 'value');
$cid = data_get($record, 'cid');

if (CID::verify(CBOR::encode($block), $cid)) {
$this->info('Verified');
} else {
$this->error('Verify failed');
}

$path = collect(['bluesky', 'download', $name, 'repo', $collection, $rkey.'.json'])
->implode(DIRECTORY_SEPARATOR);

Storage::put($path, json_encode($record, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES));

$this->line('Unpack: '.Storage::path($path));
}

// $this->blockIterator($name, $file);

$this->info('Unpack successful');

return 0;
}

/**
* A different version using CAR::blockIterator().
* Use this if you need _mst or _commit.
*/
private function blockIterator($name, $file): void
{
foreach (CAR::blockIterator(Utils::streamFor(Storage::readStream($file))) as $cid => $block) {
//dump($cid, $block);

Expand All @@ -77,7 +110,7 @@ public function handle(): int
}

if (Arr::exists($block, '$type')) {
if (CID::verify(CBOR::encode($block), $cid, codec: CID::DAG_CBOR)) {
if (CID::verify(CBOR::encode($block), $cid)) {
$this->info('Verified');
} else {
$this->error('Verify failed');
Expand All @@ -91,9 +124,5 @@ public function handle(): int

$this->line('Unpack: '.Storage::path($path));
}

$this->info('Unpack successful');

return 0;
}
}
60 changes: 60 additions & 0 deletions src/Support/CAR.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Revolution\Bluesky\Support;

use GuzzleHttp\Psr7\Utils;
use Illuminate\Support\Str;
use InvalidArgumentException;
use Psr\Http\Message\StreamInterface;
use YOCLIB\Multiformats\Multibase\Multibase;
Expand All @@ -13,6 +14,7 @@
* @link https://ipld.io/specs/transport/car/carv1/
* @link https://github.com/ipld/go-car
* @link https://github.com/ipld/js-car
* @link https://github.com/mary-ext/atcute/blob/trunk/packages/utilities/car/lib/atproto-repo.ts
*/
final class CAR
{
Expand Down Expand Up @@ -194,4 +196,62 @@ private static function decodeBlockV0(StreamInterface $data): array

return [$cid, $block];
}

/**
* Unlike {@link CAR::blockIterator()}, this is an iterator for `<collection>/<rkey>` key and record array.
*
* ```
* foreach (CAR::blockMap($data) as $key => $record) {
* [$collection, $rkey] = explode('/', $key);
* $block = data_get($record, 'value');
* $cid = data_get($record, 'cid');
*
* }
* ```
*
* @return iterable<string, array>
*/
public static function blockMap(StreamInterface|string $data): iterable
{
$data = Utils::streamFor($data);

$roots = self::decodeRoots($data);
$blockmap = iterator_to_array(self::blockIterator($data));

$commit = data_get($blockmap, $roots[0]);
$did = data_get($commit, 'did');

yield from self::walkEntries($blockmap, data_get($commit, 'data./'), $did);
}

private static function walkEntries(array $blockmap, string $pointer, string $did): iterable
{
$data = data_get($blockmap, $pointer);
$entries = data_get($data, 'e');

$lastKey = '';

if (filled(data_get($data, 'l./'))) {
yield from self::walkEntries($blockmap, data_get($data, 'l./'), $did);
}

foreach ($entries as $entry) {
$key_str = data_get($entry, 'k');
$key = substr($lastKey, 0, data_get($entry, 'p')).$key_str;

$lastKey = $key;

[$collection, $rkey] = explode('/', $key);
$uri = (string) AtUri::make(repo: $did, collection: $collection, rkey: $rkey);
$cid = data_get($entry, 'v./');
$value = data_get($blockmap, $cid);

// Match the format of getRecord and listRecords.
yield $key => compact('uri', 'cid', 'value');

if (filled(data_get($entry, 't./'))) {
yield from self::walkEntries($blockmap, data_get($entry, 't./'), $did);
}
}
}
}
8 changes: 8 additions & 0 deletions tests/Feature/Support/SupportTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -565,4 +565,12 @@ public function test_car_download_file_stream()
$hash_len = Varint::decode($data->read(1));
$this->assertSame(32, $hash_len);
}

public function test_car_block_map()
{
$data = Utils::streamFor(utils::tryFopen(__DIR__.'/fixture/bsky-app.car', 'rb'));

$this->assertCount(604, iterator_to_array(CAR::blockMap($data)));
$this->assertCount(744, iterator_to_array(CAR::blockIterator($data)));
}
}

0 comments on commit 5e4b66c

Please sign in to comment.