Skip to content

Commit 52d95e1

Browse files
authored
chore: implement object listing (#293)
1 parent 42c41f6 commit 52d95e1

File tree

7 files changed

+166
-29
lines changed

7 files changed

+166
-29
lines changed

examples/gcs_demo.cc

+15-10
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@ using namespace util;
1414
using absl::GetFlag;
1515

1616
ABSL_FLAG(string, bucket, "", "");
17+
ABSL_FLAG(string, prefix, "", "");
18+
1719
ABSL_FLAG(uint32_t, connect_ms, 2000, "");
1820
ABSL_FLAG(bool, epoll, false, "Whether to use epoll instead of io_uring");
1921

20-
2122
void Run(SSL_CTX* ctx) {
2223
fb2::ProactorBase* pb = fb2::ProactorBase::me();
2324
cloud::GCPCredsProvider provider;
@@ -28,11 +29,18 @@ void Run(SSL_CTX* ctx) {
2829
cloud::GCS gcs(&provider, ctx, pb);
2930
ec = gcs.Connect(connect_ms);
3031
CHECK(!ec) << "Could not connect " << ec;
31-
auto cb = [](std::string_view bname) {
32-
CONSOLE_INFO << bname;
33-
};
3432

35-
ec = gcs.ListBuckets(cb);
33+
string prefix = GetFlag(FLAGS_prefix);
34+
if (!prefix.empty()) {
35+
auto cb = [](cloud::GCS::ObjectItem item) {
36+
cout << "Object: " << item.key << ", size: " << item.size << endl;
37+
};
38+
ec = gcs.List(GetFlag(FLAGS_bucket), prefix, false, cb);
39+
} else {
40+
auto cb = [](std::string_view bname) { CONSOLE_INFO << bname; };
41+
42+
ec = gcs.ListBuckets(cb);
43+
}
3644
CHECK(!ec) << ec.message();
3745
}
3846

@@ -53,12 +61,9 @@ int main(int argc, char** argv) {
5361

5462
pp->Run();
5563

56-
SSL_CTX* ctx = util::http::TlsClient::CreateSslContext();
57-
pp->GetNextProactor()->Await([ctx] {
58-
Run(ctx);
59-
});
64+
SSL_CTX* ctx = util::http::TlsClient::CreateSslContext();
65+
pp->GetNextProactor()->Await([ctx] { Run(ctx); });
6066
util::http::TlsClient::FreeContext(ctx);
6167

62-
6368
return 0;
6469
}

strings/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
add_library(strings_lib human_readable.cc)
1+
add_library(strings_lib escaping.cc human_readable.cc)
22
cxx_link(strings_lib base absl::strings absl::str_format)
33

44
cxx_test(strings_test strings_lib LABELS CI)

strings/escaping.cc

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Copyright 2024, Roman Gershman. All rights reserved.
2+
// See LICENSE for licensing terms.
3+
//
4+
#include <absl/strings/ascii.h>
5+
6+
namespace strings {
using namespace std;

namespace {

// Upper-case hexadecimal digits used for the two nibbles of a "%XX" escape.
constexpr char kHexDigits[] = "0123456789ABCDEF";

// Returns true when `ch` is copied to the output unchanged: ASCII letters and
// digits plus the marks - _ . ! ~ * ( ). Every other byte value (including
// all bytes >= 0x80) is percent-encoded by the caller.
// The ranges are spelled out explicitly so the test is ASCII-only and does not
// depend on the C locale.
bool IsValidUrlChar(unsigned char ch) {
  const bool is_alnum =
      (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
  if (is_alnum)
    return true;

  switch (ch) {
    case '-':
    case '_':
    case '.':
    case '!':
    case '~':
    case '*':
    case '(':
    case ')':
      return true;
    default:
      return false;
  }
}

// Returns the number of bytes `src` occupies once encoded: one byte for each
// character kept verbatim and three ("%XX") for each escaped one.
size_t EncodedLength(std::string_view src) {
  size_t len = 0;
  for (const char c : src) {
    len += IsValidUrlChar(static_cast<unsigned char>(c)) ? 1 : 3;
  }
  return len;
}

}  // namespace

// Appends the percent-encoded form of `src` to `*dest`.
//
// The existing contents of `*dest` are preserved. Bytes accepted by
// IsValidUrlChar are appended as is; any other byte is appended as '%'
// followed by its two upper-case hex digits.
//
// `src` must not refer to the storage of `*dest`: growing the string may
// reallocate it and leave `src` dangling.
void AppendUrlEncoded(const std::string_view src, string* dest) {
  const size_t old_size = dest->size();

  // Grow once to the exact final size, then fill the new tail in place.
  dest->resize(old_size + EncodedLength(src));
  char* out = dest->data() + old_size;

  for (const char c : src) {
    const unsigned char byte = static_cast<unsigned char>(c);
    if (IsValidUrlChar(byte)) {
      *out++ = c;
    } else {
      *out++ = '%';
      *out++ = kHexDigits[(byte >> 4) & 0x0F];
      *out++ = kHexDigits[byte & 0x0F];
    }
  }
}

}  // namespace strings

strings/escaping.h

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2024, Roman Gershman. All rights reserved.
2+
// See LICENSE for licensing terms.
3+
//
4+
5+
#pragma once
6+
7+
#include <string_view>
8+
9+
namespace strings {
10+
11+
// Appends the URL-encoded (percent-encoded) form of `src` to `*dest`, keeping
// whatever `*dest` already contains. ASCII letters, digits and the characters
// - _ . ! ~ * ( ) are copied unchanged; every other byte is written as "%XX"
// with upper-case hexadecimal digits.
// NOTE(review): `src` should not point into `*dest` - the implementation
// resizes `*dest` before reading `src`.
void AppendUrlEncoded(const std::string_view src, std::string* dest);
12+
13+
} // namespace strings

util/cloud/gcp/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
add_library(gcp_lib gcs.cc)
22

3-
cxx_link(gcp_lib http_client_lib TRDP::rapidjson)
3+
cxx_link(gcp_lib http_client_lib strings_lib TRDP::rapidjson)

util/cloud/gcp/gcs.cc

+86-16
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "io/file.h"
1515
#include "io/file_util.h"
1616
#include "io/line_reader.h"
17+
#include "strings/escaping.h"
1718

1819
using namespace std;
1920
namespace h2 = boost::beast::http;
@@ -31,17 +32,19 @@ auto Unexpected(std::errc code) {
3132
return nonstd::make_unexpected(make_error_code(code));
3233
}
3334

34-
#define RETURN_UNEXPECTED(x) do { \
35-
auto ec = (x); \
36-
if (ec) \
37-
return nonstd::make_unexpected(ec); \
38-
} while(false)
35+
// Evaluates `x` exactly once; if the resulting error code is set, returns it
// from the enclosing function wrapped with nonstd::make_unexpected.
// The temporary has a deliberately unusual name so that an argument mentioning
// a caller variable called `ec` (e.g. RETURN_UNEXPECTED(ec)) is not captured
// by the macro's own declaration.
#define RETURN_UNEXPECTED(x)                               \
  do {                                                     \
    auto return_unexpected_ec_ = (x);                      \
    if (return_unexpected_ec_)                             \
      return nonstd::make_unexpected(return_unexpected_ec_); \
  } while (false)
3941

40-
#define RETURN_ERROR(x) do { \
41-
auto ec = (x); \
42-
if (ec) \
43-
return ec; \
44-
} while (false)
42+
// Evaluates `x` exactly once; if the resulting error code is set, returns it
// from the enclosing function, otherwise execution continues after the macro.
// The temporary has a deliberately unusual name so that an argument mentioning
// a caller variable called `ec` (e.g. RETURN_ERROR(ec)) is not captured by the
// macro's own declaration.
#define RETURN_ERROR(x)          \
  do {                           \
    auto return_error_ec_ = (x); \
    if (return_error_ec_)        \
      return return_error_ec_;   \
  } while (false)
4548

4649
string AuthHeader(string_view access_token) {
4750
return absl::StrCat("Bearer ", access_token);
@@ -185,7 +188,7 @@ io::Result<EmptyParserPtr> SendWithToken(GCPCredsProvider* provider, http::Clien
185188
EmptyParserPtr parser(new h2::response_parser<h2::empty_body>());
186189
RETURN_UNEXPECTED(client->ReadHeader(parser.get()));
187190

188-
VLOG(1) << "RespHeader" << i << ": " << parser.get();
191+
VLOG(1) << "RespHeader" << i << ": " << parser->get();
189192

190193
if (parser->get().result() == h2::status::ok) {
191194
return parser;
@@ -204,6 +207,11 @@ io::Result<EmptyParserPtr> SendWithToken(GCPCredsProvider* provider, http::Clien
204207
return nonstd::make_unexpected(ec);
205208
}
206209

210+
// Validates that `val` is a JSON array and binds it to a new local variable
// named `array` in the caller's scope.
// If `val` is not an array, the enclosing function returns
// make_error_code(errc::bad_message).
//
// Because it expands to an `if` followed by a declaration, it must be used as
// a statement of its own inside a braced block, at most once per scope.
// `val` is evaluated twice, so it should be free of side effects.
#define FETCH_ARRAY_MEMBER(val)                \
  if (!(val).IsArray())                        \
    return make_error_code(errc::bad_message); \
  auto array = (val).GetArray()
214+
207215
} // namespace
208216

209217
error_code GCPCredsProvider::Init(unsigned connect_ms, fb2::ProactorBase* pb) {
@@ -335,11 +343,7 @@ error_code GCS::ListBuckets(ListBucketCb cb) {
335343
if (it == doc.MemberEnd())
336344
break;
337345

338-
const auto& val = it->value;
339-
if (!val.IsArray()) {
340-
return make_error_code(errc::bad_message);
341-
}
342-
auto array = val.GetArray();
346+
FETCH_ARRAY_MEMBER(it->value);
343347

344348
for (size_t i = 0; i < array.Size(); ++i) {
345349
const auto& item = array[i];
@@ -359,5 +363,71 @@ error_code GCS::ListBuckets(ListBucketCb cb) {
359363
return {};
360364
}
361365

366+
// Lists the objects of `bucket` whose names start with `prefix`, invoking `cb`
// once per entry and following "nextPageToken" until the last page.
//
// When `recursive` is false the request adds a "/" delimiter; entries of the
// response's "prefixes" array are then reported with is_prefix=true and
// size 0, while entries of "items" carry the object name and its size.
//
// The string views handed to `cb` point into the response buffer of the page
// being processed and must not be kept after the callback returns.
//
// Returns the transport/auth error if a request fails, errc::bad_message if a
// response is not the expected JSON shape, and an empty error code on success.
// `bucket` must be non-empty (enforced with CHECK).
error_code GCS::List(string_view bucket, string_view prefix, bool recursive,
                     ListObjectCb cb) {
  CHECK(!bucket.empty());

  string url = "/storage/v1/b/";
  absl::StrAppend(&url, bucket, "/o?maxResults=200&prefix=");
  strings::AppendUrlEncoded(prefix, &url);
  if (!recursive) {
    absl::StrAppend(&url, "&delimiter=%2f");
  }
  auto http_req = PrepareRequest(h2::verb::get, url, creds_provider_.access_token());

  // Malformed server data is reported to the caller instead of aborting.
  const error_code bad_message = make_error_code(errc::bad_message);

  rj::Document doc;
  while (true) {
    io::Result<EmptyParserPtr> parse_res =
        SendWithToken(&creds_provider_, client_.get(), &http_req);
    if (!parse_res)
      return parse_res.error();
    EmptyParserPtr empty_parser = std::move(*parse_res);
    h2::response_parser<h2::string_body> resp(std::move(*empty_parser));
    RETURN_ERROR(client_->Recv(&resp));

    // `msg` owns the body; the in-situ parse below makes `doc` reference it,
    // so `msg` has to stay alive until this page is fully processed.
    auto msg = resp.release();

    // data() is a valid NUL-terminated buffer even when the body is empty,
    // unlike &front().
    doc.ParseInsitu(msg.body().data());
    if (doc.HasParseError() || !doc.IsObject()) {
      return bad_message;
    }

    auto items_it = doc.FindMember("items");
    if (items_it != doc.MemberEnd()) {
      if (!items_it->value.IsArray())
        return bad_message;

      for (const auto& item : items_it->value.GetArray()) {
        if (!item.IsObject())
          return bad_message;

        auto name_it = item.FindMember("name");
        if (name_it == item.MemberEnd() || !name_it->value.IsString())
          return bad_message;

        auto size_it = item.FindMember("size");
        if (size_it == item.MemberEnd() || !size_it->value.IsString())
          return bad_message;

        absl::string_view key_name(name_it->value.GetString(),
                                   name_it->value.GetStringLength());
        absl::string_view sz_str(size_it->value.GetString(), size_it->value.GetStringLength());

        // The size is parsed from its JSON string form.
        size_t item_size = 0;
        if (!absl::SimpleAtoi(sz_str, &item_size))
          return bad_message;

        cb(ObjectItem{item_size, key_name, false});
      }
    }

    auto prefixes_it = doc.FindMember("prefixes");
    if (prefixes_it != doc.MemberEnd()) {
      if (!prefixes_it->value.IsArray())
        return bad_message;

      for (const auto& item : prefixes_it->value.GetArray()) {
        if (!item.IsString())
          return bad_message;

        absl::string_view str(item.GetString(), item.GetStringLength());
        cb(ObjectItem{0, str, true});
      }
    }

    auto token_it = doc.FindMember("nextPageToken");
    if (token_it == doc.MemberEnd()) {
      break;
    }
    if (!token_it->value.IsString())
      return bad_message;

    // The token is opaque server data, so it is escaped before it is placed
    // into the query string of the next request.
    string_view page_token(token_it->value.GetString(), token_it->value.GetStringLength());
    string next_target = absl::StrCat(url, "&pageToken=");
    strings::AppendUrlEncoded(page_token, &next_target);
    http_req.target(next_target);
  }
  return {};
}
431+
362432
} // namespace cloud
363433
} // namespace util

util/cloud/gcp/gcs.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,23 @@ class GCPCredsProvider {
7070
class GCS {
7171
public:
7272
using BucketItem = std::string_view;
73+
// One entry reported to a ListObjectCb.
// All members have defaults so that a default-constructed item is well
// defined; aggregate initialization such as ObjectItem{size, key, is_prefix}
// keeps working.
struct ObjectItem {
  // Object size as parsed from the listing; 0 for prefix entries.
  size_t size = 0;

  // Object name or common prefix. Non-owning: it refers to memory owned by
  // whoever created the item.
  std::string_view key;

  // True when the entry comes from the "prefixes" array of the listing rather
  // than from "items".
  bool is_prefix = false;
};
78+
7379
using ListBucketCb = std::function<void(BucketItem)>;
80+
using ListObjectCb = std::function<void(const ObjectItem&)>;
7481

7582
GCS(GCPCredsProvider* creds_provider, SSL_CTX* ssl_cntx, fb2::ProactorBase* pb);
7683
~GCS();
7784

7885
std::error_code Connect(unsigned msec);
7986

8087
std::error_code ListBuckets(ListBucketCb cb);
81-
88+
std::error_code List(std::string_view bucket, std::string_view prefix, bool recursive,
89+
ListObjectCb cb);
8290
private:
8391
GCPCredsProvider& creds_provider_;
8492
SSL_CTX* ssl_ctx_;

0 commit comments

Comments
 (0)