Skip to content

Commit 782493a

Browse files
committed
Support basic parquet import/export
1 parent 9183ba6 commit 782493a

28 files changed

+1012
-167
lines changed

CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ endif()
144144

145145
# find_package(Boost REQUIRED)
146146
find_package(Lz4 REQUIRED)
147+
find_package(Arrow REQUIRED)
148+
find_package(Parquet REQUIRED)
147149

148150
# You can disable jemalloc by passing the `-DENABLE_JEMALLOC=OFF` option to CMake.
149151
option(ENABLE_JEMALLOC "Enable jemalloc support" ON)

benchmark/local_infinity/CMakeLists.txt

+12
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ target_link_libraries(
1717
atomic.a
1818
jma
1919
opencc
20+
Arrow::arrow_shared
21+
Parquet::parquet_shared
2022
)
2123

2224
# ########################################
@@ -40,6 +42,8 @@ target_link_libraries(
4042
atomic.a
4143
jma
4244
opencc
45+
Arrow::arrow_shared
46+
Parquet::parquet_shared
4347
)
4448

4549
# query benchmark
@@ -61,6 +65,8 @@ target_link_libraries(
6165
atomic.a
6266
jma
6367
opencc
68+
Arrow::arrow_shared
69+
Parquet::parquet_shared
6470
)
6571

6672
# ########################################
@@ -84,6 +90,8 @@ target_link_libraries(
8490
atomic.a
8591
jma
8692
opencc
93+
Arrow::arrow_shared
94+
Parquet::parquet_shared
8795
)
8896

8997
# ########################################
@@ -105,6 +113,8 @@ target_link_libraries(
105113
atomic.a
106114
jma
107115
opencc
116+
Arrow::arrow_shared
117+
Parquet::parquet_shared
108118
)
109119

110120
add_executable(bmp_benchmark
@@ -125,6 +135,8 @@ target_link_libraries(
125135
atomic.a
126136
jma
127137
opencc
138+
Arrow::arrow_shared
139+
Parquet::parquet_shared
128140
)
129141

130142
if(ENABLE_JEMALLOC)

benchmark/remote_infinity/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ target_link_libraries(
2626
thrift.a
2727
jma
2828
opencc
29+
Arrow::arrow_shared
30+
Parquet::parquet_shared
2931
)
3032

3133
if(ENABLE_JEMALLOC)

src/CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,8 @@ target_link_libraries(infinity
289289
oatpp.a
290290
jma
291291
opencc
292+
Arrow::arrow_shared
293+
Parquet::parquet_shared
292294
)
293295

294296
if (ENABLE_JEMALLOC)
@@ -338,6 +340,8 @@ target_link_libraries(embedded_infinity_ext PRIVATE
338340
oatpp.a
339341
jma
340342
opencc
343+
Arrow::arrow_shared
344+
Parquet::parquet_shared
341345
)
342346
target_link_options(embedded_infinity_ext PRIVATE -static-libstdc++ -static-libgcc)
343347

@@ -464,6 +468,8 @@ target_link_libraries(unit_test
464468
thrift.a
465469
jma
466470
opencc
471+
Arrow::arrow_shared
472+
Parquet::parquet_shared
467473
)
468474

469475
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/lib")

src/common/third_party.cppm

+82
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,27 @@ module;
4646
#include "oatpp/network/tcp/server/ConnectionProvider.hpp"
4747
#include "oatpp/web/server/HttpConnectionHandler.hpp"
4848

49+
#include "arrow/api.h"
50+
#include "arrow/array.h"
51+
#include "arrow/array/array_base.h"
52+
#include "arrow/chunked_array.h"
53+
#include "arrow/io/api.h"
54+
#include "arrow/io/caching.h"
55+
#include "arrow/io/file.h"
56+
#include "arrow/memory_pool.h"
57+
#include "arrow/record_batch.h"
58+
#include "arrow/result.h"
59+
#include "arrow/status.h"
60+
#include "arrow/table.h"
61+
#include "arrow/type_fwd.h"
62+
#include "parquet/arrow/reader.h"
63+
#include "parquet/arrow/writer.h"
64+
#include <arrow/array/array_nested.h>
65+
#include <arrow/array/array_primitive.h>
66+
#include <arrow/io/interfaces.h>
67+
#include <arrow/type.h>
68+
#include "arrow/array/builder_primitive.h"
69+
4970
#pragma clang diagnostic pop
5071

5172
export module third_party;
@@ -113,6 +134,67 @@ namespace moodycamel {
113134
export using moodycamel::ConcurrentQueue;
114135
}
115136

137+
namespace arrow {
138+
export using Status = arrow::Status;
139+
export using ReadableFile = arrow::io::ReadableFile;
140+
export using RandomAccessFile = arrow::io::RandomAccessFile;
141+
export using Table = arrow::Table;
142+
143+
export using ChunkedArray = arrow::ChunkedArray;
144+
export using ArrayBuilder = arrow::ArrayBuilder;
145+
export using Array = arrow::Array;
146+
export using BooleanArray = arrow::BooleanArray;
147+
export using UInt8Array = arrow::UInt8Array;
148+
export using Int8Array = arrow::Int8Array;
149+
export using Int16Array = arrow::Int16Array;
150+
export using Int32Array = arrow::Int32Array;
151+
export using Int64Array = arrow::Int64Array;
152+
export using FloatArray = arrow::FloatArray;
153+
export using DoubleArray = arrow::DoubleArray;
154+
export using Decimal128Array = arrow::Decimal128Array;
155+
export using Date32Array = arrow::Date32Array;
156+
export using Time32Array = arrow::Time32Array;
157+
export using TimestampArray = arrow::TimestampArray;
158+
export using DurationArray = arrow::DurationArray;
159+
export using StringArray = arrow::StringArray;
160+
export using ListArray = arrow::ListArray;
161+
export using BinaryArray = arrow::BinaryArray;
162+
163+
export using UInt8Builder = arrow::UInt8Builder;
164+
export using Int8Builder = arrow::Int8Builder;
165+
export using Int16Builder = arrow::Int16Builder;
166+
export using Int32Builder = arrow::Int32Builder;
167+
export using Int64Builder = arrow::Int64Builder;
168+
export using FloatBuilder = arrow::FloatBuilder;
169+
export using DoubleBuilder = arrow::DoubleBuilder;
170+
export using Decimal128Builder = arrow::Decimal128Builder;
171+
export using Date32Builder = arrow::Date32Builder;
172+
export using Time32Builder = arrow::Time32Builder;
173+
export using TimestampBuilder = arrow::TimestampBuilder;
174+
export using DurationBuilder = arrow::DurationBuilder;
175+
export using StringBuilder = arrow::StringBuilder;
176+
export using ListBuilder = arrow::ListBuilder;
177+
178+
export using RecordBatchReader = arrow::RecordBatchReader;
179+
export using RecordBatch = arrow::RecordBatch;
180+
export using MemoryPool = arrow::MemoryPool;
181+
export MemoryPool *DefaultMemoryPool() { return arrow::default_memory_pool(); }
182+
183+
export using DataType = arrow::DataType;
184+
export using Field = arrow::Field;
185+
export using Schema = arrow::Schema;
186+
export using ParquetFileReader = parquet::arrow::FileReader;
187+
export using ParquetFileWriter = parquet::arrow::FileWriter;
188+
export using ArrowWriterProperties = parquet::ArrowWriterProperties;
189+
} // namespace arrow
190+
191+
namespace parquet {
192+
export ::arrow::Status
193+
OpenFile(std::shared_ptr<::arrow::RandomAccessFile> file, ::arrow::MemoryPool *pool, std::unique_ptr<parquet::arrow::FileReader> *reader) {
194+
return parquet::arrow::OpenFile(file, pool, reader);
195+
}
196+
} // namespace parquet
197+
116198
namespace infinity {
117199

118200
// spdlog

src/executor/explain_physical_plan.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -1762,6 +1762,10 @@ void ExplainPhysicalPlan::Explain(const PhysicalImport *import_node, SharedPtr<V
17621762
result->emplace_back(file_type);
17631763
break;
17641764
}
1765+
case CopyFileType::kPARQUET: {
1766+
SharedPtr<String> file_type = MakeShared<String>(String(intent_size, ' ') + " - type: PARQUET");
1767+
break;
1768+
}
17651769
case CopyFileType::kInvalid: {
17661770
String error_message = "Invalid show type";
17671771
LOG_CRITICAL(error_message);
@@ -1841,6 +1845,11 @@ void ExplainPhysicalPlan::Explain(const PhysicalExport *export_node, SharedPtr<V
18411845
result->emplace_back(file_type);
18421846
break;
18431847
}
1848+
case CopyFileType::kPARQUET: {
1849+
SharedPtr<String> file_type = MakeShared<String>(String(intent_size, ' ') + " - type: PARQUET");
1850+
result->emplace_back(file_type);
1851+
break;
1852+
}
18441853
case CopyFileType::kInvalid: {
18451854
String error_message = "Invalid file type";
18461855
LOG_CRITICAL(error_message);

0 commit comments

Comments
 (0)