-
Notifications
You must be signed in to change notification settings - Fork 932
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add CSV Reader options classes to pylibcudf #17412
Changes from 10 commits
89945b6
96e1d44
39a22fc
9a2462b
40aba23
79dbafa
14a36e6
739e61f
187a43f
31d1212
db19620
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from collections.abc import Mapping | ||
from typing import Self | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since cudf still supports Python 3.10, we'll need to import this from `typing_extensions`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, done |
||
|
||
from pylibcudf.io.types import ( | ||
CompressionType, | ||
|
@@ -12,6 +13,47 @@ from pylibcudf.io.types import ( | |
from pylibcudf.table import Table | ||
from pylibcudf.types import DataType | ||
|
||
class CsvReaderOptions: | ||
def __init__(self): ... | ||
def set_header(self, header: int): ... | ||
def set_names(self, col_names: list[str]): ... | ||
def set_prefix(self, prefix: str): ... | ||
def set_use_cols_indexes(self, col_indices: list[int]): ... | ||
def set_use_cols_names(self, col_names: list[str]): ... | ||
def set_delimiter(self, delimiter: str): ... | ||
def set_thousands(self, thousands: str): ... | ||
def set_comment(self, comment: str): ... | ||
def set_parse_dates(self, val: list[int | str]): ... | ||
def set_parse_hex(self, val: list[int | str]): ... | ||
def set_dtypes(self, types: dict[str, DataType] | list[DataType]): ... | ||
def set_true_values(self, true_values: list[str]): ... | ||
def set_false_values(self, false_values: list[str]): ... | ||
def set_na_values(self, na_values: list[str]): ... | ||
@staticmethod | ||
def builder(source: SourceInfo) -> CsvReaderOptionsBuilder: ... | ||
|
||
class CsvReaderOptionsBuilder: | ||
def __init__(self): ... | ||
def compression(self, compression: CompressionType) -> Self: ... | ||
def mangle_dupe_cols(self, mangle_dupe_cols: bool) -> Self: ... | ||
def byte_range_offset(self, byte_range_offset: int) -> Self: ... | ||
def byte_range_size(self, byte_range_size: int) -> Self: ... | ||
def nrows(self, nrows: int) -> Self: ... | ||
def skiprows(self, skiprows: int) -> Self: ... | ||
def skipfooter(self, skipfooter: int) -> Self: ... | ||
def quoting(self, quoting: QuoteStyle) -> Self: ... | ||
def lineterminator(self, lineterminator: str) -> Self: ... | ||
def quotechar(self, quotechar: str) -> Self: ... | ||
def decimal(self, decimal: str) -> Self: ... | ||
def delim_whitespace(self, delim_whitespace: bool) -> Self: ... | ||
def skipinitialspace(self, skipinitialspace: bool) -> Self: ... | ||
def skip_blank_lines(self, skip_blank_lines: bool) -> Self: ... | ||
def doublequote(self, doublequote: bool) -> Self: ... | ||
def keep_default_na(self, keep_default_na: bool) -> Self: ... | ||
def na_filter(self, na_filter: bool) -> Self: ... | ||
def dayfirst(self, dayfirst: bool) -> Self: ... | ||
def build(self) -> CsvReaderOptions: ... | ||
|
||
def read_csv( | ||
source_info: SourceInfo, | ||
*, | ||
|
@@ -54,7 +96,7 @@ def read_csv( | |
# detect_whitespace_around_quotes: bool = False, | ||
# timestamp_type: DataType = DataType(type_id.EMPTY), | ||
) -> TableWithMetadata: ... | ||
def write_csv(options: CsvWriterOptionsBuilder) -> None: ... | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Was this annotation incorrect before? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, I don't think so. I wanted to stay consistent because I noticed we dropped the `-> None`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, I see. IMO we should still include `-> None`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll follow up based on what we decide |
||
def write_csv(options: CsvWriterOptionsBuilder): ... | ||
|
||
class CsvWriterOptions: | ||
def __init__(self): ... | ||
|
@@ -63,14 +105,12 @@ class CsvWriterOptions: | |
|
||
class CsvWriterOptionsBuilder: | ||
def __init__(self): ... | ||
def names(self, names: list) -> CsvWriterOptionsBuilder: ... | ||
def na_rep(self, val: str) -> CsvWriterOptionsBuilder: ... | ||
def include_header(self, val: bool) -> CsvWriterOptionsBuilder: ... | ||
def rows_per_chunk(self, val: int) -> CsvWriterOptionsBuilder: ... | ||
def line_terminator(self, term: str) -> CsvWriterOptionsBuilder: ... | ||
def inter_column_delimiter( | ||
self, delim: str | ||
) -> CsvWriterOptionsBuilder: ... | ||
def true_value(self, val: str) -> CsvWriterOptionsBuilder: ... | ||
def false_value(self, val: str) -> CsvWriterOptionsBuilder: ... | ||
def names(self, names: list) -> Self: ... | ||
def na_rep(self, val: str) -> Self: ... | ||
def include_header(self, val: bool) -> Self: ... | ||
def rows_per_chunk(self, val: int) -> Self: ... | ||
def line_terminator(self, term: str) -> Self: ... | ||
def inter_column_delimiter(self, delim: str) -> Self: ... | ||
def true_value(self, val: str) -> Self: ... | ||
def false_value(self, val: str) -> Self: ... | ||
def build(self) -> CsvWriterOptions: ... |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.