Skip to content

Commit

Permalink
Dynamic Type Inference and User-Defined Types from CSV Headers (#329)
Browse files Browse the repository at this point in the history
* refactor: #69 Dynamic Type Inference and User-Defined Types from CSV Headers

* fix: #69 fix join

* add: #69 add  test

* update: enable typecheck

* refactor: #69 Dynamic Type Inference and User-Defined Types from CSV Headers - parseString

* refactor: #69 Dynamic Type Inference and User-Defined Types from CSV Headers - parseStringStream

* refactor: #69 test

* refactor: #69 parse type

* refactor: #69 `PickHeader` -> `PickCSVHeader`

* refactor: #69 fix newline

* refactor: #69 Dynamic Type Inference and User-Defined Types from CSV Headers - parseStringStreamToStream

* refactor: #69 Dynamic Type Inference and User-Defined Types from CSV Headers - parseStringToArraySync

* refactor: #69 Dynamic Type Inference and User-Defined Types from CSV Headers - parseStringToArraySyncWASM

* refactor: #69 Dynamic Type Inference and User-Defined Types from CSV Headers - parseStringToIterableIterator

* refactor: #69 Dynamic Type Inference and User-Defined Types from CSV Headers - parseStringToStream

* fix: #69 escape delimiter

* fix: pick header type

* fix

* fix

* Revert "Refactor/#69 infer csv header type 2"

* fix: pick header type

* support escape quotation

* fix

* common -> utils

* update test

* default delimiter and quotation

* default delimiter and quotation

* add test

* tuning

* fix

* remove biome ignore comment

* remove vitest typecheck

* fix type check

* Resolve failed resolution of conflict in Lexer.ts

* Add changeset

* Improve tsdoc example code

* Add codecov.yml to ignore test declaration files

* Fix codecov.yml

* feat: Improve to accept type parameters as const, eliminating the need to specify a readonly clause

* style: Fix import order

---------

Co-authored-by: nagasawaryoya <nagasawaryoya@gmail.com>
  • Loading branch information
kamiazya and nagasawaryoya authored Aug 21, 2024
1 parent 5d01c39 commit 0d717de
Show file tree
Hide file tree
Showing 27 changed files with 1,727 additions and 44 deletions.
5 changes: 5 additions & 0 deletions .changeset/quick-hats-tease.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"web-csv-toolbox": minor
---

Dynamic Type Inference and User-Defined Types from CSV Headers
2 changes: 2 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ignore:
- "**/*.test-d.ts"
20 changes: 13 additions & 7 deletions src/Lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@ import type {
RecordDelimiterToken,
Token,
} from "./common/types.ts";
import { COMMA, CRLF, DOUBLE_QUOTE, LF } from "./constants.ts";
import { CRLF, DEFAULT_DELIMITER, DEFAULT_QUOTATION, LF } from "./constants.ts";
import { escapeRegExp } from "./utils/escapeRegExp.ts";

/**
* CSV Lexer.
*
* Lexer tokenizes CSV data into fields and records.
*/
export class Lexer {
export class Lexer<
Delimiter extends string = DEFAULT_DELIMITER,
Quotation extends string = DEFAULT_QUOTATION,
> {
#delimiter: string;
#quotation: string;
#buffer = "";
Expand All @@ -37,11 +40,14 @@ export class Lexer {
* Constructs a new Lexer instance.
* @param options - The common options for the lexer.
*/
constructor({
delimiter = COMMA,
quotation = DOUBLE_QUOTE,
signal,
}: CommonOptions & AbortSignalOptions = {}) {
constructor(
options: CommonOptions<Delimiter, Quotation> & AbortSignalOptions = {},
) {
const {
delimiter = DEFAULT_DELIMITER,
quotation = DEFAULT_QUOTATION,
signal,
} = options;
assertCommonOptions({ delimiter, quotation });
this.#delimiter = delimiter;
this.#quotation = quotation;
Expand Down
10 changes: 7 additions & 3 deletions src/LexerTransformer.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Lexer } from "./Lexer.ts";
import type { CommonOptions, Token } from "./common/types.ts";
import type { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from "./constants.ts";

/**
* A transform stream that converts a stream of strings into a stream of tokens.
Expand Down Expand Up @@ -31,9 +32,12 @@ import type { CommonOptions, Token } from "./common/types.ts";
* // { type: RecordDelimiter, value: "\r\n", location: {...} }
* ```
*/
export class LexerTransformer extends TransformStream<string, Token[]> {
public readonly lexer: Lexer;
constructor(options: CommonOptions = {}) {
export class LexerTransformer<
Delimiter extends string = DEFAULT_DELIMITER,
Quotation extends string = DEFAULT_QUOTATION,
> extends TransformStream<string, Token[]> {
public readonly lexer: Lexer<Delimiter, Quotation>;
constructor(options: CommonOptions<Delimiter, Quotation> = {}) {
super({
transform: (chunk, controller) => {
if (chunk.length !== 0) {
Expand Down
10 changes: 7 additions & 3 deletions src/assertCommonOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,16 @@ function assertOptionValue(
* @throws {RangeError} If any required property is missing or if the delimiter is the same as the quotation.
* @throws {TypeError} If any required property is not a string.
*/
export function assertCommonOptions(
options: Required<CommonOptions>,
): asserts options is Required<CommonOptions> {
export function assertCommonOptions<
Delimiter extends string,
Quotation extends string,
>(
options: Required<CommonOptions<Delimiter, Quotation>>,
): asserts options is Required<CommonOptions<Delimiter, Quotation>> {
for (const name of ["delimiter", "quotation"] as const) {
assertOptionValue(options[name], name);
}
// @ts-ignore: TS doesn't understand that the values are strings
if (options.delimiter === options.quotation) {
throw new RangeError(
"delimiter must not be the same as quotation, use different characters",
Expand Down
36 changes: 29 additions & 7 deletions src/common/types.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import type { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from "../constants.ts";
import type { Join } from "../utils/types.ts";
import type { Field, FieldDelimiter, RecordDelimiter } from "./constants.ts";

/**
Expand Down Expand Up @@ -138,7 +140,10 @@ export interface AbortSignalOptions {
* CSV Common Options.
* @category Types
*/
export interface CommonOptions {
export interface CommonOptions<
Delimiter extends string,
Quotation extends string,
> {
/**
* CSV field delimiter.
* If you want to parse TSV, specify `'\t'`.
Expand All @@ -154,13 +159,13 @@ export interface CommonOptions {
*
* @default ','
*/
delimiter?: string;
delimiter?: Delimiter;
/**
* CSV field quotation.
*
* @default '"'
*/
quotation?: string;
quotation?: Quotation;
}

/**
Expand Down Expand Up @@ -249,8 +254,11 @@ export interface RecordAssemblerOptions<Header extends ReadonlyArray<string>>
* Parse options for CSV string.
* @category Types
*/
export interface ParseOptions<Header extends ReadonlyArray<string>>
extends CommonOptions,
export interface ParseOptions<
Header extends ReadonlyArray<string> = ReadonlyArray<string>,
Delimiter extends string = DEFAULT_DELIMITER,
Quotation extends string = DEFAULT_QUOTATION,
> extends CommonOptions<Delimiter, Quotation>,
RecordAssemblerOptions<Header>,
AbortSignalOptions {}

Expand Down Expand Up @@ -285,7 +293,15 @@ export type CSVRecord<Header extends ReadonlyArray<string>> = Record<
*
* @category Types
*/
export type CSVString = string | ReadableStream<string>;
export type CSVString<
Header extends ReadonlyArray<string> = [],
Delimiter extends string = DEFAULT_DELIMITER,
Quotation extends string = DEFAULT_QUOTATION,
> = Header extends readonly [string, ...string[]]
?
| Join<Header, Delimiter, Quotation>
| ReadableStream<Join<Header, Delimiter, Quotation>>
: string | ReadableStream<string>;

/**
* CSV Binary.
Expand All @@ -303,4 +319,10 @@ export type CSVBinary =
*
* @category Types
*/
export type CSV = CSVString | CSVBinary;
export type CSV<
Header extends ReadonlyArray<string> = [],
Delimiter extends string = DEFAULT_DELIMITER,
Quotation extends string = DEFAULT_QUOTATION,
> = Header extends []
? CSVString | CSVBinary
: CSVString<Header, Delimiter, Quotation>;
13 changes: 13 additions & 0 deletions src/constants.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
/**
 * CR is a symbol for carriage return(\r).
 */
export const CR = "\r";
/** Literal type of {@link CR}. */
export type CR = typeof CR;

/**
 * CRLF is a symbol for carriage return followed by line feed(\r\n).
 */
export const CRLF = "\r\n";
/** Literal type of {@link CRLF}. */
export type CRLF = typeof CRLF;

/**
 * LF is a symbol for line feed(\n).
 */
export const LF = "\n";
/** Literal type of {@link LF}. */
export type LF = typeof LF;

/**
 * Newline is the union of the newline literal types declared above: CRLF, CR and LF.
 */
export type Newline = CRLF | CR | LF;

/**
* COMMA is a symbol for comma(,).
Expand All @@ -11,3 +18,9 @@ export const COMMA = ",";
* DOUBLE_QUOTE is a symbol for double quote(").
*/
export const DOUBLE_QUOTE = '"';

/**
 * DEFAULT_DELIMITER is the field delimiter used when none is specified; it is COMMA.
 */
export const DEFAULT_DELIMITER = COMMA;
/** Literal type of {@link DEFAULT_DELIMITER}, used as the default for `Delimiter` type parameters. */
export type DEFAULT_DELIMITER = typeof DEFAULT_DELIMITER;

/**
 * DEFAULT_QUOTATION is the field quotation used when none is specified; it is DOUBLE_QUOTE.
 */
export const DEFAULT_QUOTATION = DOUBLE_QUOTE;
/** Literal type of {@link DEFAULT_QUOTATION}, used as the default for `Quotation` type parameters. */
export type DEFAULT_QUOTATION = typeof DEFAULT_QUOTATION;
23 changes: 15 additions & 8 deletions src/escapeField.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import type { assertCommonOptions } from "./assertCommonOptions.ts";
import type { CommonOptions } from "./common/types.ts";
import { COMMA, DOUBLE_QUOTE } from "./constants.ts";
import { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from "./constants.ts";
import { occurrences } from "./utils/occurrences.ts";

export interface EscapeFieldOptions extends CommonOptions {
export interface EscapeFieldOptions<
Delimiter extends string,
Quotation extends string,
> extends CommonOptions<Delimiter, Quotation> {
quote?: true;
}

Expand All @@ -17,14 +20,18 @@ const REPLACED_PATTERN_CACHE = new Map<string, string>();
* @param options The options.
* @returns The escaped field.
*/
export function escapeField(
export function escapeField<
const Delimiter extends string,
const Quotation extends string,
>(
value: string,
{
quotation = DOUBLE_QUOTE,
delimiter = COMMA,
quote,
}: EscapeFieldOptions = {},
options: EscapeFieldOptions<Delimiter, Quotation> = {},
): string {
const {
delimiter = DEFAULT_DELIMITER,
quotation = DEFAULT_QUOTATION,
quote,
} = options;
if (!REPLACED_PATTERN_CACHE.has(quotation)) {
REPLACED_PATTERN_CACHE.set(
quotation,
Expand Down
Loading

0 comments on commit 0d717de

Please sign in to comment.