From 88dd5d0d814964c14e52413358defe532d4db43e Mon Sep 17 00:00:00 2001 From: Philippe Charles Date: Thu, 24 Oct 2024 23:22:23 +0200 Subject: [PATCH 1/3] Add picocsv --- picocsv/build.gradle.kts | 4 + .../jmh/java/picocsv/PicocsvBenchmark.java | 66 +++++++++++++ picocsv/src/main/java/picocsv/Factory.java | 94 +++++++++++++++++++ picocsv/src/test/java/picocsv/FormatTest.java | 37 ++++++++ settings.gradle.kts | 1 + 5 files changed, 202 insertions(+) create mode 100644 picocsv/build.gradle.kts create mode 100644 picocsv/src/jmh/java/picocsv/PicocsvBenchmark.java create mode 100644 picocsv/src/main/java/picocsv/Factory.java create mode 100644 picocsv/src/test/java/picocsv/FormatTest.java diff --git a/picocsv/build.gradle.kts b/picocsv/build.gradle.kts new file mode 100644 index 0000000..c42a2bf --- /dev/null +++ b/picocsv/build.gradle.kts @@ -0,0 +1,4 @@ +dependencies { + jmh(rootProject) + implementation("com.github.nbbrd.picocsv:picocsv:2.4.0") +} diff --git a/picocsv/src/jmh/java/picocsv/PicocsvBenchmark.java b/picocsv/src/jmh/java/picocsv/PicocsvBenchmark.java new file mode 100644 index 0000000..09a3c38 --- /dev/null +++ b/picocsv/src/jmh/java/picocsv/PicocsvBenchmark.java @@ -0,0 +1,66 @@ +package picocsv; + +import java.io.IOException; +import java.util.Collection; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.infra.Blackhole; + +import de.siegmar.csvbenchmark.CsvConstants; +import de.siegmar.csvbenchmark.ICsvReader; +import de.siegmar.csvbenchmark.ICsvWriter; +import de.siegmar.csvbenchmark.util.NullWriter; +import de.siegmar.csvbenchmark.util.RowSupplier; + +public class PicocsvBenchmark { + + @State(Scope.Benchmark) + public static class WriteState { + + private final RowSupplier rowSupplier = new RowSupplier(CsvConstants.RECORDS); + private ICsvWriter writer; + 
+ @Setup + public void setup(final Blackhole bh) throws IOException { + writer = Factory.writer(new NullWriter(bh)); + } + + @TearDown + public void teardown() throws IOException { + writer.close(); + } + + } + + @Benchmark + public void write(final WriteState state) throws Exception { + state.writer.writeRecord(state.rowSupplier.get()); + } + + @State(Scope.Benchmark) + public static class ReadState { + + private ICsvReader reader; + + @Setup + public void setup() throws IOException { + reader = Factory.reader(); + } + + @TearDown + public void teardown() throws IOException { + reader.close(); + } + + } + + @Benchmark + public Collection read(final ReadState state) throws Exception { + return state.reader.readRecord(); + } + +} diff --git a/picocsv/src/main/java/picocsv/Factory.java b/picocsv/src/main/java/picocsv/Factory.java new file mode 100644 index 0000000..efa75f6 --- /dev/null +++ b/picocsv/src/main/java/picocsv/Factory.java @@ -0,0 +1,94 @@ +package picocsv; + +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; + +import de.siegmar.csvbenchmark.CsvConstants; +import de.siegmar.csvbenchmark.ICsvReader; +import de.siegmar.csvbenchmark.ICsvWriter; +import de.siegmar.csvbenchmark.util.InfiniteDataReader; +import nbbrd.picocsv.Csv; + +public final class Factory { + + private Factory() { + } + + public static ICsvReader reader() throws IOException { + return new ICsvReader() { + + // Performances seems highly related to the buffer size + + // Slower: the default buf size (8192) is too big for the benchmark data + private static final int BUF_SIZE_SLOWER = Csv.DEFAULT_CHAR_BUFFER_SIZE; + + // Equivalent: a random low buf size seems efficient + private static final int BUF_SIZE_EQUIVALENT = 200; + + // Faster: a perfect buf size (163) aligns the stars and performs very well + private static final int BUF_SIZE_FASTER = CsvConstants.DATA.length(); + + private final Csv.Reader csvReader = Csv.Reader.of( + 
Csv.Format.DEFAULT + .toBuilder() + .delimiter(CsvConstants.SEPARATOR) + .separator(Csv.Format.UNIX_SEPARATOR) + .quote(CsvConstants.DELIMITER) + .build(), + Csv.ReaderOptions.DEFAULT + .toBuilder() + .build(), + new InfiniteDataReader(CsvConstants.DATA), + BUF_SIZE_SLOWER + ); + + @Override + public List readRecord() throws IOException { + // picocsv is not designed to retrieve all fields at once + List result = new ArrayList<>(); + if (csvReader.readLine()) while (csvReader.readField()) result.add(csvReader.toString()); + return result; + } + + @Override + public void close() throws IOException { + csvReader.close(); + } + + }; + } + + public static ICsvWriter writer(final Writer writer) throws IOException { + return new ICsvWriter() { + + private final Csv.Writer csvWriter = Csv.Writer.of( + Csv.Format.DEFAULT + .toBuilder() + .delimiter(CsvConstants.SEPARATOR) + .separator(Csv.Format.UNIX_SEPARATOR) + .quote(CsvConstants.DELIMITER) + .build(), + Csv.WriterOptions.DEFAULT, + writer, + Csv.DEFAULT_CHAR_BUFFER_SIZE + ); + + @Override + public void writeRecord(final List fields) throws IOException { + for (String field : fields) { + csvWriter.writeField(field); + } + csvWriter.writeEndOfLine(); + } + + @Override + public void close() throws IOException { + csvWriter.close(); + } + + }; + } + +} diff --git a/picocsv/src/test/java/picocsv/FormatTest.java b/picocsv/src/test/java/picocsv/FormatTest.java new file mode 100644 index 0000000..3ea11f1 --- /dev/null +++ b/picocsv/src/test/java/picocsv/FormatTest.java @@ -0,0 +1,37 @@ +package picocsv; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.StringWriter; +import java.util.List; + +import org.junit.jupiter.api.Test; + +import de.siegmar.csvbenchmark.CsvConstants; +import de.siegmar.csvbenchmark.ICsvReader; +import de.siegmar.csvbenchmark.ICsvWriter; + +public class FormatTest { + + @Test + public void reader() throws Exception { + try (ICsvReader reader = Factory.reader()) { + for 
(final List row : CsvConstants.RECORDS) { + assertEquals(row, reader.readRecord()); + } + } + } + + @Test + public void writer() throws Exception { + final StringWriter sw = new StringWriter(); + try (ICsvWriter writer = Factory.writer(sw)) { + for (final List row : CsvConstants.RECORDS) { + writer.writeRecord(row); + } + } + + assertEquals(CsvConstants.DATA, sw.toString()); + } + +} diff --git a/settings.gradle.kts b/settings.gradle.kts index 93c6f51..3e3282d 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -5,6 +5,7 @@ include("fastcsv") include("jackson") include("javacsv") include("opencsv") +include("picocsv") include("simpleflatmapper") include("supercsv") include("univocity") From 1e848a692a7ebce3534817106c120d64cd76378d Mon Sep 17 00:00:00 2001 From: Philippe Charles Date: Sun, 27 Oct 2024 12:43:18 +0100 Subject: [PATCH 2/3] Fix illegal static declaration in inner class --- picocsv/src/main/java/picocsv/Factory.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picocsv/src/main/java/picocsv/Factory.java b/picocsv/src/main/java/picocsv/Factory.java index efa75f6..39674f7 100644 --- a/picocsv/src/main/java/picocsv/Factory.java +++ b/picocsv/src/main/java/picocsv/Factory.java @@ -22,13 +22,13 @@ public static ICsvReader reader() throws IOException { // Performances seems highly related to the buffer size // Slower: the default buf size (8192) is too big for the benchmark data - private static final int BUF_SIZE_SLOWER = Csv.DEFAULT_CHAR_BUFFER_SIZE; + private final int BUF_SIZE_SLOWER = Csv.DEFAULT_CHAR_BUFFER_SIZE; // Equivalent: a random low buf size seems efficient - private static final int BUF_SIZE_EQUIVALENT = 200; + private final int BUF_SIZE_EQUIVALENT = 200; // Faster: a perfect buf size (163) aligns the stars and performs very well - private static final int BUF_SIZE_FASTER = CsvConstants.DATA.length(); + private final int BUF_SIZE_FASTER = CsvConstants.DATA.length(); private final Csv.Reader csvReader = 
Csv.Reader.of( Csv.Format.DEFAULT From 478ac07e5a7d67d53205d1d2fbcb4d3d4ae4d810 Mon Sep 17 00:00:00 2001 From: Philippe Charles Date: Sun, 27 Oct 2024 14:16:40 +0100 Subject: [PATCH 3/3] Fix checkstyle errors --- picocsv/src/main/java/picocsv/Factory.java | 70 ++++++++-------------- 1 file changed, 26 insertions(+), 44 deletions(-) diff --git a/picocsv/src/main/java/picocsv/Factory.java b/picocsv/src/main/java/picocsv/Factory.java index 39674f7..03a731b 100644 --- a/picocsv/src/main/java/picocsv/Factory.java +++ b/picocsv/src/main/java/picocsv/Factory.java @@ -13,42 +13,37 @@ public final class Factory { + private static final Csv.Format FORMAT = Csv.Format.DEFAULT + .toBuilder() + .delimiter(CsvConstants.SEPARATOR) + .separator(Csv.Format.UNIX_SEPARATOR) + .quote(CsvConstants.DELIMITER) + .build(); + + // Read performance seems highly related to the buffer size + // Slower: the default buf size (8192) is too big for the benchmark data + private static final int BUF_SIZE_SLOWER = Csv.DEFAULT_CHAR_BUFFER_SIZE; + // Equivalent: a random low buf size seems efficient + private static final int BUF_SIZE_EQUIVALENT = 200; + // Faster: a perfect buf size (163) aligns the stars and performs very well + private static final int BUF_SIZE_FASTER = CsvConstants.DATA.length(); + private Factory() { } public static ICsvReader reader() throws IOException { return new ICsvReader() { - - // Performances seems highly related to the buffer size - - // Slower: the default buf size (8192) is too big for the benchmark data - private final int BUF_SIZE_SLOWER = Csv.DEFAULT_CHAR_BUFFER_SIZE; - - // Equivalent: a random low buf size seems efficient - private final int BUF_SIZE_EQUIVALENT = 200; - - // Faster: a perfect buf size (163) aligns the stars and performs very well - private final int BUF_SIZE_FASTER = CsvConstants.DATA.length(); - - private final Csv.Reader csvReader = Csv.Reader.of( - Csv.Format.DEFAULT - .toBuilder() - .delimiter(CsvConstants.SEPARATOR) - 
.separator(Csv.Format.UNIX_SEPARATOR) - .quote(CsvConstants.DELIMITER) - .build(), - Csv.ReaderOptions.DEFAULT - .toBuilder() - .build(), - new InfiniteDataReader(CsvConstants.DATA), - BUF_SIZE_SLOWER - ); + private final Csv.Reader csvReader = Csv.Reader.of(FORMAT, Csv.ReaderOptions.DEFAULT, + new InfiniteDataReader(CsvConstants.DATA), BUF_SIZE_SLOWER); @Override public List readRecord() throws IOException { - // picocsv is not designed to retrieve all fields at once - List result = new ArrayList<>(); - if (csvReader.readLine()) while (csvReader.readField()) result.add(csvReader.toString()); + final List result = new ArrayList<>(); + if (csvReader.readLine()) { + while (csvReader.readField()) { + result.add(csvReader.toString()); + } + } return result; } @@ -56,28 +51,17 @@ public List readRecord() throws IOException { public void close() throws IOException { csvReader.close(); } - }; } public static ICsvWriter writer(final Writer writer) throws IOException { return new ICsvWriter() { - - private final Csv.Writer csvWriter = Csv.Writer.of( - Csv.Format.DEFAULT - .toBuilder() - .delimiter(CsvConstants.SEPARATOR) - .separator(Csv.Format.UNIX_SEPARATOR) - .quote(CsvConstants.DELIMITER) - .build(), - Csv.WriterOptions.DEFAULT, - writer, - Csv.DEFAULT_CHAR_BUFFER_SIZE - ); + private final Csv.Writer csvWriter = Csv.Writer.of(FORMAT, Csv.WriterOptions.DEFAULT, + writer, Csv.DEFAULT_CHAR_BUFFER_SIZE); @Override public void writeRecord(final List fields) throws IOException { - for (String field : fields) { + for (final String field : fields) { csvWriter.writeField(field); } csvWriter.writeEndOfLine(); @@ -87,8 +71,6 @@ public void writeRecord(final List fields) throws IOException { public void close() throws IOException { csvWriter.close(); } - }; } - }