From 2ea5c15d9c3bc41609fc24897cbd7e22249e7857 Mon Sep 17 00:00:00 2001 From: salemsd Date: Wed, 22 Jan 2025 16:55:06 +0100 Subject: [PATCH 01/15] set up writer and matrix type --- pom.xml | 12 ++ .../datamanager_back/util/ArrowReader.java | 19 +++ .../datamanager_back/util/ArrowWriter.java | 114 ++++++++++++++++++ .../datamanager_back/util/ColumnType.java | 15 +++ .../antares/datamanager_back/util/Matrix.java | 28 +++++ .../datamanager_back/util/MatrixColumn.java | 71 +++++++++++ 6 files changed, 259 insertions(+) create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/ColumnType.java create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/Matrix.java create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/MatrixColumn.java diff --git a/pom.xml b/pom.xml index 4eda159..5a426ca 100644 --- a/pom.xml +++ b/pom.xml @@ -29,6 +29,7 @@ 21 2.0.3 + 18.1.0 antaressimulatorteam AntaresSimulatorTeam_antares-datamanager-back https://sonarcloud.io @@ -162,6 +163,17 @@ junit-jupiter test + + + org.apache.arrow + arrow-vector + ${arrow.version} + + + org.apache.arrow + arrow-memory-netty + ${arrow.version} + diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java new file mode 100644 index 0000000..b4796a4 --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java @@ -0,0 +1,19 @@ +package com.rte_france.antares.datamanager_back.util; + +import java.io.IOException; +import java.nio.file.Path; + +public class ArrowReader { + Matrix read(Path path) throws IOException { + + } + + MatrixColumn read(Path path, String columnName) throws IOException { + var matrix = read(path); + return matrix.getColumns().stream() + .filter(c -> c.name().equals(columnName)) + .findAny() + .orElse(null); + } + +} diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java new file mode 100644 index 0000000..f1498b4 --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java @@ -0,0 +1,114 @@ +/** + * Copyright (c) 2025, RTE (http://www.rte-france.com) + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +package com.rte_france.antares.datamanager_back.util; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.channels.Channels; +import java.nio.file.Files; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static com.rte_france.antares.datamanager_back.util.ColumnType.FLOAT; +import static com.rte_france.antares.datamanager_back.util.ColumnType.INT; + +/** + * @author Sylvain Leclerc + */ +public class ArrowWriter { + + private static final BufferAllocator ALLOCATOR = new RootAllocator(); + + private static Field floatField(String name) { + return new Field(name, FieldType.notNullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null); + } + + private static Field intField(String name) { + return new Field(name, FieldType.notNullable(new ArrowType.Int(32, true)), null); + } + + private static Field createField(MatrixColumn column) { + return switch (column.type()) { + case INT -> intField(column.name()); + case FLOAT -> floatField(column.name()); + default -> throw new IllegalArgumentException("Invalid column type " + column.type()); + }; + } + + private static Schema createSchema(Matrix matrix) { + var fields = matrix.getColumns().stream() + .map(ArrowWriter::createField) + .collect(Collectors.toList()); + return new Schema(fields); + } + + private static void populateFloatVector(VectorSchemaRoot table, MatrixColumn column) { + var vector = (Float4Vector) table.getVector(column.name()); + var values = column.getFloatValues(); + var size = values.length; + vector.allocateNew(size); + table.setRowCount(size); + IntStream.range(0, size).forEach(i -> vector.set(i, values[i])); + } + + private static void populateIntVector(VectorSchemaRoot table, MatrixColumn column) { + var vector = (IntVector) table.getVector(column.name()); + var values = column.getIntValues(); + var size = values.length; + vector.allocateNew(size); + table.setRowCount(size); + IntStream.range(0, size).forEach(i -> vector.set(i, values[i])); + } + + public void write(Matrix matrix, OutputStream out) throws IOException { + var schema = createSchema(matrix); + try (var table = VectorSchemaRoot.create(schema, ALLOCATOR)) { + matrix.getColumns().forEach(c -> { + switch (c.type()) { + case INT -> populateIntVector(table, c); + case FLOAT -> populateFloatVector(table, c); + default -> throw new IllegalArgumentException("Invalid column type " + c.type()); + } + }); + + // OpenOption[] options = {StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING}; + + try (var ch = Channels.newChannel(out); + var writer = new ArrowFileWriter(table, null, ch)) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + } + } + + public String getDefaultFileExtension() { + return "arrow"; + } + + public static void main(String[] args) { + var writer = new ArrowWriter(); + writer.write(matrix, Path.of("src/main/resources/test-matrix.arrow")); + } +} diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ColumnType.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ColumnType.java new file mode 100644 index 0000000..6f8ad26 --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ColumnType.java @@ -0,0 +1,15 @@ +/** + * Copyright (c) 2023, RTE (http://www.rte-france.com) + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +package com.rte_france.antares.datamanager_back.util; + +/** + * @author Sylvain Leclerc + */ +public enum ColumnType { + INT, + FLOAT +} diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/Matrix.java b/src/main/java/com/rte_france/antares/datamanager_back/util/Matrix.java new file mode 100644 index 0000000..b8ea185 --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/Matrix.java @@ -0,0 +1,28 @@ +/** + * Copyright (c) 2023, RTE (http://www.rte-france.com) + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +package com.rte_france.antares.datamanager_back.util; + +import lombok.Value; + +import java.util.List; + +/** + * @author Sylvain Leclerc + */ +@Value +public class Matrix { + + List columns; + + public int getRowCount() { + if (columns.isEmpty()) { + return 0; + } + return columns.getFirst().getSize(); + } + +} diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixColumn.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixColumn.java new file mode 100644 index 0000000..8595f6e --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixColumn.java @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2025, RTE (http://www.rte-france.com) + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +package com.rte_france.antares.datamanager_back.util; + +import java.util.Arrays; +import java.util.Objects; + +/** + * @author Sylvain Leclerc + */ +public record MatrixColumn(String name, ColumnType type, int[] intValues, float[] floatValues) { + + public float[] getFloatValues() { + if (type != ColumnType.FLOAT) { + throw new IllegalArgumentException("Cannot get float values from int column"); + } + return floatValues; + } + + public int[] getIntValues() { + if (type != ColumnType.INT) { + throw new IllegalArgumentException("Cannot get int values from float column"); + } + return intValues; + } + + public int getSize() { + return switch (type) { + case INT -> intValues.length; + case FLOAT -> floatValues.length; + default -> throw new IllegalStateException("Invalid column type " + type); + }; + } + + public MatrixColumn(String name, float[] values) { + this(name, ColumnType.FLOAT, null, Objects.requireNonNull(values.clone())); + } + + public MatrixColumn(String name, int[] values) { + this(name, ColumnType.INT, Objects.requireNonNull(values.clone()), null); + } + + public MatrixColumn { + Objects.requireNonNull(name); + Objects.requireNonNull(type); + if (type == ColumnType.FLOAT && floatValues == null) { + throw new IllegalArgumentException("Float column must have floatValues"); + } + if (type == ColumnType.INT && intValues == null) { + throw new IllegalArgumentException("Int column must have intValues"); + } + } + + public MatrixColumn renamed(String newName) { + return new MatrixColumn(newName, type, intValues, floatValues); + } + + @Override + public String toString() { + return "MatrixColumn{" + + "name='" + name + '\'' + + ", type=" + type + + ", floatValues=" + Arrays.toString(floatValues) + + ", intValues=" + Arrays.toString(intValues) + + '}'; + } +} From 077e7339d2a1bfd88f9fe55125461659ca1b1c1c Mon Sep 17 00:00:00 2001 From: salemsd Date: Thu, 23 Jan 2025 10:30:57 +0100 Subject: [PATCH 02/15] set up arrow reader --- .../datamanager_back/util/ArrowReader.java | 71 ++++++++++++++++--- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java index b4796a4..3537de3 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java @@ -1,19 +1,74 @@ package com.rte_france.antares.datamanager_back.util; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; public class ArrowReader { - Matrix read(Path path) throws IOException { + public Matrix readMatrixFromTxt(Path filePath, ColumnType columnType) throws IOException { + var lines = Files.readAllLines(filePath); + if (lines.isEmpty()) { + throw new IllegalArgumentException("File is empty"); + } + + var rowCount = lines.size(); + var columnCount = lines.getFirst().split("\\s+").length; + + if (columnType == ColumnType.FLOAT) { + float[][] data = new float[rowCount][columnCount]; + for (var i = 0; i < rowCount; i++) { + var values = lines.get(i).split("\\s+"); + for (var j = 0; j < columnCount; j++) { + data[i][j] = Float.parseFloat(values[j]); + } + } + return createMatrixFromData(data); + } else if (columnType == ColumnType.INT) { + var data = new int[rowCount][columnCount]; + for (var i = 0; i < rowCount; i++) { + var values = lines.get(i).split("\\s+"); + for (var j = 0; j < columnCount; j++) { + data[i][j] = Integer.parseInt(values[j]); + } + } + return createMatrixFromData(data); + } else { + throw new IllegalArgumentException("Unsupported column type: " + columnType); + } } - MatrixColumn read(Path path, String columnName) throws IOException { - var matrix = read(path); - return matrix.getColumns().stream() - .filter(c -> c.name().equals(columnName)) - .findAny() - .orElse(null); + private Matrix createMatrixFromData(float[][] data) { + var rowCount = data.length; + var columnCount = data[0].length; + var columns = new ArrayList(columnCount); + + for (var j = 0; j < columnCount; j++) { + var columnData = new float[rowCount]; + for (var i = 0; i < rowCount; i++) { + columnData[i] = data[i][j]; + } + columns.add(new MatrixColumn("Column" + j, columnData)); + } + + return new Matrix(columns); } -} + private Matrix createMatrixFromData(int[][] data) { + var rowCount = data.length; + var columnCount = data[0].length; + var columns = new ArrayList(columnCount); + + for (var j = 0; j < columnCount; j++) { + var columnData = new int[rowCount]; + for (var i = 0; i < rowCount; i++) { + columnData[i] = data[i][j]; + } + columns.add(new MatrixColumn("Column" + j, columnData)); + } + + return new Matrix(columns); + } +} \ No newline at end of file From 2412d0ac7f65988032bf0bf1f1754c0b3a6198e6 Mon Sep 17 00:00:00 2001 From: salemsd Date: Mon, 27 Jan 2025 14:53:28 +0100 Subject: [PATCH 03/15] working serializer and deserializer --- pom.xml | 20 +++- .../datamanager_back/util/ArrowReader.java | 96 ++++++++++--------- .../datamanager_back/util/ArrowWriter.java | 77 ++++----------- .../antares/datamanager_back/util/Matrix.java | 3 - .../datamanager_back/util/MatrixColumn.java | 48 +--------- 5 files changed, 94 insertions(+), 150 deletions(-) diff --git a/pom.xml b/pom.xml index 5a426ca..3285e10 100644 --- a/pom.xml +++ b/pom.xml @@ -55,10 +55,6 @@ org.springframework.boot spring-boot-starter-validation - - org.springframework.boot - spring-boot-starter-integration - org.springframework.boot spring-boot-starter-web @@ -173,7 +169,19 @@ org.apache.arrow arrow-memory-netty ${arrow.version} + + + org.apache.arrow + arrow-memory-core + + + + + + + + @@ -188,6 +196,7 @@ lombok + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED @@ -231,6 +240,9 @@ org.apache.maven.plugins maven-surefire-plugin 3.0.0-M9 + + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java index 3537de3..e301db1 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java @@ -1,14 +1,53 @@ package com.rte_france.antares.datamanager_back.util; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.types.pojo.Field; + import java.io.IOException; +import java.nio.channels.Channels; +import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import java.util.Objects; +import java.util.function.BiConsumer; +import java.util.function.Function; public class ArrowReader { + private static final BufferAllocator ALLOCATOR = new RootAllocator(); + + public static Matrix readMatrixFromArrow(Path filePath) throws IOException { + Objects.requireNonNull(filePath); + + try (var channel = Files.newByteChannel(filePath); + var reader = new ArrowFileReader(channel, ALLOCATOR)) { + + reader.loadNextBatch(); + var root = reader.getVectorSchemaRoot(); + List fields = root.getSchema().getFields(); + + var columns = new ArrayList(); + for (var field : fields) { + var vector = root.getVector(field.getName()); + var values = new double[vector.getValueCount()]; + for (var i = 0; i < vector.getValueCount(); i++) { + values[i] = ((Float8Vector) vector).get(i); + } + columns.add(new MatrixColumn(field.getName(), values)); + } + + return new Matrix(columns); + } + } + + public static Matrix readMatrixFromTxt(Path filePath) throws IOException { + Objects.requireNonNull(filePath); - public Matrix readMatrixFromTxt(Path filePath, ColumnType columnType) throws IOException { var lines = Files.readAllLines(filePath); if (lines.isEmpty()) { throw new IllegalArgumentException("File is empty"); @@ -17,56 +56,25 @@ public Matrix readMatrixFromTxt(Path filePath, ColumnType columnType) throws IOE var rowCount = lines.size(); var columnCount = lines.getFirst().split("\\s+").length; - if (columnType == ColumnType.FLOAT) { - float[][] data = new float[rowCount][columnCount]; - for (var i = 0; i < rowCount; i++) { - var values = lines.get(i).split("\\s+"); - for (var j = 0; j < columnCount; j++) { - data[i][j] = Float.parseFloat(values[j]); - } - } - return createMatrixFromData(data); - } else if (columnType == ColumnType.INT) { - var data = new int[rowCount][columnCount]; - for (var i = 0; i < rowCount; i++) { - var values = lines.get(i).split("\\s+"); - for (var j = 0; j < columnCount; j++) { - data[i][j] = Integer.parseInt(values[j]); - } - } - return createMatrixFromData(data); - } else { - throw new IllegalArgumentException("Unsupported column type: " + columnType); - } + return readMatrix(lines, rowCount, columnCount, Double::parseDouble, (data, value) -> data.add(Double.parseDouble(value))); } - private Matrix createMatrixFromData(float[][] data) { - var rowCount = data.length; - var columnCount = data[0].length; - var columns = new ArrayList(columnCount); + private static Matrix readMatrix(List lines, int rowCount, int columnCount, Function parser, BiConsumer, String> adder) { + var data = new ArrayList>(columnCount); + for (var i = 0; i < columnCount; i++) { + data.add(new ArrayList<>(rowCount)); + } - for (var j = 0; j < columnCount; j++) { - var columnData = new float[rowCount]; - for (var i = 0; i < rowCount; i++) { - columnData[i] = data[i][j]; + for (var line : lines) { + var values = line.split("\\s+"); + for (var j = 0; j < columnCount; j++) { + adder.accept(data.get(j), values[j]); } - columns.add(new MatrixColumn("Column" + j, columnData)); } - return new Matrix(columns); - } - - private Matrix createMatrixFromData(int[][] data) { - var rowCount = data.length; - var columnCount = data[0].length; var columns = new ArrayList(columnCount); - - for (var j = 0; j < columnCount; j++) { - var columnData = new int[rowCount]; - for (var i = 0; i < rowCount; i++) { - columnData[i] = data[i][j]; - } - columns.add(new MatrixColumn("Column" + j, columnData)); + for (int j = 0; j < columnCount; j++) { + columns.add(new MatrixColumn("Column" + j, data.get(j).stream().mapToDouble(Double::doubleValue).toArray())); } return new Matrix(columns); diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java index f1498b4..66cabc4 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java @@ -1,16 +1,8 @@ -/** - * Copyright (c) 2025, RTE (http://www.rte-france.com) - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - package com.rte_france.antares.datamanager_back.util; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.ipc.ArrowFileWriter; import org.apache.arrow.vector.types.FloatingPointPrecision; @@ -23,58 +15,29 @@ import java.io.OutputStream; import java.nio.channels.Channels; import java.nio.file.Files; -import java.nio.file.OpenOption; import java.nio.file.Path; -import java.nio.file.StandardOpenOption; import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; -import static com.rte_france.antares.datamanager_back.util.ColumnType.FLOAT; -import static com.rte_france.antares.datamanager_back.util.ColumnType.INT; - -/** - * @author Sylvain Leclerc - */ public class ArrowWriter { private static final BufferAllocator ALLOCATOR = new RootAllocator(); - private static Field floatField(String name) { - return new Field(name, FieldType.notNullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null); - } - - private static Field intField(String name) { - return new Field(name, FieldType.notNullable(new ArrowType.Int(32, true)), null); - } - - private static Field createField(MatrixColumn column) { - return switch (column.type()) { - case INT -> intField(column.name()); - case FLOAT -> floatField(column.name()); - default -> throw new IllegalArgumentException("Invalid column type " + column.type()); - }; + private static Field doubleField(String name) { + return new Field(name, FieldType.notNullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null); } private static Schema createSchema(Matrix matrix) { var fields = matrix.getColumns().stream() - .map(ArrowWriter::createField) + .map(column -> doubleField(column.name())) .collect(Collectors.toList()); return new Schema(fields); } - private static void populateFloatVector(VectorSchemaRoot table, MatrixColumn column) { - var vector = (Float4Vector) table.getVector(column.name()); - var values = column.getFloatValues(); - var size = values.length; - vector.allocateNew(size); - table.setRowCount(size); - IntStream.range(0, size).forEach(i -> vector.set(i, values[i])); - } - - private static void populateIntVector(VectorSchemaRoot table, MatrixColumn column) { - var vector = (IntVector) table.getVector(column.name()); - var values = column.getIntValues(); + private static void populateDoubleVector(VectorSchemaRoot table, MatrixColumn column) { + var vector = (Float8Vector) table.getVector(column.name()); + var values = column.values(); var size = values.length; vector.allocateNew(size); table.setRowCount(size); @@ -84,15 +47,7 @@ private static void populateIntVector(VectorSchemaRoot table, MatrixColumn colum public void write(Matrix matrix, OutputStream out) throws IOException { var schema = createSchema(matrix); try (var table = VectorSchemaRoot.create(schema, ALLOCATOR)) { - matrix.getColumns().forEach(c -> { - switch (c.type()) { - case INT -> populateIntVector(table, c); - case FLOAT -> populateFloatVector(table, c); - default -> throw new IllegalArgumentException("Invalid column type " + c.type()); - } - }); - - // OpenOption[] options = {StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING}; + matrix.getColumns().forEach(c -> populateDoubleVector(table, c)); try (var ch = Channels.newChannel(out); var writer = new ArrowFileWriter(table, null, ch)) { @@ -108,7 +63,17 @@ public String getDefaultFileExtension() { } public static void main(String[] args) { - var writer = new ArrowWriter(); - writer.write(matrix, Path.of("src/main/resources/test-matrix.arrow")); + var writer = new ArrowWriter(); + try { + var matrix = ArrowReader.readMatrixFromTxt(Path.of("src/main/resources/load_fr_2030-2031.txt")); + try (var out = Files.newOutputStream(Path.of("src/main/resources/test-matrix.arrow"))) { + writer.write(matrix, out); + } + + var deserializedMatrix = ArrowReader.readMatrixFromArrow(Path.of("src/main/resources/test-matrix.arrow")); + System.out.println("decoy"); + } catch (IOException e) { + throw new RuntimeException(e); + } } -} +} \ No newline at end of file diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/Matrix.java b/src/main/java/com/rte_france/antares/datamanager_back/util/Matrix.java index b8ea185..5f29dd2 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/Matrix.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/Matrix.java @@ -10,9 +10,6 @@ import java.util.List; -/** - * @author Sylvain Leclerc - */ @Value public class Matrix { diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixColumn.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixColumn.java index 8595f6e..469afa6 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixColumn.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixColumn.java @@ -9,63 +9,25 @@ import java.util.Arrays; import java.util.Objects; -/** - * @author Sylvain Leclerc - */ -public record MatrixColumn(String name, ColumnType type, int[] intValues, float[] floatValues) { - - public float[] getFloatValues() { - if (type != ColumnType.FLOAT) { - throw new IllegalArgumentException("Cannot get float values from int column"); - } - return floatValues; - } - - public int[] getIntValues() { - if (type != ColumnType.INT) { - throw new IllegalArgumentException("Cannot get int values from float column"); - } - return intValues; - } - +public record MatrixColumn(String name, double[] values) { public int getSize() { - return switch (type) { - case INT -> intValues.length; - case FLOAT -> floatValues.length; - default -> throw new IllegalStateException("Invalid column type " + type); - }; - } - - public MatrixColumn(String name, float[] values) { - this(name, ColumnType.FLOAT, null, Objects.requireNonNull(values.clone())); - } - - public MatrixColumn(String name, int[] values) { - this(name, ColumnType.INT, Objects.requireNonNull(values.clone()), null); + return values.length; } public MatrixColumn { Objects.requireNonNull(name); - Objects.requireNonNull(type); - if (type == ColumnType.FLOAT && floatValues == null) { - throw new IllegalArgumentException("Float column must have floatValues"); - } - if (type == ColumnType.INT && intValues == null) { - throw new IllegalArgumentException("Int column must have intValues"); - } + Objects.requireNonNull(values); } public MatrixColumn renamed(String newName) { - return new MatrixColumn(newName, type, intValues, floatValues); + return new MatrixColumn(newName, values); } @Override public String toString() { return "MatrixColumn{" + "name='" + name + '\'' + - ", type=" + type + - ", floatValues=" + Arrays.toString(floatValues) + - ", intValues=" + Arrays.toString(intValues) + + ", values=" + Arrays.toString(values) + '}'; } } From cd8adb017bc037f61422c67912720dee239c2f70 Mon Sep 17 00:00:00 2001 From: salemsd Date: Mon, 27 Jan 2025 15:12:07 +0100 Subject: [PATCH 04/15] add timer and size logging --- .../datamanager_back/util/ArrowWriter.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java index 66cabc4..62294ba 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java @@ -66,12 +66,24 @@ public static void main(String[] args) { var writer = new ArrowWriter(); try { var matrix = ArrowReader.readMatrixFromTxt(Path.of("src/main/resources/load_fr_2030-2031.txt")); - try (var out = Files.newOutputStream(Path.of("src/main/resources/test-matrix.arrow"))) { + + var startSerialization = System.nanoTime(); + var arrowFilePath = Path.of("src/main/resources/test-matrix.arrow"); + try (var out = Files.newOutputStream(arrowFilePath)) { writer.write(matrix, out); } + var endSerialization = System.nanoTime(); + var serializationTime = (endSerialization - startSerialization) / 1_000_000_000.0; + var fileSize = Files.size(arrowFilePath); + + var startDeserialization = System.nanoTime(); + var deserializedMatrix = ArrowReader.readMatrixFromArrow(arrowFilePath); + var endDeserialization = System.nanoTime(); + var deserializationTime = (endDeserialization - startDeserialization) / 1_000_000_000.0; - var deserializedMatrix = ArrowReader.readMatrixFromArrow(Path.of("src/main/resources/test-matrix.arrow")); - System.out.println("decoy"); + System.out.println("Serialization time (s): " + serializationTime); + System.out.println("Deserialization time (s): " + deserializationTime); + System.out.println(".arrow file size (bytes): " + fileSize); } catch (IOException e) { throw new RuntimeException(e); } From 41deb1b50a8b498a4189c8cd043cfdc235422023 Mon Sep 17 00:00:00 2001 From: salemsd Date: Tue, 28 Jan 2025 09:19:07 +0100 Subject: [PATCH 05/15] add compression codec --- pom.xml | 22 +++++++++---------- .../datamanager_back/util/ArrowWriter.java | 12 ++++++++-- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index 3285e10..92d88ee 100644 --- a/pom.xml +++ b/pom.xml @@ -91,6 +91,12 @@ org.apache.poi poi-ooxml 5.2.3 + + + org.apache.commons + commons-compress + + @@ -169,19 +175,13 @@ org.apache.arrow arrow-memory-netty ${arrow.version} - - - org.apache.arrow - arrow-memory-core - - + + + org.apache.arrow + arrow-compression + ${arrow.version} - - - - - diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java index 62294ba..898fdc7 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java @@ -1,10 +1,16 @@ package com.rte_france.antares.datamanager_back.util; +import jakarta.validation.constraints.Negative; +import org.apache.arrow.compression.CommonsCompressionFactory; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.compression.ZstdCompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.message.IpcOption; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; @@ -17,6 +23,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -49,8 +56,9 @@ public void write(Matrix matrix, OutputStream out) throws IOException { try (var table = VectorSchemaRoot.create(schema, ALLOCATOR)) { matrix.getColumns().forEach(c -> populateDoubleVector(table, c)); + var compressionFactory = new CommonsCompressionFactory(); try (var ch = Channels.newChannel(out); - var writer = new ArrowFileWriter(table, null, ch)) { + var writer = new ArrowFileWriter(table, null, ch, null, IpcOption.DEFAULT, compressionFactory, CompressionUtil.CodecType.ZSTD)) { writer.start(); writer.writeBatch(); writer.end(); @@ -65,7 +73,7 @@ public String getDefaultFileExtension() { public static void main(String[] args) { var writer = new ArrowWriter(); try { - var matrix = ArrowReader.readMatrixFromTxt(Path.of("src/main/resources/load_fr_2030-2031.txt")); + var matrix = ArrowReader.readMatrixFromTxt(Path.of("src/main/resources/INPUT/load/load_fr_2030-2031.txt")); var startSerialization = System.nanoTime(); var arrowFilePath = Path.of("src/main/resources/test-matrix.arrow"); From 8a85e6340ff687bc31e0c3a282f2e79ac338132b Mon Sep 17 00:00:00 2001 From: salemsd Date: Tue, 28 Jan 2025 15:33:55 +0100 Subject: [PATCH 06/15] start setting up pom for parquet writesupport implem --- pom.xml | 22 +------------------ .../datamanager_back/util/ArrowWriter.java | 2 +- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/pom.xml b/pom.xml index 92d88ee..1e0f3df 100644 --- a/pom.xml +++ b/pom.xml @@ -29,7 +29,7 @@ 21 2.0.3 - 18.1.0 + 1.15.0 antaressimulatorteam AntaresSimulatorTeam_antares-datamanager-back https://sonarcloud.io @@ -166,22 +166,6 @@ test - - org.apache.arrow - arrow-vector - ${arrow.version} - - - org.apache.arrow - arrow-memory-netty - ${arrow.version} - - - org.apache.arrow - arrow-compression - ${arrow.version} - - @@ -196,7 +180,6 @@ lombok - --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED @@ -240,9 +223,6 @@ org.apache.maven.plugins maven-surefire-plugin 3.0.0-M9 - - --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java index 898fdc7..5906d84 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java @@ -73,7 +73,7 @@ public String getDefaultFileExtension() { public static void main(String[] args) { var writer = new ArrowWriter(); try { - var matrix = ArrowReader.readMatrixFromTxt(Path.of("src/main/resources/INPUT/load/load_fr_2030-2031.txt")); + var matrix = ArrowReader.readMatrixFromTxt(Path.of("src/main/resources/INPUT/load/series.txt")); var startSerialization = System.nanoTime(); var arrowFilePath = Path.of("src/main/resources/test-matrix.arrow"); From cbb1cf9b5188947026fb2900ec7e6f3eac5786cf Mon Sep 17 00:00:00 2001 From: salemsd Date: Tue, 28 Jan 2025 16:16:53 +0100 Subject: [PATCH 07/15] finalize writesupport implem --- pom.xml | 30 ++++++ .../datamanager_back/util/ArrowWriter.java | 99 ------------------- .../datamanager_back/util/MatrixRow.java | 22 +++++ .../util/MatrixWriteSupport.java | 89 +++++++++++++++++ ...ader.java => ParquetTimeSeriesReader.java} | 37 +------ .../util/ParquetTimeSeriesWriter.java | 76 ++++++++++++++ .../util/ParquetWriterBuilder.java | 37 +++++++ 7 files changed, 255 insertions(+), 135 deletions(-) delete mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java rename src/main/java/com/rte_france/antares/datamanager_back/util/{ArrowReader.java => ParquetTimeSeriesReader.java} (54%) create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/ParquetWriterBuilder.java diff --git a/pom.xml b/pom.xml index 1e0f3df..8fd2b72 100644 --- a/pom.xml +++ b/pom.xml @@ -30,6 +30,7 @@ 21 2.0.3 1.15.0 + 3.3.6 antaressimulatorteam AntaresSimulatorTeam_antares-datamanager-back https://sonarcloud.io @@ -166,6 +167,35 @@ test + + + org.apache.parquet + parquet-avro + ${parquet.version} + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + + diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java deleted file mode 100644 index 5906d84..0000000 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowWriter.java +++ /dev/null @@ -1,99 +0,0 @@ -package com.rte_france.antares.datamanager_back.util; - -import jakarta.validation.constraints.Negative; -import org.apache.arrow.compression.CommonsCompressionFactory; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.compression.ZstdCompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.channels.Channels; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -public class ArrowWriter { - - private static final BufferAllocator ALLOCATOR = new RootAllocator(); - - private static Field doubleField(String name) { - return new Field(name, FieldType.notNullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null); - } - - private static Schema createSchema(Matrix matrix) { - var fields = matrix.getColumns().stream() - .map(column -> doubleField(column.name())) - .collect(Collectors.toList()); - return new Schema(fields); - } - - private static void populateDoubleVector(VectorSchemaRoot table, MatrixColumn column) { - var vector = (Float8Vector) table.getVector(column.name()); - var values = column.values(); - var size = values.length; - vector.allocateNew(size); - table.setRowCount(size); - IntStream.range(0, size).forEach(i -> vector.set(i, values[i])); - } - - public void write(Matrix matrix, OutputStream out) throws IOException { - var schema = createSchema(matrix); - try (var table = VectorSchemaRoot.create(schema, ALLOCATOR)) { - matrix.getColumns().forEach(c -> populateDoubleVector(table, c)); - - var compressionFactory = new CommonsCompressionFactory(); - try (var ch = Channels.newChannel(out); - var writer = new ArrowFileWriter(table, null, ch, null, IpcOption.DEFAULT, compressionFactory, CompressionUtil.CodecType.ZSTD)) { - writer.start(); - writer.writeBatch(); - writer.end(); - } - } - } - - public String getDefaultFileExtension() { - return "arrow"; - } - - public static void main(String[] args) { - var writer = new ArrowWriter(); - try { - var matrix = ArrowReader.readMatrixFromTxt(Path.of("src/main/resources/INPUT/load/series.txt")); - - var startSerialization = System.nanoTime(); - var arrowFilePath = Path.of("src/main/resources/test-matrix.arrow"); - try (var out = Files.newOutputStream(arrowFilePath)) { - writer.write(matrix, out); - } - var endSerialization = System.nanoTime(); - var serializationTime = (endSerialization - startSerialization) / 1_000_000_000.0; - var fileSize = Files.size(arrowFilePath); - - var startDeserialization = System.nanoTime(); - var deserializedMatrix = ArrowReader.readMatrixFromArrow(arrowFilePath); - var endDeserialization = System.nanoTime(); - var deserializationTime = (endDeserialization - startDeserialization) / 1_000_000_000.0; - - System.out.println("Serialization time (s): " + serializationTime); - System.out.println("Deserialization time (s): " + deserializationTime); - System.out.println(".arrow file size (bytes): " + fileSize); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} \ No newline at end of file diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java new file mode 100644 index 0000000..136101a --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java @@ -0,0 +1,22 @@ + +/** + * Copyright (c) 2023, RTE (http://www.rte-france.com) + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +package com.rte_france.antares.datamanager_back.util; + +import lombok.Value; + +/** + * Parquet API takes rows as input: that small object represents one row of the matrix. + * + * @author Sylvain Leclerc + */ +@Value +class MatrixRow { + Matrix matrix; + int row; +} + diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java new file mode 100644 index 0000000..8bdcc29 --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java @@ -0,0 +1,89 @@ +/** + * Copyright (c) 2023, RTE (http://www.rte-france.com) + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +package com.rte_france.antares.datamanager_back.util; + +import org.apache.parquet.hadoop.api.WriteSupport; +import org.apache.parquet.io.api.RecordConsumer; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Type; +import org.apache.parquet.schema.Types; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +class MatrixWriteSupport extends WriteSupport { + + private final Matrix matrix; + private RecordConsumer consumer; + + MatrixWriteSupport(Matrix matrix) { + this.matrix = matrix; + } + + @Override + public WriteContext init(org.apache.hadoop.conf.Configuration configuration) { + var colTypes = matrix.getColumns().stream() + .map(c -> doubleType(c.name())) + .collect(Collectors.toList()); + + var schema = new MessageType("matrix", colTypes); + return new WriteContext(schema, Collections.emptyMap()); + } + + private static Type type(ColumnType type, String name) { + return switch (type) { + case INT -> intType(name); + case FLOAT -> floatType(name); + default -> throw new IllegalArgumentException("Unknown column type " + type); + }; + } + + private static Type floatType(String name) { + return Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, Type.Repetition.REQUIRED) + .named(name); + } + + private static Type intType(String name) { + return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, Type.Repetition.REQUIRED) + .named(name); + } + + private static Type doubleType(String name) { + return Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, Type.Repetition.REQUIRED) + .named(name); + } + + @Override + public void prepareForWrite(RecordConsumer recordConsumer) { + this.consumer = recordConsumer; + } + + @Override + public void write(MatrixRow matrixRow) { + consumer.startMessage(); + var columnIndex = 0; + for (var c : matrixRow.getMatrix().getColumns()) { + consumer.startField(c.name(), columnIndex); + consumer.addDouble(c.values()[matrixRow.getRow()]); +// switch (c.getType()) { +// case INT: +// consumer.addInteger(c.getIntValues()[matrixRow.getRow()]); +// break; +// case FLOAT: +// consumer.addFloat(c.getFloatValues()[matrixRow.getRow()]); +// break; +// default: +// throw new IllegalArgumentException("Unknown column type " + c.getType()); +// } + consumer.endField(c.name(), columnIndex); + columnIndex++; + } + consumer.endMessage(); + } +} diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java similarity index 54% rename from src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java rename to src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java index e301db1..dfc0cb5 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ArrowReader.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java @@ -1,15 +1,6 @@ package com.rte_france.antares.datamanager_back.util; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.types.pojo.Field; - import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -18,33 +9,7 @@ import java.util.function.BiConsumer; import java.util.function.Function; -public class ArrowReader { - private static final BufferAllocator ALLOCATOR = new RootAllocator(); - - public static Matrix readMatrixFromArrow(Path filePath) throws IOException { - Objects.requireNonNull(filePath); - - try (var channel = Files.newByteChannel(filePath); - var reader = new ArrowFileReader(channel, ALLOCATOR)) { - - reader.loadNextBatch(); - var root = reader.getVectorSchemaRoot(); - List fields = root.getSchema().getFields(); - - var columns = new ArrayList(); - for (var field : fields) { - var vector = root.getVector(field.getName()); - var values = new double[vector.getValueCount()]; - for (var i = 0; i < vector.getValueCount(); i++) { - values[i] = ((Float8Vector) vector).get(i); - } - columns.add(new MatrixColumn(field.getName(), values)); - } - - return new Matrix(columns); - } - } - +public class ParquetTimeSeriesReader { public static Matrix readMatrixFromTxt(Path filePath) throws IOException { Objects.requireNonNull(filePath); diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java new file mode 100644 index 0000000..68e4023 --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java @@ -0,0 +1,76 @@ +/** + * Copyright (c) 2023, RTE (http://www.rte-france.com) + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +package com.rte_france.antares.datamanager_back.util; + +import com.google.common.base.Stopwatch; +import org.apache.parquet.column.ParquetProperties; +import org.apache.parquet.hadoop.ParquetFileWriter; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Path; +import java.util.Objects; + +/** + * + * Uses the standard "row based" API provided by "parquet-mr" implementation. + * Tests show that the implementation does not scale well with number of columns. + * Indeed it's weird to have a row-based implementation when the format is column-based ... + * Spark seems to have implemented a "vectorized" reader, but not a writer. + * + * @author Sylvain Leclerc + */ +public class ParquetTimeSeriesWriter { + + private static final Logger LOGGER = LoggerFactory.getLogger(ParquetTimeSeriesWriter.class); + + private final CompressionCodecName compression; + + public ParquetTimeSeriesWriter() { + this(CompressionCodecName.SNAPPY); + } + + public ParquetTimeSeriesWriter(CompressionCodecName compression) { + this.compression = Objects.requireNonNull(compression); + } + + public void write(Matrix matrix, OutputStream outputStream) throws IOException { + throw new UnsupportedOperationException("Writing to stream not supported for parquet format."); + } + + public void write(Matrix matrix, Path file) throws IOException { + var writer = new ParquetWriterBuilder(matrix, file) + .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) + .withCompressionCodec(compression) + .withDictionaryEncoding(false) + .withBloomFilterEnabled(false) + .withByteStreamSplitEncoding(false) + .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0) + .withPageWriteChecksumEnabled(false) + .withRowGroupSize((long) 1024*1024*128) // Default 1024*1024*128 + .withPageRowCountLimit(20000) // Default 20 000 + .withPageSize(1024*1024) //Default 1024 * 1024 + .build(); + Stopwatch timer = Stopwatch.createStarted(); + for (var r = 0; r < matrix.getRowCount(); r++) { + var row = new MatrixRow(matrix, r); + writer.write(row); + } + LOGGER.info("All rows written in {}", timer); + writer.close(); + LOGGER.info("Writer closed after {}", timer); + } + + public String getDefaultFileExtension() { + return "parquet"; + } + +} diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetWriterBuilder.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetWriterBuilder.java new file mode 100644 index 0000000..bbed60e --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetWriterBuilder.java @@ -0,0 +1,37 @@ +/** + * Copyright (c) 2023, RTE (http://www.rte-france.com) + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +package com.rte_france.antares.datamanager_back.util; + +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.api.WriteSupport; + +import java.nio.file.Path; + +/** + * Implementation of parquet writer builder for our use case. + * + * @author Sylvain Leclerc + */ +class ParquetWriterBuilder extends ParquetWriter.Builder { + + private final Matrix matrix; + + ParquetWriterBuilder(Matrix matrix, Path path) { + super(new org.apache.hadoop.fs.Path(path.toString())); + this.matrix = matrix; + } + + @Override + protected ParquetWriterBuilder self() { + return this; + } + + @Override + protected WriteSupport getWriteSupport(org.apache.hadoop.conf.Configuration conf) { + return new MatrixWriteSupport(matrix); + } +} From 2eba142529863b7c21d0edfe27ff1fa20a884ad8 Mon Sep 17 00:00:00 2001 From: salemsd Date: Tue, 28 Jan 2025 16:21:55 +0100 Subject: [PATCH 08/15] use constansts for defaults --- .../datamanager_back/util/ParquetTimeSeriesWriter.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java index 68e4023..64bcfe8 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java @@ -7,6 +7,7 @@ package com.rte_france.antares.datamanager_back.util; import com.google.common.base.Stopwatch; +import org.apache.parquet.avro.AvroParquetWriter; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.parquet.hadoop.ParquetWriter; @@ -55,10 +56,11 @@ public void write(Matrix matrix, Path file) throws IOException { .withByteStreamSplitEncoding(false) .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0) .withPageWriteChecksumEnabled(false) - .withRowGroupSize((long) 1024*1024*128) // Default 1024*1024*128 - .withPageRowCountLimit(20000) // Default 20 000 - .withPageSize(1024*1024) //Default 1024 * 1024 + .withRowGroupSize((long) ParquetWriter.DEFAULT_BLOCK_SIZE) // Default 1024*1024*128 + .withPageRowCountLimit(20_000) // Default 20 000 + .withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE) //Default 1024 * 1024 .build(); + Stopwatch timer = Stopwatch.createStarted(); for (var r = 0; r < matrix.getRowCount(); r++) { var row = new MatrixRow(matrix, r); From 3b3c36083069eb394f22703efb1e94be24f42426 Mon Sep 17 00:00:00 2001 From: salemsd Date: Tue, 28 Jan 2025 16:23:26 +0100 Subject: [PATCH 09/15] add reading from parquet to matrix method --- .../util/ParquetTimeSeriesReader.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java index dfc0cb5..7ffdd9f 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java @@ -1,5 +1,8 @@ package com.rte_france.antares.datamanager_back.util; +import org.apache.parquet.avro.AvroParquetReader; +import org.apache.parquet.io.LocalInputFile; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -44,4 +47,17 @@ private static Matrix readMatrix(List lines, int rowCount, int columnCou return new Matrix(columns); } + + public static Matrix readFromParquet(Path filePath) throws IOException { + Objects.requireNonNull(filePath); + + var inputFile = new LocalInputFile(filePath); + try (var reader = AvroParquetReader.builder(inputFile).build()) { + var matrix = reader.read(); + if (matrix == null) { + throw new IOException("The Parquet file is empty or does not contain a TimeSeriesMatrix"); + } + return matrix; + } + } } \ No newline at end of file From 98a6626c9b810b2c319cdf9c61908f69f6051527 Mon Sep 17 00:00:00 2001 From: salemsd Date: Wed, 29 Jan 2025 14:44:45 +0100 Subject: [PATCH 10/15] add ReadSupport and GroupConverter, finish implem --- .../util/MatrixGroupConverter.java | 74 +++++++++++++++++++ .../util/MatrixReadSupport.java | 46 ++++++++++++ .../datamanager_back/util/MatrixRow.java | 5 -- .../util/ParquetTimeSeriesReader.java | 18 ++--- .../util/ParquetTimeSeriesWriter.java | 36 ++++++--- .../util/ParquetWriterBuilder.java | 5 -- 6 files changed, 151 insertions(+), 33 deletions(-) create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java create mode 100644 src/main/java/com/rte_france/antares/datamanager_back/util/MatrixReadSupport.java diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java new file mode 100644 index 0000000..cd56b11 --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java @@ -0,0 +1,74 @@ +package com.rte_france.antares.datamanager_back.util; + +import org.apache.parquet.io.api.GroupConverter; +import org.apache.parquet.io.api.PrimitiveConverter; +import org.apache.parquet.io.api.Converter; +import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.Type; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +public class MatrixGroupConverter extends GroupConverter { + private final List columns = new ArrayList<>(); + private final List columnNames; + private final int rowCount; + + public MatrixGroupConverter(GroupType schema, int rowCount) { + Objects.requireNonNull(schema); + this.rowCount = Objects.checkIndex(rowCount, 8761); + this.columnNames = new ArrayList<>(); + for (var field : schema.getFields()) { + columnNames.add(field.getName()); + } + } + + @Override + public Converter getConverter(int fieldIndex) { + return new PrimitiveConverter() { + private final double[] values = new double[rowCount]; + private int currentIndex = 0; + + @Override + public void addDouble(double value) { + values[currentIndex++] = value; + } + + @Override + public void addFloat(float value) { + values[currentIndex++] = value; + } + + @Override + public void addInt(int value) { + values[currentIndex++] = value; + } + + @Override + public void addLong(long value) { + values[currentIndex++] = value; + } + + @Override + public void addBoolean(boolean value) { + values[currentIndex++] = value ? 1.0 : 0.0; + } + }; + } + + @Override + public void start() {} + + @Override + public void end() { + for (var name : columnNames) { + double[] values = new double[rowCount]; + columns.add(new MatrixColumn(name, values)); + } + } + + public Matrix getMatrix() { + return new Matrix(columns); + } +} diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixReadSupport.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixReadSupport.java new file mode 100644 index 0000000..d290512 --- /dev/null +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixReadSupport.java @@ -0,0 +1,46 @@ +package com.rte_france.antares.datamanager_back.util; + +import org.apache.parquet.hadoop.api.InitContext; +import org.apache.parquet.hadoop.api.ReadSupport; +import org.apache.parquet.io.api.GroupConverter; +import org.apache.parquet.io.api.RecordMaterializer; +import org.apache.parquet.schema.MessageType; + +import java.util.Map; +import java.util.Objects; + +public class MatrixReadSupport extends ReadSupport { + private final int rowCount; + + public MatrixReadSupport(int rowCount) { + Objects.checkIndex(rowCount, 8761); + this.rowCount = rowCount; + } + + @Override + public ReadContext init(InitContext context) { + var schema = context.getFileSchema(); + return new ReadContext(schema); + } + + @Override + public RecordMaterializer prepareForRead( + org.apache.hadoop.conf.Configuration configuration, + Map keyValueMetaData, + MessageType fileSchema, + ReadContext readContext) { + return new RecordMaterializer() { + private final MatrixGroupConverter converter = new MatrixGroupConverter(fileSchema, rowCount); + + @Override + public Matrix getCurrentRecord() { + return converter.getMatrix(); + } + + @Override + public GroupConverter getRootConverter() { + return converter; + } + }; + } +} diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java index 136101a..aca6a79 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java @@ -9,11 +9,6 @@ import lombok.Value; -/** - * Parquet API takes rows as input: that small object represents one row of the matrix. - * - * @author Sylvain Leclerc - */ @Value class MatrixRow { Matrix matrix; diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java index 7ffdd9f..ef679e2 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java @@ -1,7 +1,6 @@ package com.rte_france.antares.datamanager_back.util; -import org.apache.parquet.avro.AvroParquetReader; -import org.apache.parquet.io.LocalInputFile; +import org.apache.parquet.hadoop.ParquetReader; import java.io.IOException; import java.nio.file.Files; @@ -41,23 +40,16 @@ private static Matrix readMatrix(List lines, int rowCount, int columnCou } var columns = new ArrayList(columnCount); - for (int j = 0; j < columnCount; j++) { + for (var j = 0; j < columnCount; j++) { columns.add(new MatrixColumn("Column" + j, data.get(j).stream().mapToDouble(Double::doubleValue).toArray())); } return new Matrix(columns); } - public static Matrix readFromParquet(Path filePath) throws IOException { - Objects.requireNonNull(filePath); - - var inputFile = new LocalInputFile(filePath); - try (var reader = AvroParquetReader.builder(inputFile).build()) { - var matrix = reader.read(); - if (matrix == null) { - throw new IOException("The Parquet file is empty or does not contain a TimeSeriesMatrix"); - } - return matrix; + public static Matrix readFromParquet(org.apache.hadoop.fs.Path filePath) throws IOException { + try (var reader = ParquetReader.builder(new MatrixReadSupport(8760), filePath).build()) { + return reader.read(); } } } \ No newline at end of file diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java index 64bcfe8..c6aeec7 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java @@ -17,18 +17,10 @@ import java.io.IOException; import java.io.OutputStream; +import java.nio.file.Files; import java.nio.file.Path; import java.util.Objects; -/** - * - * Uses the standard "row based" API provided by "parquet-mr" implementation. - * Tests show that the implementation does not scale well with number of columns. - * Indeed it's weird to have a row-based implementation when the format is column-based ... - * Spark seems to have implemented a "vectorized" reader, but not a writer. - * - * @author Sylvain Leclerc - */ public class ParquetTimeSeriesWriter { private static final Logger LOGGER = LoggerFactory.getLogger(ParquetTimeSeriesWriter.class); @@ -36,7 +28,7 @@ public class ParquetTimeSeriesWriter { private final CompressionCodecName compression; public ParquetTimeSeriesWriter() { - this(CompressionCodecName.SNAPPY); + this(CompressionCodecName.ZSTD); } public ParquetTimeSeriesWriter(CompressionCodecName compression) { @@ -75,4 +67,28 @@ public String getDefaultFileExtension() { return "parquet"; } + public static void main(String[] args) { + try { + var matrix = ParquetTimeSeriesReader.readMatrixFromTxt(Path.of("src/main/resources/INPUT/load/load_fr_2030-2031.txt")); + var writer = new ParquetTimeSeriesWriter(); + var startSerialization = System.nanoTime(); + var parquetFilePath = Path.of("src/main/resources/INPUT/load/output_test.parquet"); + var hadoopFilePath = new org.apache.hadoop.fs.Path(parquetFilePath.toUri()); + writer.write(matrix, parquetFilePath); + var endSerialization = System.nanoTime(); + var serializationTime = (endSerialization - startSerialization) / 1_000_000_000.0; + var fileSize = Files.size(parquetFilePath); + + var startDeserialization = System.nanoTime(); + var deserializedMatrix = ParquetTimeSeriesReader.readFromParquet(hadoopFilePath); + var endDeserialization = System.nanoTime(); + var deserializationTime = (endDeserialization - startDeserialization) / 1_000_000_000.0; + + System.out.println("Serialization time: " + serializationTime); + System.out.println("Deserialization time: " + deserializationTime); + System.out.println(".parquet file size (bytes): " + fileSize); + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetWriterBuilder.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetWriterBuilder.java index bbed60e..4724572 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetWriterBuilder.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetWriterBuilder.java @@ -11,11 +11,6 @@ import java.nio.file.Path; -/** - * Implementation of parquet writer builder for our use case. - * - * @author Sylvain Leclerc - */ class ParquetWriterBuilder extends ParquetWriter.Builder { private final Matrix matrix; From 0f409c33165e09a670e265c15cd4377d77f484d7 Mon Sep 17 00:00:00 2001 From: salemsd Date: Thu, 30 Jan 2025 11:29:32 +0100 Subject: [PATCH 11/15] remove commented code --- .../datamanager_back/util/MatrixWriteSupport.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java index 8bdcc29..3a183cc 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java @@ -71,16 +71,6 @@ public void write(MatrixRow matrixRow) { for (var c : matrixRow.getMatrix().getColumns()) { consumer.startField(c.name(), columnIndex); consumer.addDouble(c.values()[matrixRow.getRow()]); -// switch (c.getType()) { -// case INT: -// consumer.addInteger(c.getIntValues()[matrixRow.getRow()]); -// break; -// case FLOAT: -// consumer.addFloat(c.getFloatValues()[matrixRow.getRow()]); -// break; -// default: -// throw new IllegalArgumentException("Unknown column type " + c.getType()); -// } consumer.endField(c.name(), columnIndex); columnIndex++; } From 7acf5704f05e427fc74fa9d46ff17889454728ca Mon Sep 17 00:00:00 2001 From: salemsd Date: Thu, 30 Jan 2025 14:18:05 +0100 Subject: [PATCH 12/15] reformat --- .../datamanager_back/util/MatrixGroupConverter.java | 3 +-- .../antares/datamanager_back/util/MatrixRow.java | 7 ------- .../antares/datamanager_back/util/MatrixWriteSupport.java | 1 - .../datamanager_back/util/ParquetTimeSeriesWriter.java | 6 ------ 4 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java index cd56b11..3a354a6 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java @@ -1,10 +1,9 @@ package com.rte_france.antares.datamanager_back.util; +import org.apache.parquet.io.api.Converter; import org.apache.parquet.io.api.GroupConverter; import org.apache.parquet.io.api.PrimitiveConverter; -import org.apache.parquet.io.api.Converter; import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.Type; import java.util.ArrayList; import java.util.List; diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java index aca6a79..da7861c 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixRow.java @@ -1,10 +1,3 @@ - -/** - * Copyright (c) 2023, RTE (http://www.rte-france.com) - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ package com.rte_france.antares.datamanager_back.util; import lombok.Value; diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java index 3a183cc..699e262 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixWriteSupport.java @@ -14,7 +14,6 @@ import org.apache.parquet.schema.Types; import java.util.Collections; -import java.util.List; import java.util.stream.Collectors; class MatrixWriteSupport extends WriteSupport { diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java index c6aeec7..6c0bb3e 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java @@ -7,7 +7,6 @@ package com.rte_france.antares.datamanager_back.util; import com.google.common.base.Stopwatch; -import org.apache.parquet.avro.AvroParquetWriter; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.parquet.hadoop.ParquetWriter; @@ -16,7 +15,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.Objects; @@ -35,10 +33,6 @@ public ParquetTimeSeriesWriter(CompressionCodecName compression) { this.compression = Objects.requireNonNull(compression); } - public void write(Matrix matrix, OutputStream outputStream) throws IOException { - throw new UnsupportedOperationException("Writing to stream not supported for parquet format."); - } - public void write(Matrix matrix, Path file) throws IOException { var writer = new ParquetWriterBuilder(matrix, file) .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) From 25ad27ab64f400e51ca1b40cc8212a1bef81f8df Mon Sep 17 00:00:00 2001 From: salemsd Date: Mon, 3 Feb 2025 11:04:41 +0100 Subject: [PATCH 13/15] refactor readMatrix method and some fixes --- .../util/MatrixGroupConverter.java | 2 +- .../util/ParquetTimeSeriesReader.java | 62 ++++++++++--------- .../util/ParquetTimeSeriesWriter.java | 6 +- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java index 3a354a6..22ae699 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/MatrixGroupConverter.java @@ -16,7 +16,7 @@ public class MatrixGroupConverter extends GroupConverter { public MatrixGroupConverter(GroupType schema, int rowCount) { Objects.requireNonNull(schema); - this.rowCount = Objects.checkIndex(rowCount, 8761); + this.rowCount = rowCount; this.columnNames = new ArrayList<>(); for (var field : schema.getFields()) { columnNames.add(field.getName()); diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java index ef679e2..622990e 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesReader.java @@ -6,50 +6,52 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java.util.Objects; -import java.util.function.BiConsumer; -import java.util.function.Function; +import java.util.stream.Stream; public class ParquetTimeSeriesReader { + public static Matrix readFromParquet(org.apache.hadoop.fs.Path filePath) throws IOException { + try (var reader = ParquetReader.builder(new MatrixReadSupport(8760), filePath).build()) { + return reader.read(); + } + } + public static Matrix readMatrixFromTxt(Path filePath) throws IOException { Objects.requireNonNull(filePath); - var lines = Files.readAllLines(filePath); - if (lines.isEmpty()) { - throw new IllegalArgumentException("File is empty"); - } - - var rowCount = lines.size(); - var columnCount = lines.getFirst().split("\\s+").length; + try (var lines = Files.lines(filePath)) { + var iterator = lines.iterator(); + if (!iterator.hasNext()) { + throw new IllegalArgumentException("File is empty"); + } - return readMatrix(lines, rowCount, columnCount, Double::parseDouble, (data, value) -> data.add(Double.parseDouble(value))); - } + var firstLine = iterator.next(); + var columnCount = firstLine.split("\\s+").length; + var data = new ArrayList>(columnCount); + for (var i = 0; i < columnCount; i++) { + data.add(new ArrayList<>()); + } - private static Matrix readMatrix(List lines, int rowCount, int columnCount, Function parser, BiConsumer, String> adder) { - var data = new ArrayList>(columnCount); - for (var i = 0; i < columnCount; i++) { - data.add(new ArrayList<>(rowCount)); - } + fillDataList(firstLine, iterator, data); - for (var line : lines) { - var values = line.split("\\s+"); - for (var j = 0; j < columnCount; j++) { - adder.accept(data.get(j), values[j]); + var columns = new ArrayList(data.size()); + for (int j = 0; j < data.size(); j++) { + columns.add(new MatrixColumn("Column" + j, data.get(j).stream().mapToDouble(Double::doubleValue).toArray())); } - } - var columns = new ArrayList(columnCount); - for (var j = 0; j < columnCount; j++) { - columns.add(new MatrixColumn("Column" + j, data.get(j).stream().mapToDouble(Double::doubleValue).toArray())); + return new Matrix(columns); } - - return new Matrix(columns); } - public static Matrix readFromParquet(org.apache.hadoop.fs.Path filePath) throws IOException { - try (var reader = ParquetReader.builder(new MatrixReadSupport(8760), filePath).build()) { - return reader.read(); - } + private static void fillDataList(String firstLine, Iterator iterator, ArrayList> data) { + Stream.concat(Stream.of(firstLine), Stream.generate(iterator::next).takeWhile(x -> iterator.hasNext())) + .map(line -> line.split("\\s+")) + .forEach(values -> { + for (var j = 0; j < values.length; j++) { + data.get(j).add(Double.parseDouble(values[j])); + } + }); } } \ No newline at end of file diff --git a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java index 6c0bb3e..b834ef1 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/util/ParquetTimeSeriesWriter.java @@ -78,9 +78,9 @@ public static void main(String[] args) { var endDeserialization = System.nanoTime(); var deserializationTime = (endDeserialization - startDeserialization) / 1_000_000_000.0; - System.out.println("Serialization time: " + serializationTime); - System.out.println("Deserialization time: " + deserializationTime); - System.out.println(".parquet file size (bytes): " + fileSize); + LOGGER.info("Serialization time: {}", serializationTime); + LOGGER.info("Deserialization time: {}", deserializationTime); + LOGGER.info(".parquet file size (bytes): {}", fileSize); } catch (IOException e) { throw new RuntimeException(e); } From ca1c89ac303e339f2a07a53a7322afd85daf00ed Mon Sep 17 00:00:00 2001 From: mouad el azaar Date: Tue, 8 Oct 2024 20:33:01 +0200 Subject: [PATCH 14/15] create new project with studies endpoint --- .../controller/ProjectController.java | 10 +++++ .../datamanager_back/dto/ProjectInputDto.java | 3 +- .../service/ProjectService.java | 5 ++- .../service/impl/ProjectServiceImpl.java | 25 ++++++++++-- .../controller/ProjectControllerTest.java | 37 +++++++++++++++++- .../mapper/ProjectMapperTest.java | 1 - .../repository/ProjectRepositoryTest.java | 18 +++++++++ .../service/ProjectServiceImplTest.java | 39 +++++++++++++++++++ 8 files changed, 128 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/controller/ProjectController.java b/src/main/java/com/rte_france/antares/datamanager_back/controller/ProjectController.java index 6c6f484..8cd2bd9 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/controller/ProjectController.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/controller/ProjectController.java @@ -1,8 +1,11 @@ package com.rte_france.antares.datamanager_back.controller; import com.rte_france.antares.datamanager_back.dto.ProjectDto; +import com.rte_france.antares.datamanager_back.dto.ProjectInputDto; + import com.rte_france.antares.datamanager_back.service.ProjectService; import io.swagger.v3.oas.annotations.Operation; +import jakarta.validation.constraints.NotNull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.data.domain.Page; @@ -74,4 +77,11 @@ public void deleteProject(@PathVariable Integer id) { public ResponseEntity> searchProjectsByName(@RequestParam String partialName) { return new ResponseEntity<>(projectService.searchProjectsByName(partialName), HttpStatus.OK); } + + @Operation(summary = "Create a new project") + @PostMapping + public ResponseEntity createProject(@RequestBody @NotNull ProjectInputDto projectInputDto) { + + return new ResponseEntity<>(toProjectDto(projectService.createProject(projectInputDto)), HttpStatus.OK); + } } diff --git a/src/main/java/com/rte_france/antares/datamanager_back/dto/ProjectInputDto.java b/src/main/java/com/rte_france/antares/datamanager_back/dto/ProjectInputDto.java index c350d52..687727c 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/dto/ProjectInputDto.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/dto/ProjectInputDto.java @@ -5,7 +5,6 @@ import lombok.Data; import lombok.NoArgsConstructor; -import java.util.List; @Data @Builder(toBuilder = true) @@ -13,5 +12,5 @@ @NoArgsConstructor public class ProjectInputDto { String name; - List studyIds; + String description; } diff --git a/src/main/java/com/rte_france/antares/datamanager_back/service/ProjectService.java b/src/main/java/com/rte_france/antares/datamanager_back/service/ProjectService.java index e90c8eb..2d3d8a1 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/service/ProjectService.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/service/ProjectService.java @@ -4,6 +4,8 @@ import com.rte_france.antares.datamanager_back.repository.model.ProjectEntity; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; +import com.rte_france.antares.datamanager_back.dto.ProjectInputDto; + import java.util.List; @@ -23,6 +25,5 @@ public interface ProjectService { List searchProjectsByName(String partialName); - + ProjectEntity createProject(ProjectInputDto projectInputDto); } - diff --git a/src/main/java/com/rte_france/antares/datamanager_back/service/impl/ProjectServiceImpl.java b/src/main/java/com/rte_france/antares/datamanager_back/service/impl/ProjectServiceImpl.java index 84c0de8..38a2124 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/service/impl/ProjectServiceImpl.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/service/impl/ProjectServiceImpl.java @@ -1,11 +1,13 @@ package com.rte_france.antares.datamanager_back.service.impl; import com.rte_france.antares.datamanager_back.dto.ProjectDto; +import com.rte_france.antares.datamanager_back.dto.ProjectInputDto; import com.rte_france.antares.datamanager_back.exception.BadRequestException; import com.rte_france.antares.datamanager_back.exception.ResourceNotFoundException; import com.rte_france.antares.datamanager_back.mapper.ProjectMapper; import com.rte_france.antares.datamanager_back.repository.PinnedProjectRepository; import com.rte_france.antares.datamanager_back.repository.ProjectRepository; +import com.rte_france.antares.datamanager_back.repository.StudyRepository; import com.rte_france.antares.datamanager_back.repository.model.PinnedProjectEntity; import com.rte_france.antares.datamanager_back.repository.model.PinnedProjectEntityId; import com.rte_france.antares.datamanager_back.repository.model.ProjectEntity; @@ -37,7 +39,7 @@ public class ProjectServiceImpl implements ProjectService { private final PinnedProjectRepository pinnedProjectRepository; - + private final StudyRepository studyRepository; private final ProjectRepository projectRepository; public List getPinnedProjectsByUser(String userId) { @@ -120,10 +122,9 @@ public void deletePinnedProjectForGivenUser(String userId, Integer projectId) { @Override public ProjectEntity findProjectById(Integer projectId) { Optional projectDetails = projectRepository.findById(projectId); - if (projectDetails.isPresent()){ + if (projectDetails.isPresent()) { return projectDetails.get(); - } - else + } else throw new ResourceNotFoundException("Project with ID: " + projectId + " not found"); } @@ -175,4 +176,20 @@ private void checkIfUserHasALreadyMaxPinnedProjects(String userId) { } } + @Override + public ProjectEntity createProject(ProjectInputDto projectInputDto) { + Optional existingProject = projectRepository.findByName(projectInputDto.getName()); + + if (existingProject.isPresent()) { + throw new IllegalArgumentException("A project with the same name already exists."); + } + + ProjectEntity newProject = new ProjectEntity(); + newProject.setName(projectInputDto.getName()); + newProject.setCreationDate(LocalDateTime.now()); + newProject.setCreatedBy("pegase"); + newProject.setDescription(projectInputDto.getDescription()); + return projectRepository.save(newProject); + } + } diff --git a/src/test/java/com/rte_france/antares/datamanager_back/controller/ProjectControllerTest.java b/src/test/java/com/rte_france/antares/datamanager_back/controller/ProjectControllerTest.java index ffb7196..7ec2397 100644 --- a/src/test/java/com/rte_france/antares/datamanager_back/controller/ProjectControllerTest.java +++ b/src/test/java/com/rte_france/antares/datamanager_back/controller/ProjectControllerTest.java @@ -1,8 +1,12 @@ package com.rte_france.antares.datamanager_back.controller; +import com.fasterxml.jackson.databind.ObjectMapper; import com.rte_france.antares.datamanager_back.dto.ProjectDto; +import com.rte_france.antares.datamanager_back.dto.ProjectInputDto; import com.rte_france.antares.datamanager_back.exception.BadRequestException; import com.rte_france.antares.datamanager_back.exception.ResourceNotFoundException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.rte_france.antares.datamanager_back.dto.ProjectInputDto; import com.rte_france.antares.datamanager_back.repository.model.ProjectEntity; import com.rte_france.antares.datamanager_back.service.ProjectService; import org.junit.jupiter.api.BeforeEach; @@ -22,7 +26,6 @@ import java.util.Collections; import java.util.List; -import static com.rte_france.antares.datamanager_back.mapper.ProjectMapper.toProjectDto; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.*; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.*; @@ -212,4 +215,36 @@ void searchProjectsByNameReturnsEmptyListWhenNoMatches() throws Exception { .andExpect(status().isOk()) .andExpect(jsonPath("$").isEmpty()); } + + @Test + void createProject_returnsProjectDto_whenValidInputProvided() throws Exception { + ProjectInputDto projectInputDto = new ProjectInputDto(); + projectInputDto.setName("testProject"); + + when(projectService.createProject(any(ProjectInputDto.class))).thenReturn(new ProjectEntity()); + + this.mockMvc.perform(post("/v1/project") + .contentType(MediaType.APPLICATION_JSON_VALUE) + .content(new ObjectMapper().writeValueAsString(projectInputDto)) + .accept(MediaType.APPLICATION_JSON_VALUE)) + + .andExpect(status().isOk()) + .andReturn(); + + verify(projectService, times(1)).createProject(any(ProjectInputDto.class)); + } + + @Test + void createProject_returnsBadRequest_whenInvalidInputProvided() throws Exception { + ProjectInputDto projectInputDto = new ProjectInputDto(); + projectInputDto.setName("test"); + + this.mockMvc.perform(post("/v1/project") + .contentType(MediaType.APPLICATION_JSON_VALUE) + .content(new ObjectMapper().writeValueAsString(projectInputDto)) + .accept(MediaType.APPLICATION_JSON_VALUE)) + + .andExpect(status().isInternalServerError()) + .andReturn(); + } } \ No newline at end of file diff --git a/src/test/java/com/rte_france/antares/datamanager_back/mapper/ProjectMapperTest.java b/src/test/java/com/rte_france/antares/datamanager_back/mapper/ProjectMapperTest.java index b689132..0b72c9c 100644 --- a/src/test/java/com/rte_france/antares/datamanager_back/mapper/ProjectMapperTest.java +++ b/src/test/java/com/rte_france/antares/datamanager_back/mapper/ProjectMapperTest.java @@ -5,7 +5,6 @@ import com.rte_france.antares.datamanager_back.repository.model.StudyEntity; import org.junit.jupiter.api.Test; -import java.time.Instant; import java.time.LocalDateTime; import java.util.List; diff --git a/src/test/java/com/rte_france/antares/datamanager_back/repository/ProjectRepositoryTest.java b/src/test/java/com/rte_france/antares/datamanager_back/repository/ProjectRepositoryTest.java index 6c42f95..bcc1d49 100644 --- a/src/test/java/com/rte_france/antares/datamanager_back/repository/ProjectRepositoryTest.java +++ b/src/test/java/com/rte_france/antares/datamanager_back/repository/ProjectRepositoryTest.java @@ -84,4 +84,22 @@ void findByNameContainingIgnoreCaseHandlesNullInput() { assertThat(projects).isNotNull(); assertThat(projects).isEmpty(); } + @Test + void findByName_returnsEntityWhenExists() { + String name = "PROJECT1"; + + Optional result = projectRepository.findByName(name); + + assertThat(result).isNotEmpty(); + assertThat(result.get().getName()).isEqualTo(name); + } + + @Test + void findByName_returnsEmptyWhenDoesNotExist() { + String name = "nonExistentProject"; + + Optional result = projectRepository.findByName(name); + + assertThat(result).isEmpty(); + } } diff --git a/src/test/java/com/rte_france/antares/datamanager_back/service/ProjectServiceImplTest.java b/src/test/java/com/rte_france/antares/datamanager_back/service/ProjectServiceImplTest.java index a2b059a..b928cd1 100644 --- a/src/test/java/com/rte_france/antares/datamanager_back/service/ProjectServiceImplTest.java +++ b/src/test/java/com/rte_france/antares/datamanager_back/service/ProjectServiceImplTest.java @@ -1,10 +1,12 @@ package com.rte_france.antares.datamanager_back.service; import com.rte_france.antares.datamanager_back.dto.ProjectDto; +import com.rte_france.antares.datamanager_back.dto.ProjectInputDto; import com.rte_france.antares.datamanager_back.exception.BadRequestException; import com.rte_france.antares.datamanager_back.exception.ResourceNotFoundException; import com.rte_france.antares.datamanager_back.repository.PinnedProjectRepository; import com.rte_france.antares.datamanager_back.repository.ProjectRepository; +import com.rte_france.antares.datamanager_back.repository.StudyRepository; import com.rte_france.antares.datamanager_back.repository.model.PinnedProjectEntity; import com.rte_france.antares.datamanager_back.repository.model.PinnedProjectEntityId; import com.rte_france.antares.datamanager_back.repository.model.ProjectEntity; @@ -41,6 +43,8 @@ class ProjectServiceImplTest { @Mock private ProjectRepository projectRepository; + @Mock + private StudyRepository studyRepository; @Test void findProjectsByCriteria_returnsAllProjectsWhenSearchIsNull() { @@ -292,4 +296,39 @@ void searchProjectsByNameHandlesNullInput() { assertEquals(0, result.size()); verify(projectRepository, times(1)).findByNameContainingIgnoreCase(null); } + @Test + void createProject_returnsProjectEntity_whenProjectDoesNotExistAndAllStudiesExist() { + ProjectInputDto projectInputDto = new ProjectInputDto(); + projectInputDto.setName("testProject"); + + when(projectRepository.findByName(any(String.class))).thenReturn(Optional.empty()); + when(projectRepository.save(any(ProjectEntity.class))).thenAnswer(i -> i.getArguments()[0]); + + ProjectEntity projectEntity = projectService.createProject(projectInputDto); + + assertEquals(projectInputDto.getName(), projectEntity.getName()); + } + + @Test + void createProject_throwsException_whenProjectExists() { + ProjectInputDto projectInputDto = new ProjectInputDto(); + projectInputDto.setName("testProject"); + + when(projectRepository.findByName(any(String.class))).thenReturn(Optional.of(new ProjectEntity())); + + assertThrows(IllegalArgumentException.class, () -> projectService.createProject(projectInputDto)); + } + + @Test + void createProject_createsProject_whenOneStudyProvided() { + ProjectInputDto projectInputDto = new ProjectInputDto(); + projectInputDto.setName("testProject"); + + when(projectRepository.findByName(any(String.class))).thenReturn(Optional.empty()); + when(projectRepository.save(any(ProjectEntity.class))).thenAnswer(i -> i.getArguments()[0]); + + ProjectEntity projectEntity = projectService.createProject(projectInputDto); + + assertEquals(projectInputDto.getName(), projectEntity.getName()); + } } From 88dc7ac84e45554c045db9f265a74d29f73178b5 Mon Sep 17 00:00:00 2001 From: mouad el azaar Date: Tue, 28 Jan 2025 20:55:06 +0100 Subject: [PATCH 15/15] refacto create project --- .../datamanager_back/dto/ProjectInputDto.java | 3 + .../service/impl/ProjectServiceImpl.java | 9 + .../controller/ProjectControllerTest.java | 2 - .../service/ProjectServiceImplTest.java | 164 ++++++++++-------- 4 files changed, 105 insertions(+), 73 deletions(-) diff --git a/src/main/java/com/rte_france/antares/datamanager_back/dto/ProjectInputDto.java b/src/main/java/com/rte_france/antares/datamanager_back/dto/ProjectInputDto.java index 687727c..ac9a52a 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/dto/ProjectInputDto.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/dto/ProjectInputDto.java @@ -5,6 +5,8 @@ import lombok.Data; import lombok.NoArgsConstructor; +import java.util.List; + @Data @Builder(toBuilder = true) @@ -13,4 +15,5 @@ public class ProjectInputDto { String name; String description; + List tags; } diff --git a/src/main/java/com/rte_france/antares/datamanager_back/service/impl/ProjectServiceImpl.java b/src/main/java/com/rte_france/antares/datamanager_back/service/impl/ProjectServiceImpl.java index 38a2124..224b067 100644 --- a/src/main/java/com/rte_france/antares/datamanager_back/service/impl/ProjectServiceImpl.java +++ b/src/main/java/com/rte_france/antares/datamanager_back/service/impl/ProjectServiceImpl.java @@ -178,17 +178,26 @@ private void checkIfUserHasALreadyMaxPinnedProjects(String userId) { @Override public ProjectEntity createProject(ProjectInputDto projectInputDto) { + if (StringUtils.isBlank(projectInputDto.getName())) { + throw new IllegalArgumentException("Project name is required."); + } + Optional existingProject = projectRepository.findByName(projectInputDto.getName()); if (existingProject.isPresent()) { throw new IllegalArgumentException("A project with the same name already exists."); } + if (projectInputDto.getTags() != null && projectInputDto.getTags().size() > 6) { + throw new IllegalArgumentException("A project cannot have more than 6 tags."); + } + ProjectEntity newProject = new ProjectEntity(); newProject.setName(projectInputDto.getName()); newProject.setCreationDate(LocalDateTime.now()); newProject.setCreatedBy("pegase"); newProject.setDescription(projectInputDto.getDescription()); + newProject.setTags(projectInputDto.getTags()); return projectRepository.save(newProject); } diff --git a/src/test/java/com/rte_france/antares/datamanager_back/controller/ProjectControllerTest.java b/src/test/java/com/rte_france/antares/datamanager_back/controller/ProjectControllerTest.java index 7ec2397..e2f2150 100644 --- a/src/test/java/com/rte_france/antares/datamanager_back/controller/ProjectControllerTest.java +++ b/src/test/java/com/rte_france/antares/datamanager_back/controller/ProjectControllerTest.java @@ -5,8 +5,6 @@ import com.rte_france.antares.datamanager_back.dto.ProjectInputDto; import com.rte_france.antares.datamanager_back.exception.BadRequestException; import com.rte_france.antares.datamanager_back.exception.ResourceNotFoundException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.rte_france.antares.datamanager_back.dto.ProjectInputDto; import com.rte_france.antares.datamanager_back.repository.model.ProjectEntity; import com.rte_france.antares.datamanager_back.service.ProjectService; import org.junit.jupiter.api.BeforeEach; diff --git a/src/test/java/com/rte_france/antares/datamanager_back/service/ProjectServiceImplTest.java b/src/test/java/com/rte_france/antares/datamanager_back/service/ProjectServiceImplTest.java index b928cd1..832c663 100644 --- a/src/test/java/com/rte_france/antares/datamanager_back/service/ProjectServiceImplTest.java +++ b/src/test/java/com/rte_france/antares/datamanager_back/service/ProjectServiceImplTest.java @@ -102,7 +102,7 @@ void getProjectsByUser_returnsEmptyWhenNoneExist() { } @Test - void deletePinnedProjectToUser_shouldCallDeleteMethod() { + void deletePinnedProjectToUser_shouldCallDeleteMethod() { // Given String userId = "userId"; Integer projectId = 1; @@ -135,16 +135,16 @@ void deletePinnedProjectForGivenUser_shouldThrowException_whenProjectDoesNotExis } @Test - void getProjectDetailsById(){ + void getProjectDetailsById() { //Given Integer projectId = 1; - ProjectEntity expectedProject= new ProjectEntity(); + ProjectEntity expectedProject = new ProjectEntity(); expectedProject.setId(1); expectedProject.setCreatedBy("User1"); expectedProject.setName("BP 2050"); when(projectRepository.findById(projectId)).thenReturn(Optional.of(expectedProject)); - ProjectEntity projectResult =projectService.findProjectById(projectId); + ProjectEntity projectResult = projectService.findProjectById(projectId); //Then assertEquals(projectResult, expectedProject); @@ -220,109 +220,130 @@ void pinProjectForUser_throwsExceptionWhenUserHasMaxPinnedProjects() { } @Test -void deleteProjectById_deletesProjectWhenNoStudies() { - Integer projectId = 1; - ProjectEntity project = new ProjectEntity(); - project.setId(projectId); + void deleteProjectById_deletesProjectWhenNoStudies() { + Integer projectId = 1; + ProjectEntity project = new ProjectEntity(); + project.setId(projectId); - when(projectRepository.findById(projectId)).thenReturn(Optional.of(project)); + when(projectRepository.findById(projectId)).thenReturn(Optional.of(project)); - projectService.deleteProjectById(projectId); + projectService.deleteProjectById(projectId); - verify(projectRepository, times(1)).deleteById(projectId); -} + verify(projectRepository, times(1)).deleteById(projectId); + } -@Test -void deleteProjectById_throwsExceptionWhenProjectNotFound() { - Integer projectId = 1; + @Test + void deleteProjectById_throwsExceptionWhenProjectNotFound() { + Integer projectId = 1; - when(projectRepository.findById(projectId)).thenReturn(Optional.empty()); + when(projectRepository.findById(projectId)).thenReturn(Optional.empty()); - ResourceNotFoundException exception = assertThrows( - ResourceNotFoundException.class, - () -> projectService.deleteProjectById(projectId) - ); + ResourceNotFoundException exception = assertThrows( + ResourceNotFoundException.class, + () -> projectService.deleteProjectById(projectId) + ); - assertEquals("Project not found with ID: 1", exception.getMessage()); - verify(projectRepository, never()).deleteById(projectId); -} + assertEquals("Project not found with ID: 1", exception.getMessage()); + verify(projectRepository, never()).deleteById(projectId); + } -@Test -void deleteProjectById_throwsExceptionWhenProjectContainsStudies() { - Integer projectId = 1; - ProjectEntity project = new ProjectEntity(); - project.setId(projectId); - project.setStudies(List.of(new StudyEntity())); + @Test + void deleteProjectById_throwsExceptionWhenProjectContainsStudies() { + Integer projectId = 1; + ProjectEntity project = new ProjectEntity(); + project.setId(projectId); + project.setStudies(List.of(new StudyEntity())); - when(projectRepository.findById(projectId)).thenReturn(Optional.of(project)); + when(projectRepository.findById(projectId)).thenReturn(Optional.of(project)); - BadRequestException exception = assertThrows( - BadRequestException.class, - () -> projectService.deleteProjectById(projectId) - ); + BadRequestException exception = assertThrows( + BadRequestException.class, + () -> projectService.deleteProjectById(projectId) + ); - assertEquals("Project contains studies and cannot be deleted", exception.getMessage()); - verify(projectRepository, never()).deleteById(projectId); -} + assertEquals("Project contains studies and cannot be deleted", exception.getMessage()); + verify(projectRepository, never()).deleteById(projectId); + } -@Test -void searchProjectsByNameReturnsMatchingProjects() { - ProjectEntity projectEntity = new ProjectEntity(); - projectEntity.setId(1); - projectEntity.setName("Project 1"); - when(projectRepository.findByNameContainingIgnoreCase("Proj")).thenReturn(List.of(projectEntity)); + @Test + void searchProjectsByNameReturnsMatchingProjects() { + ProjectEntity projectEntity = new ProjectEntity(); + projectEntity.setId(1); + projectEntity.setName("Project 1"); + when(projectRepository.findByNameContainingIgnoreCase("Proj")).thenReturn(List.of(projectEntity)); - List result = projectService.searchProjectsByName("Proj"); + List result = projectService.searchProjectsByName("Proj"); - assertEquals(1, result.size()); - assertEquals("Project 1", result.get(0).getName()); - verify(projectRepository, times(1)).findByNameContainingIgnoreCase("Proj"); -} + assertEquals(1, result.size()); + assertEquals("Project 1", result.get(0).getName()); + verify(projectRepository, times(1)).findByNameContainingIgnoreCase("Proj"); + } -@Test -void searchProjectsByNameReturnsEmptyListWhenNoMatches() { - when(projectRepository.findByNameContainingIgnoreCase("NonExistent")).thenReturn(List.of()); + @Test + void searchProjectsByNameReturnsEmptyListWhenNoMatches() { + when(projectRepository.findByNameContainingIgnoreCase("NonExistent")).thenReturn(List.of()); - List result = projectService.searchProjectsByName("NonExistent"); + List result = projectService.searchProjectsByName("NonExistent"); - assertEquals(0, result.size()); - verify(projectRepository, times(1)).findByNameContainingIgnoreCase("NonExistent"); -} + assertEquals(0, result.size()); + verify(projectRepository, times(1)).findByNameContainingIgnoreCase("NonExistent"); + } -@Test -void searchProjectsByNameHandlesNullInput() { - List result = projectService.searchProjectsByName(null); + @Test + void searchProjectsByNameHandlesNullInput() { + List result = projectService.searchProjectsByName(null); + + assertEquals(0, result.size()); + verify(projectRepository, times(1)).findByNameContainingIgnoreCase(null); + } - assertEquals(0, result.size()); - verify(projectRepository, times(1)).findByNameContainingIgnoreCase(null); -} @Test - void createProject_returnsProjectEntity_whenProjectDoesNotExistAndAllStudiesExist() { + void createProject_throwsException_whenProjectNameIsBlank() { ProjectInputDto projectInputDto = new ProjectInputDto(); - projectInputDto.setName("testProject"); + projectInputDto.setName(""); - when(projectRepository.findByName(any(String.class))).thenReturn(Optional.empty()); - when(projectRepository.save(any(ProjectEntity.class))).thenAnswer(i -> i.getArguments()[0]); + IllegalArgumentException exception = assertThrows( + IllegalArgumentException.class, + () -> projectService.createProject(projectInputDto) + ); - ProjectEntity projectEntity = projectService.createProject(projectInputDto); + assertEquals("Project name is required.", exception.getMessage()); + } - assertEquals(projectInputDto.getName(), projectEntity.getName()); + @Test + void createProject_throwsException_whenProjectWithSameNameExists() { + ProjectInputDto projectInputDto = new ProjectInputDto(); + projectInputDto.setName("existingProject"); + + when(projectRepository.findByName(any(String.class))).thenReturn(Optional.of(new ProjectEntity())); + + IllegalArgumentException exception = assertThrows( + IllegalArgumentException.class, + () -> projectService.createProject(projectInputDto) + ); + + assertEquals("A project with the same name already exists.", exception.getMessage()); } @Test - void createProject_throwsException_whenProjectExists() { + void createProject_throwsException_whenTagsExceedLimit() { ProjectInputDto projectInputDto = new ProjectInputDto(); projectInputDto.setName("testProject"); + projectInputDto.setTags(List.of("tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7")); - when(projectRepository.findByName(any(String.class))).thenReturn(Optional.of(new ProjectEntity())); + IllegalArgumentException exception = assertThrows( + IllegalArgumentException.class, + () -> projectService.createProject(projectInputDto) + ); - assertThrows(IllegalArgumentException.class, () -> projectService.createProject(projectInputDto)); + assertEquals("A project cannot have more than 6 tags.", exception.getMessage()); } @Test - void createProject_createsProject_whenOneStudyProvided() { + void createProject_createsProjectSuccessfully_whenValidInput() { ProjectInputDto projectInputDto = new ProjectInputDto(); projectInputDto.setName("testProject"); + projectInputDto.setTags(List.of("tag1", "tag2")); when(projectRepository.findByName(any(String.class))).thenReturn(Optional.empty()); when(projectRepository.save(any(ProjectEntity.class))).thenAnswer(i -> i.getArguments()[0]); @@ -330,5 +351,6 @@ void createProject_createsProject_whenOneStudyProvided() { ProjectEntity projectEntity = projectService.createProject(projectInputDto); assertEquals(projectInputDto.getName(), projectEntity.getName()); + assertEquals(projectInputDto.getTags(), projectEntity.getTags()); } }