diff --git a/README.md b/README.md index 0bfd07e..7ab52b8 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,14 @@ wip ## Internals wip: stages of processing, threading etc ### Line reader -wip + +`Line reader` aka. `Provider` processes the raw data into easy to look up format, to achieve optimal performance. Depends on characteristic of your application you can choose one of many storage types: +* StorageMode.HEAP +* StorageMode.HEAP_BYTE_ARRAY +* StorageMode.OFF_HEAP +* StorageMode.FILE + +Those are directly linked to mapdb modes described [here](http://www.mapdb.org/book/performance/). Default one is `StorageMode.FILE` ### Line processor wip ### Database builder diff --git a/pom.xml b/pom.xml index b577beb..757dd0a 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ technology.dice.open dice-where - 1.0.5 + 1.0.6 dice-where dice-where is a low memory footprint, highly efficient diff --git a/src/main/java/technology/dice/dicewhere/building/DatabaseBuilder.java b/src/main/java/technology/dice/dicewhere/building/DatabaseBuilder.java index 716ee9c..28d67ee 100644 --- a/src/main/java/technology/dice/dicewhere/building/DatabaseBuilder.java +++ b/src/main/java/technology/dice/dicewhere/building/DatabaseBuilder.java @@ -6,21 +6,9 @@ package technology.dice.dicewhere.building; -import com.google.common.base.Stopwatch; import com.google.common.collect.Queues; - -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; -import java.util.stream.Stream; - import com.google.protobuf.ByteString; +import org.jetbrains.annotations.NotNull; import org.mapdb.DB; import org.mapdb.DBException; import org.mapdb.DBMaker; @@ -32,11 +20,18 @@ import technology.dice.dicewhere.lineprocessing.SerializedLine; import technology.dice.dicewhere.lineprocessing.serializers.IPSerializer; import technology.dice.dicewhere.lineprocessing.serializers.protobuf.IPInformationProto; -import technology.dice.dicewhere.parsing.ParsedLine; import technology.dice.dicewhere.provider.ProviderKey; -import technology.dice.dicewhere.utils.IPUtils; import technology.dice.dicewhere.utils.ProtoValueConverter; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + public class DatabaseBuilder implements Runnable { private final BlockingQueue source; private final DatabaseBuilderListener listener; @@ -51,19 +46,29 @@ public DatabaseBuilder( BlockingQueue source, DatabaseBuilderListener listener, Decorator decorator) { + this(StorageMode.FILE, provider, source, listener, decorator); + } + + public DatabaseBuilder( + StorageMode storageMode, + ProviderKey provider, + BlockingQueue source, + DatabaseBuilderListener listener) { + + this(storageMode, provider, source, listener, null); + } + + public DatabaseBuilder( + StorageMode storageMode, + ProviderKey provider, + BlockingQueue source, + DatabaseBuilderListener listener, + Decorator decorator) { this.source = source; this.expectingMore = true; this.listener = listener; this.provider = provider; - DB db = - DBMaker.tempFileDB() - .checksumHeaderBypass() - .fileLockDisable() - .fileMmapEnable() - .fileChannelEnable() - .transactionEnable() - .fileDeleteAfterClose() - .make(); + DB db = createDB(storageMode); DB.TreeMapSink sink = db.treeMap( @@ -73,12 +78,33 @@ public DatabaseBuilder( this.decorator = decorator; } - public DatabaseBuilder( - ProviderKey provider, - BlockingQueue source, - DatabaseBuilderListener listener) { - - this(provider, source, listener, null); + @NotNull + private DB createDB(StorageMode storageMode) { + DB db; + switch (storageMode) { + case HEAP: + db = DBMaker.heapDB().checksumHeaderBypass().transactionEnable().make(); + break; + case HEAP_BYTE_ARRAY: + db = DBMaker.memoryDB().checksumHeaderBypass().transactionEnable().make(); + break; + case OFF_HEAP: + db = DBMaker.memoryDirectDB().checksumHeaderBypass().transactionEnable().make(); + break; + case FILE: + default: + db = + DBMaker.tempFileDB() + .checksumHeaderBypass() + .fileLockDisable() + .fileMmapEnable() + .fileChannelEnable() + .transactionEnable() + .fileDeleteAfterClose() + .make(); + break; + } + return db; } public void dontExpectMore() { @@ -154,4 +180,11 @@ private IPInformationProto.IpInformationProto buildIpProtobuf(IpInformation inpu public IPDatabase build() { return new IPDatabase(sink.create()); } + + public enum StorageMode { + HEAP, + HEAP_BYTE_ARRAY, + OFF_HEAP, + FILE + } } diff --git a/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpCityLiteLineReader.java b/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpCityLiteLineReader.java index 7a8147f..59865c3 100644 --- a/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpCityLiteLineReader.java +++ b/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpCityLiteLineReader.java @@ -5,6 +5,7 @@ */ package technology.dice.dicewhere.provider.dbip.reading; +import technology.dice.dicewhere.building.DatabaseBuilder; import technology.dice.dicewhere.decorator.Decorator; import technology.dice.dicewhere.decorator.DecoratorInformation; import technology.dice.dicewhere.parsing.LineParser; @@ -15,12 +16,18 @@ public class DbIpCityLiteLineReader extends DbIpLineReader { public DbIpCityLiteLineReader(Path csv) { - super(csv); + this(csv, null); + } + + public DbIpCityLiteLineReader(Path csv, Decorator decorator) { + this(csv, decorator, DatabaseBuilder.StorageMode.FILE); } public DbIpCityLiteLineReader( - Path csv, Decorator decorator) { - super(csv, decorator); + Path csv, + Decorator decorator, + DatabaseBuilder.StorageMode storageMode) { + super(csv, decorator, storageMode); } @Override diff --git a/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpCountryLiteLineReader.java b/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpCountryLiteLineReader.java index 38ea743..b27cc3c 100644 --- a/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpCountryLiteLineReader.java +++ b/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpCountryLiteLineReader.java @@ -5,6 +5,7 @@ */ package technology.dice.dicewhere.provider.dbip.reading; +import technology.dice.dicewhere.building.DatabaseBuilder; import technology.dice.dicewhere.decorator.Decorator; import technology.dice.dicewhere.decorator.DecoratorInformation; import technology.dice.dicewhere.parsing.LineParser; @@ -15,12 +16,18 @@ public class DbIpCountryLiteLineReader extends DbIpLineReader { public DbIpCountryLiteLineReader(Path csv) { - super(csv); + this(csv, null); + } + + public DbIpCountryLiteLineReader(Path csv, Decorator decorator) { + this(csv, decorator, DatabaseBuilder.StorageMode.FILE); } public DbIpCountryLiteLineReader( - Path csv, Decorator decorator) { - super(csv, decorator); + Path csv, + Decorator decorator, + DatabaseBuilder.StorageMode storageMode) { + super(csv, decorator, storageMode); } @Override diff --git a/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpLineReader.java b/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpLineReader.java index a60f39c..1e1e8fb 100644 --- a/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpLineReader.java +++ b/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpLineReader.java @@ -6,12 +6,9 @@ package technology.dice.dicewhere.provider.dbip.reading; -import java.io.IOException; -import java.nio.file.Path; -import java.util.stream.Stream; - import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import technology.dice.dicewhere.building.DatabaseBuilder; import technology.dice.dicewhere.decorator.Decorator; import technology.dice.dicewhere.decorator.DecoratorInformation; import technology.dice.dicewhere.parsing.LineParser; @@ -19,6 +16,10 @@ import technology.dice.dicewhere.provider.dbip.DbIpProviderKey; import technology.dice.dicewhere.reading.LineReader; +import java.io.IOException; +import java.nio.file.Path; +import java.util.stream.Stream; + public abstract class DbIpLineReader extends LineReader { private static final int BUFFER_SIZE = 1024 * 1024; private final LineParser lineParser; @@ -28,7 +29,16 @@ public DbIpLineReader(@NotNull Path csv) { this(csv, null); } - public DbIpLineReader(@NotNull Path csv, @Nullable Decorator decorator) { + public DbIpLineReader( + @NotNull Path csv, @Nullable Decorator decorator) { + this(csv, decorator, DatabaseBuilder.StorageMode.FILE); + } + + public DbIpLineReader( + @NotNull Path csv, + @Nullable Decorator decorator, + @NotNull DatabaseBuilder.StorageMode storageMode) { + super(storageMode); lineParser = buildLineParser(decorator); this.csv = csv; } diff --git a/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpLocationAndIspLineReader.java b/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpLocationAndIspLineReader.java index 9c9ca08..fdcce1f 100644 --- a/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpLocationAndIspLineReader.java +++ b/src/main/java/technology/dice/dicewhere/provider/dbip/reading/DbIpLocationAndIspLineReader.java @@ -5,6 +5,7 @@ */ package technology.dice.dicewhere.provider.dbip.reading; +import technology.dice.dicewhere.building.DatabaseBuilder; import technology.dice.dicewhere.decorator.Decorator; import technology.dice.dicewhere.decorator.DecoratorInformation; import technology.dice.dicewhere.parsing.LineParser; @@ -15,12 +16,19 @@ public class DbIpLocationAndIspLineReader extends DbIpLineReader { public DbIpLocationAndIspLineReader(Path csv) { - super(csv); + this(csv, null); } public DbIpLocationAndIspLineReader( Path csv, Decorator decorator) { - super(csv, decorator); + this(csv, decorator, DatabaseBuilder.StorageMode.FILE); + } + + public DbIpLocationAndIspLineReader( + Path csv, + Decorator decorator, + DatabaseBuilder.StorageMode storageMode) { + super(csv, decorator, storageMode); } @Override diff --git a/src/main/java/technology/dice/dicewhere/provider/maxmind/reading/MaxmindDbReader.java b/src/main/java/technology/dice/dicewhere/provider/maxmind/reading/MaxmindDbReader.java index 5792b53..a0d517a 100644 --- a/src/main/java/technology/dice/dicewhere/provider/maxmind/reading/MaxmindDbReader.java +++ b/src/main/java/technology/dice/dicewhere/provider/maxmind/reading/MaxmindDbReader.java @@ -8,6 +8,7 @@ import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import technology.dice.dicewhere.building.DatabaseBuilder; import technology.dice.dicewhere.decorator.Decorator; import technology.dice.dicewhere.decorator.DecoratorInformation; import technology.dice.dicewhere.parsing.LineParser; @@ -40,7 +41,17 @@ public MaxmindDbReader( @NotNull Path ipV6CSV, @Nullable Decorator decorator) throws IOException { + this(locationNames, ipV4CSV, ipV6CSV, decorator, DatabaseBuilder.StorageMode.FILE); + } + public MaxmindDbReader( + @NotNull Path locationNames, + @NotNull Path ipV4CSV, + @NotNull Path ipV6CSV, + @Nullable Decorator decorator, + @NotNull DatabaseBuilder.StorageMode storageMode) + throws IOException { + super(storageMode); ipV4CSVPath = ipV4CSV; ipV6CSVPath = ipV6CSV; MaxmindLocationsParser locationsParser = new MaxmindLocationsParser(); diff --git a/src/main/java/technology/dice/dicewhere/reading/LineReader.java b/src/main/java/technology/dice/dicewhere/reading/LineReader.java index c867030..0468786 100644 --- a/src/main/java/technology/dice/dicewhere/reading/LineReader.java +++ b/src/main/java/technology/dice/dicewhere/reading/LineReader.java @@ -9,6 +9,7 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Streams; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.jetbrains.annotations.NotNull; import technology.dice.dicewhere.building.DatabaseBuilder; import technology.dice.dicewhere.building.DatabaseBuilderListener; import technology.dice.dicewhere.building.IPDatabase; @@ -19,7 +20,13 @@ import technology.dice.dicewhere.parsing.LineParser; import technology.dice.dicewhere.provider.ProviderKey; -import java.io.*; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.RandomAccessFile; +import java.io.SequenceInputStream; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; @@ -28,7 +35,12 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; -import java.util.concurrent.*; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.zip.GZIPInputStream; @@ -40,9 +52,18 @@ */ public abstract class LineReader { private static final int LINES_BUFFER = 100000; + private final DatabaseBuilder.StorageMode storageMode; public static byte[] MAGIC_ZIP = {'P', 'K', 0x3, 0x4}; public static int MAGIG_GZIP = 0xff00; + public LineReader() { + this(DatabaseBuilder.StorageMode.FILE); + } + + public LineReader(@NotNull DatabaseBuilder.StorageMode storageMode) { + this.storageMode = storageMode; + } + public abstract ProviderKey provider(); public abstract LineParser parser(); @@ -137,9 +158,14 @@ public final IPDatabase read( DatabaseBuilder databaseBuilder = parser() .getDecorator() - .map(d -> new DatabaseBuilder(provider(), serializedLinesBuffer, buildingListener, d)) + .map( + d -> + new DatabaseBuilder( + storageMode, provider(), serializedLinesBuffer, buildingListener, d)) .orElseGet( - () -> new DatabaseBuilder(provider(), serializedLinesBuffer, buildingListener)); + () -> + new DatabaseBuilder( + storageMode, provider(), serializedLinesBuffer, buildingListener)); Future processorFuture = setupExecutorService.submit(processor); Future databaseBuilderFuture = setupExecutorService.submit(databaseBuilder);