Skip to content

Commit

Permalink
Add instance index feature
Browse files Browse the repository at this point in the history
  • Loading branch information
rmartinsanta committed Jun 6, 2024
1 parent 55db0c6 commit eeaf1ca
Show file tree
Hide file tree
Showing 6 changed files with 239 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Developing
- (New) Add support for 7Z and ZIP compressed instances, with no configuration required.
- (New) Add support for instance index files, for more info see the docs, inside the Features -> Instance Manager section.
- (New) Added Solution::notifyUpdate(long), the difference with the existing Solution::notifyUpdate() is that users may notify that the solution was last updated at a different time
- (New) Added Solution::lastExecutesMovesAsString(): generate a string representation of the last executed moves, useful when something goes wrong.
- (Fix) #239: Metrics tracking did not work in some circumstances when autoconfig was disabled but Irace was manually enabled.
Expand Down
9 changes: 5 additions & 4 deletions common/src/main/java/es/urjc/etsii/grafo/util/IOUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,12 @@ public static void createFolder(String path) {
*
* @param path path to check
*/
public static void checkExists(String path) {
File dir = new File(path);
if (!dir.exists()) {
throw new IllegalArgumentException("Path does not exist or not a folder: " + dir.getAbsolutePath());
public static String checkExists(String path) {
File file = new File(path);
// Only existence is tested, so both regular files and directories are accepted.
if (!file.exists()) {
// Report the absolute path so a wrong working directory is easy to spot.
throw new IllegalArgumentException("Path does not exist: " + file.getAbsolutePath());
}
// Return the argument unchanged so the check can be chained, e.g. as a stream map(IOUtil::checkExists) step.
return path;
}

/**
Expand Down
55 changes: 48 additions & 7 deletions core/src/main/java/es/urjc/etsii/grafo/io/InstanceManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import java.io.File;
import java.io.IOException;
import java.lang.ref.SoftReference;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
Expand All @@ -26,6 +30,8 @@ public class InstanceManager<I extends Instance> {

private static final Logger log = LoggerFactory.getLogger(InstanceManager.class);
private static final int MAX_LENGTH = 300;
public static final String INDEX_SUFFIX = ".index";

protected final SoftReference<I> EMPTY = new SoftReference<>(null);
protected final InstanceConfiguration instanceConfiguration;
protected final InstanceImporter<I> instanceImporter;
Expand Down Expand Up @@ -63,18 +69,53 @@ public synchronized List<String> getInstanceSolveOrder(String expName, boolean p
return this.solveOrderByExperiment.computeIfAbsent(expName, s -> {
String instancePath = this.instanceConfiguration.getPath(expName);
checkExists(instancePath);
List<String> instances = IOUtil.iterate(instancePath);
List<String> instances = isIndexFile(instancePath)?
listIndexFile(instancePath):
listNormalFile(instancePath);

List<String> sortedInstances;
if (preload) {
sortedInstances = validateAndSort(expName, instances);
} else {
sortedInstances = lexicSort(instances);
}
List<String> sortedInstances = preload?
validateAndSort(expName, instances):
lexicSort(instances);
return sortedInstances;
});
}

/**
 * Enumerates the instance files found under the given path, leaving out any index file.
 *
 * @param instancePath path handed to {@code IOUtil.iterate} for enumeration
 * @return the enumerated paths, minus those ending with {@link #INDEX_SUFFIX}
 */
private List<String> listNormalFile(String instancePath) {
    List<String> candidates = IOUtil.iterate(instancePath);
    Iterator<String> cursor = candidates.iterator();
    while (cursor.hasNext()) {
        String candidate = cursor.next();
        if (!candidate.endsWith(INDEX_SUFFIX)) {
            // Regular instance file, keep it
            continue;
        }
        // Index files only describe instance subsets, they are never instances themselves
        log.info("Ignoring index file: {}", candidate);
        cursor.remove();
    }
    return candidates;
}

/**
 * Reads an index file and returns the absolute paths of the instances it lists.
 * Each non-blank line that does not start with {@code #} is treated as a path
 * relative to the folder that contains the index file. Leading and trailing
 * whitespace is ignored.
 *
 * @param instancePath path of the index file
 * @return absolute paths of the listed instances, in the order they appear in the index
 * @throws IllegalArgumentException if any listed instance does not exist
 * @throws RuntimeException if the index file cannot be read
 */
private List<String> listIndexFile(String instancePath) {
    // Make the path absolute BEFORE asking for the parent: a bare relative name
    // such as "set.index" has no parent component and getParent() would return null.
    Path indexFile = Path.of(instancePath).toAbsolutePath();
    Path parentPath = indexFile.getParent();
    try (var stream = Files.lines(indexFile)) {
        return stream
                // tolerate indentation and trailing whitespace around entries and comments
                .map(String::strip)
                .filter(entry -> !entry.isEmpty())
                .filter(entry -> !entry.startsWith("#"))
                .map(entry -> parentPath.resolve(entry))
                .map(Path::toAbsolutePath)
                .map(Path::toString)
                .map(IOUtil::checkExists)
                .toList();
    } catch (IOException e) {
        throw new RuntimeException("Failed to read instance index file: " + indexFile, e);
    }
}

/**
 * Tells whether the configured instance path points to an index file, that is,
 * an existing regular file whose name ends with {@link #INDEX_SUFFIX}.
 *
 * @param instancePath configured instance path
 * @return {@code true} if the path is an index file, {@code false} otherwise
 */
private boolean isIndexFile(String instancePath) {
    // Cheap suffix test first, the filesystem is only touched for candidates
    boolean indexFile = instancePath.endsWith(INDEX_SUFFIX) && new File(instancePath).isFile();
    if (!indexFile) {
        // Log only when the statement is actually true
        log.debug("Not an index file: {}", instancePath);
    }
    return indexFile;
}

protected List<String> validateAndSort(String expName, List<String> instancePaths) {
List<String> sortedInstances;
log.info("Loading all instances to check correctness...");
Expand Down
54 changes: 42 additions & 12 deletions core/src/test/java/es/urjc/etsii/grafo/io/InstanceManagerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.*;

class InstanceManagerTest {
Expand All @@ -23,6 +25,8 @@ class InstanceManagerTest {
String instancePath;
File instance1File;
File instance2File;
File instance3File;
File indexFile;

@SuppressWarnings("unchecked")
@BeforeEach
Expand All @@ -32,16 +36,30 @@ void setUpMock(@TempDir File tempFolder) throws IOException {
instance1File.createNewFile();
instance2File = new File(tempFolder, "TestInstance2");
instance2File.createNewFile();
instance3File = new File(tempFolder, "TestInstance3");
instance3File.createNewFile();

indexFile = new File(tempFolder, "instances.index");
Files.writeString(indexFile.toPath(), "TestInstance3\nTestInstance1");

var instance1 = new TestInstance("TestInstance1");
instance1.setPath(instance1File.getAbsolutePath());

var instance2 = new TestInstance("TestInstance2");
instance2.setPath(instance2File.getAbsolutePath());

var instance3 = new TestInstance("TestInstance3");
instance3.setPath(instance3File.getAbsolutePath());

instanceImporter = mock(InstanceImporter.class);


when(instanceImporter.importInstance(instance1File.getAbsolutePath())).thenReturn(instance1);
when(instanceImporter.importInstance(instance2File.getAbsolutePath())).thenReturn(instance2);
when(instanceImporter.importInstance(instance3File.getAbsolutePath())).thenReturn(instance3);

// If the instance manager requests to load a non-existing instance, or the index file, throw an exception
when(instanceImporter.importInstance(indexFile.getAbsolutePath())).thenThrow(RuntimeException.class);
}

private InstanceManager<TestInstance> buildManager(boolean preload, String instancePath){
Expand All @@ -60,31 +78,43 @@ void testLazy(){
var manager = buildManager(false, instancePath);
var instances = manager.getInstanceSolveOrder(TEST_EXPERIMENT);
verifyNoInteractions(instanceImporter);
Assertions.assertEquals(instances.get(0), instance1File.getAbsolutePath());
Assertions.assertEquals(instances.get(1), instance2File.getAbsolutePath());
assertEquals(instances.get(0), instance1File.getAbsolutePath());
assertEquals(instances.get(1), instance2File.getAbsolutePath());
}

@Test
void testCustomOrder(){
var manager = buildManager(true, instancePath);
var instances = manager.getInstanceSolveOrder(TEST_EXPERIMENT);
verify(instanceImporter, times(2)).importInstance(any());
Assertions.assertEquals(instances.get(1), instance1File.getAbsolutePath());
Assertions.assertEquals(instances.get(0), instance2File.getAbsolutePath());
verify(instanceImporter, times(3)).importInstance(any());
assertEquals(instances.get(2), instance1File.getAbsolutePath());
assertEquals(instances.get(1), instance2File.getAbsolutePath());
assertEquals(instances.get(0), instance3File.getAbsolutePath());
}

/**
 * When the instance path is an index file, only the instances listed in it
 * must be loaded and returned, even if the folder contains more files.
 */
@Test
void testIndexFileOrdered(){
    var manager = buildManager(true, indexFile.getAbsolutePath());
    var instances = manager.getInstanceSolveOrder(TEST_EXPERIMENT);
    // there are 3 instances in the folder, but only instance3 and 1 should have been loaded
    assertEquals(2, instances.size());
    // check the returned paths themselves, not only how many there are
    Assertions.assertTrue(instances.contains(instance1File.getAbsolutePath()));
    Assertions.assertTrue(instances.contains(instance3File.getAbsolutePath()));
    verify(instanceImporter, times(1)).importInstance(instance1File.getAbsolutePath());
    verify(instanceImporter, times(1)).importInstance(instance3File.getAbsolutePath());
    // neither instance2 nor the index file itself may reach the importer
    verifyNoMoreInteractions(instanceImporter);
}

@Test
void checkCache(){
var manager = buildManager(true, instancePath);
var instances = manager.getInstanceSolveOrder(TEST_EXPERIMENT);
verify(instanceImporter, times(2)).importInstance(any());
verify(instanceImporter, times(3)).importInstance(any());
manager.getInstance(instances.get(0));
manager.getInstance(instances.get(1));
verify(instanceImporter, times(2)).importInstance(any());
verify(instanceImporter, times(3)).importInstance(any());
manager.purgeCache();
manager.getInstance(instances.get(0));
manager.getInstance(instances.get(1));
verify(instanceImporter, times(4)).importInstance(any());
verify(instanceImporter, times(5)).importInstance(any());
}

@Test
Expand All @@ -110,7 +140,7 @@ void checkNoCache(){
@Test
void validateTest(){
var manager = buildManager(false, instancePath);
Assertions.assertEquals(instanceImporter, manager.getUserImporterImplementation());
assertEquals(instanceImporter, manager.getUserImporterImplementation());
Assertions.assertThrows(IllegalArgumentException.class, () -> manager.validate(new ArrayList<>(), TEST_EXPERIMENT));
Assertions.assertThrows(IllegalArgumentException.class, () -> manager.validate(List.of(
new TestInstance("Instance 1"), new TestInstance("Instance 2"), new TestInstance("Instance 1")
Expand All @@ -122,23 +152,23 @@ void validateNoCompression(){
var path = "src/test/resources/instances";
var manager = buildManager(false, path, new TestInstanceImporter());
var instances = manager.getInstanceSolveOrder(TEST_EXPERIMENT);
Assertions.assertEquals(6, instances.size());
assertEquals(6, instances.size());
}

@Test
void validateZipCompression(){
var path = "src/test/resources/instzip";
var manager = buildManager(false, path, new TestInstanceImporter());
var instances = manager.getInstanceSolveOrder(TEST_EXPERIMENT);
Assertions.assertEquals(6, instances.size());
assertEquals(6, instances.size());
}

@Test
void validate7zCompression(){
var path = "src/test/resources/inst7z";
var manager = buildManager(false, path, new TestInstanceImporter());
var instances = manager.getInstanceSolveOrder(TEST_EXPERIMENT);
Assertions.assertEquals(6, instances.size());
assertEquals(6, instances.size());
for(var s: instances){
var instance = manager.getInstance(s);
Assertions.assertNotNull(instance);
Expand Down
133 changes: 133 additions & 0 deletions docs/features/instance-manager.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Instance manager
Mork comes with a built-in instance manager, which manages the whole instance load-unload lifecycle,
and provides a way to classify instances based on their properties.

## Instance manager modes
The instance manager is configured by using the properties with the prefix `instance.*`. For example,
the default instance path is configured using the property `instance.path.default`.
The `default` keyword can be replaced with the experiment name,
overriding the instance path for each user defined experiment if necessary.

The value of the `instance.path` property can be interpreted in multiple ways, depending on the path type.

### Path is a "normal" file
In the simplest case, the path represents a single file. In this case, the instance manager will ask
the user implemented `InstanceImporter` to load the instance from the file, and run the experiment using only this file.

### Path is a compressed file
If the path is a compressed file, the instance manager will decompress each file in memory,
and get an input stream to avoid disk I/O. This is useful when the instances are compressed to save disk space, as
decompression only happens in memory when required. From the user point of view, this is done transparently, and the
same method of the `InstanceImporter` is called, regardless of the file being compressed or not. If the compressed file
contains multiple files or directories, they are recursively enumerated.

### Path is a directory
If the path is a directory, the instance manager will enumerate all files in the directory,
and load each file as an instance. If the directory contains directories, they will be recursively enumerated.
If there are compressed files inside the directory, they will be processed as explained in the previous section.

### Path is an index file
Index files are a way to group instances together, without splitting them in different folders.
They are especially useful when instances are extremely big, as they avoid data duplication.

For example, if we have a folder called "instances", which contains all the known instances in the literature for the problem we are currently solving,
we may want to execute only a subset of this instance set, for example when running preliminary experiments. An easy way to do this is to create a new instance folder, called `preliminary-instances`, and copy the instances that we want to run there. However, this approach has the downside of duplicating the data, which can be a problem if the instances are huge.
As an alternative, we can create a file inside the `instances` folder, called `preliminary.index`, which contains the names of the instances that we want to run. The instance manager will read this file, and load only the instances that are listed there, ignoring the rest.

!!! info
Note that index files are identified by the `.index` extension, and are ignored when enumerating instances unless they are explicitly configured as the value of the `instance.path` property.


## Instance loading
In order to load instances, users must extend the `InstanceImporter` class.
By default, the following template is generated by the Mork generator:

```java

public class ExampleInstanceImporter extends InstanceImporter<ExampleInstance> {

    /**
     * Load instance from file. Called by the framework when a new instance is being loaded.
     *
     * @param reader        input buffer, managed by the framework
     * @param suggestedName suggested name for the instance, can be ignored
     * @return immutable instance
     * @throws IOException if an error is encountered while the instance is being parsed
     */
    @Override
    public ExampleInstance importInstance(BufferedReader reader, String suggestedName) throws IOException {
        // Create and return instance object from file data
        // TODO parse all data from the given reader however I want

        // TIP: You may use a Scanner if you prefer it to a Buffered Reader:
        // Scanner sc = new Scanner(reader);

        // Call instance constructor when we have parsed all the data, passing data as we see fit
        var instance = new ExampleInstance(suggestedName);

        // IMPORTANT! Remember that instance data must be immutable from this point
        return instance;
    }
}
```

One important thing to always take into account is that **instances must be immutable after they have been loaded**.
This means that under no circumstance should the instance data be modified after the `importInstance` method
has finished. The reason for this is that immutable data can be safely and efficiently shared between threads,
which is a key feature for Mork's parallel execution.

!!! tip
Note that if you want to precalculate any kind of data, it is perfectly valid to do it while loading the instance, before returning.


### Advanced loading methods
In some cases, the same logical instance is split in multiple files, or maybe instance data follows a customized binary format
and you have an existing method that receives a `File` object and initializes the instance. In these advanced use cases,
where more control over the loading process is needed, the second overload of the `importInstance` method can be used
instead. For example:
```java
public class ExampleInstanceImporter extends InstanceImporter<ExampleInstance> {
@Override
public ExampleInstance importInstance(BufferedReader reader, String suggestedName) throws IOException {
throw new UnsupportedOperationException("Loading from a BufferedReader is not supported");
}

@Override
public ExampleInstance importInstance(String path) {
var file = new File(path);
// call our custom method to load the instance
ExampleInstance instance = customLoadMethod(file);
return instance;
}
}
```

## Instance solve order
By default, instances are solved in lexicographic order, which means sorting instances by their filename.
If you want to solve instances in a specific order, for example from smaller to bigger instances, you can
override the `compareTo` method of the `Instance` class. For example:
```java
/**
*
* Sort instances by their node count, from smaller to bigger
* @param o the instance to compare to
* @return a negative integer, zero, or a positive integer as this instance is less than, equal to, or greater than the specified instance.
*/
@Override
public int compareTo(Instance o) {
return this.nNodes.compareTo(o.nNodes);
}
```

!!! tip
Instead of manually implementing the compare or comparator methods, which can sometimes be confusing, we recommend using the `Comparator.comparing` methods or the property comparator directly, as in the previous example.


## Instance unloading

By default, instances are validated and cached before the experiment starts running, which means that if they
fit in memory, I/O is reduced to a minimum. If they do not fit into memory,
they are loaded and unloaded as needed, which is suboptimal but works fine without user intervention.

This feature is called instance preloading, and while it is enabled by default,
it can be disabled by setting `instances.preload` to `false`. The main advantages of instance preloading are that you make sure
all instances can be actually loaded before the experiment starts, so any mistake is detected as early as possible, and that you avoid I/O during the experiment execution.
Moreover, preloading is required if a custom solve order is used.

However, if the instances are huge, you do not need to solve the instances in any given order, and you are sure that all instances are valid, you can safely disable this feature.

!!! tip
As a general rule, do not use automatic parallelization if instances are huge. The reason for this is that multiple threads can be solving different instances, and therefore Mork will be forced to keep multiple instances in memory at the same time, which can produce out-of-memory errors.

Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@

public class __RNAME__InstanceImporter extends InstanceImporter<__RNAME__Instance> {

/**
* Load instance from file. This method is called by the framework when a new instance is being loaded.
* Note that instance load time is never considered in the total execution time.
* @param reader Input buffer, managed by the framework.
* @param suggestedName Suggested filename for the instance, can be ignored.
* By default, the suggested filename is built by removing the path and extension info.
* For example, for the path "instances/TSP/TSP-1.txt", the suggestedName would be "TSP-1"
* @return immutable instance
* @throws IOException If an error is encountered while the instance is being parsed
*/
@Override
public __RNAME__Instance importInstance(BufferedReader reader, String suggestedName) throws IOException {
// Create and return instance object from file data
Expand Down

0 comments on commit eeaf1ca

Please sign in to comment.