Skip to content

Commit 450c747

Browse files
#20 Extract entries async
1 parent 41d6959 commit 450c747

File tree

15 files changed

+349
-147
lines changed

15 files changed

+349
-147
lines changed

misc/pmd/multithreading.xml

+1
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@
1111
<exclude name="AvoidSynchronizedAtMethodLevel"/>
1212
<exclude name="UseConcurrentHashMap"/>
1313
<exclude name="AvoidUsingVolatile"/>
14+
<exclude name="DoNotUseThreads"/>
1415
</rule>
1516
</ruleset>

src/main/java/ru/olegcherednik/zip4jvm/decompose/LocalFileHeaderDecompose.java

+6-5
Original file line numberDiff line numberDiff line change
@@ -128,26 +128,27 @@ private void copyPayload(Path dir, ZipEntry zipEntry, ZipEntryBlock.LocalFileHea
128128

129129
Block content = diagLocalFileHeader.getContent();
130130
long size = zipEntry.getCompressedSize();
131-
long offs = content.getDiskOffs() + content.getSize();
131+
// TODO here we should use SrcZip methods
132+
long absOffs = content.getDiskOffs() + content.getSize();
132133

133134
EncryptionMethod encryptionMethod = zipEntry.getEncryptionMethod();
134135

135136
if (encryptionMethod.isAes()) {
136137
AesEncryptionHeaderBlock block = (AesEncryptionHeaderBlock) encryptionHeaderBlock;
137138

138-
offs += block.getSalt().getSize();
139-
offs += block.getPasswordChecksum().getSize();
139+
absOffs += block.getSalt().getSize();
140+
absOffs += block.getPasswordChecksum().getSize();
140141

141142
size -= block.getSalt().getSize();
142143
size -= block.getPasswordChecksum().getSize();
143144
size -= block.getMac().getSize();
144145
} else if (encryptionMethod == EncryptionMethod.PKWARE) {
145146
PkwareEncryptionHeaderBlock block = (PkwareEncryptionHeaderBlock) encryptionHeaderBlock;
146-
offs += block.getSize();
147+
absOffs += block.getSize();
147148
size -= block.getSize();
148149
}
149150

150-
Utils.copyLarge(blockModel.getZipModel(), dir.resolve("payload" + EXT_DATA), offs, size);
151+
Utils.copyLarge(blockModel.getZipModel(), dir.resolve("payload" + EXT_DATA), absOffs, absOffs, size);
151152
}
152153

153154
private EncryptionHeaderDecompose encryptionHeader(EncryptionMethod encryptionMethod,

src/main/java/ru/olegcherednik/zip4jvm/decompose/Utils.java

+6-15
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import ru.olegcherednik.zip4jvm.model.ZipModel;
2222
import ru.olegcherednik.zip4jvm.model.block.Block;
2323
import ru.olegcherednik.zip4jvm.model.entry.ZipEntry;
24-
import ru.olegcherednik.zip4jvm.utils.ValidationUtils;
2524

2625
import lombok.AccessLevel;
2726
import lombok.NoArgsConstructor;
@@ -50,30 +49,22 @@ public static void print(Path file, Consumer<PrintStream> consumer) throws FileN
5049
}
5150

5251
public static void copyLarge(ZipModel zipModel, Path out, Block block) throws IOException {
53-
copyLarge(zipModel, out, block.getDiskOffs(), block.getSize());
52+
copyLarge(zipModel, out, block.getDiskOffs(), block.getAbsOffs(), block.getSize());
5453
}
5554

56-
public static void copyLarge(ZipModel zipModel, Path out, long offs, long size) throws IOException {
57-
Path file = zipModel.getSrcZip().getDiskByAbsOffs(offs).getPath();
55+
public static void copyLarge(ZipModel zipModel, Path out, long diskOffs, long absOffs, long size)
56+
throws IOException {
57+
Path file = zipModel.getSrcZip().getDiskByAbsOffs(absOffs).getPath();
5858

5959
try (InputStream fis = Files.newInputStream(file);
6060
OutputStream fos = Files.newOutputStream(out)) {
61-
long skipBytes = fis.skip(offs);
62-
assert skipBytes == offs;
61+
long skipBytes = fis.skip(diskOffs);
62+
assert skipBytes == diskOffs;
6363

6464
IOUtils.copyLarge(fis, fos, 0, size);
6565
}
6666
}
6767

68-
public static void copyByteArray(Path out, byte[] buf, Block block) throws IOException {
69-
ValidationUtils.requireLessOrEqual(block.getAbsOffs(), Integer.MAX_VALUE, "block.absoluteOffs");
70-
ValidationUtils.requireLessOrEqual(block.getSize(), Integer.MAX_VALUE, "block.size");
71-
72-
try (OutputStream fos = Files.newOutputStream(out)) {
73-
fos.write(buf, (int) block.getAbsOffs(), (int) block.getSize());
74-
}
75-
}
76-
7768
/**
 * Writes the whole {@code buf} into the file {@code out} by delegating to {@link Files#write}.
 *
 * @param out destination file
 * @param buf bytes to write
 * @throws IOException if the file cannot be written
 */
public static void copyByteArray(Path out, byte[] buf) throws IOException {
7869
Files.write(out, buf);
7970
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package ru.olegcherednik.zip4jvm.engine.unzip;
20+
21+
import ru.olegcherednik.zip4jvm.io.in.DataInput;
22+
import ru.olegcherednik.zip4jvm.io.in.file.consecutive.ConsecutiveAccessDataInput;
23+
24+
import lombok.RequiredArgsConstructor;
25+
import org.apache.commons.io.IOUtils;
26+
27+
import java.util.List;
28+
import java.util.concurrent.CopyOnWriteArrayList;
29+
import java.util.concurrent.ExecutorService;
30+
import java.util.function.Supplier;
31+
32+
/**
33+
* This class is designed to use with custom {@link ExecutorService} only. It holds a list of all {@link DataInput}
34+
* were create in a different threads. When method {@link #release()} is invoked, it closes all created
35+
* {@link DataInput}, but it does not clear {@link #THREAD_LOCAL} for all threads. I.e. after invoking
36+
* {@link #release()} and then invoking {@link #get()}, the given {@link DataInput} will not be {@literal null}, but it
37+
* will be closed and not available to reuse.
38+
*
39+
* @author Oleg Cherednik
40+
* @since 28.12.2024
41+
*/
42+
@RequiredArgsConstructor
43+
public class ConsecutiveAccessDataInputHolder {
44+
45+
private static final ThreadLocal<ConsecutiveAccessDataInput> THREAD_LOCAL = new ThreadLocal<>();
46+
47+
private final List<ConsecutiveAccessDataInput> dataInputs = new CopyOnWriteArrayList<>();
48+
49+
private final Supplier<ConsecutiveAccessDataInput> dataInputSupplier;
50+
51+
public void release() {
52+
// cannot clear all THREAD_LOCAL here
53+
dataInputs.forEach(IOUtils::closeQuietly);
54+
dataInputs.clear();
55+
}
56+
57+
public ConsecutiveAccessDataInput get() {
58+
ConsecutiveAccessDataInput in = THREAD_LOCAL.get();
59+
60+
if (in == null) {
61+
in = dataInputSupplier.get();
62+
THREAD_LOCAL.set(in);
63+
dataInputs.add(in);
64+
}
65+
66+
return in;
67+
}
68+
69+
}

src/main/java/ru/olegcherednik/zip4jvm/engine/unzip/UnzipEngine.java

+15-5
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
import ru.olegcherednik.zip4jvm.model.password.PasswordProvider;
2929
import ru.olegcherednik.zip4jvm.model.settings.UnzipSettings;
3030
import ru.olegcherednik.zip4jvm.model.src.SrcZip;
31+
import ru.olegcherednik.zip4jvm.utils.quitely.Quietly;
3132

32-
import java.io.IOException;
3333
import java.nio.file.Path;
3434
import java.util.Collection;
3535
import java.util.Collections;
@@ -47,7 +47,17 @@ public final class UnzipEngine implements ZipFile.Reader {
4747
/**
 * Reads the zip model of {@code srcZip} using the charset customizer and the password provider taken from
 * {@code settings}, then creates the extract engine for it.
 */
public UnzipEngine(SrcZip srcZip, UnzipSettings settings) {
4848
PasswordProvider passwordProvider = settings.getPasswordProvider();
4949
zipModel = ZipModelBuilder.read(srcZip, settings.getCharsetCustomizer(), passwordProvider);
50-
unzipExtractEngine = new UnzipExtractEngine(passwordProvider, zipModel);
50+
unzipExtractEngine = createUnzipExtractEngine(settings, zipModel);
51+
}
52+
53+
private static UnzipExtractEngine createUnzipExtractEngine(UnzipSettings settings, ZipModel zipModel) {
54+
PasswordProvider passwordProvider = settings.getPasswordProvider();
55+
56+
if (settings.getAsyncThreads() == UnzipSettings.ASYNC_THREADS_OFF)
57+
return new UnzipExtractEngine(passwordProvider, zipModel);
58+
59+
int totalThreads = settings.getAsyncThreads();
60+
return new UnzipExtractAsyncEngine(passwordProvider, zipModel, totalThreads);
5161
}
5262

5363
// ---------- ZipFile.Reader ----------
@@ -105,9 +115,9 @@ public ZipFile.Entry next() {
105115
};
106116
}
107117

108-
public static RandomAccessDataInput createRandomAccessDataInput(SrcZip srcZip) throws IOException {
109-
return srcZip.isSolid() ? new SolidRandomAccessDataInput(srcZip)
110-
: new SplitRandomAccessDataInput(srcZip);
118+
/**
 * Creates a {@link RandomAccessDataInput} for the given {@code srcZip}: a {@link SolidRandomAccessDataInput}
 * when {@code srcZip.isSolid()} is {@literal true}, a {@link SplitRandomAccessDataInput} otherwise. The
 * creation is wrapped with {@code Quietly.doRuntime()}, so this method declares no checked exception.
 */
public static RandomAccessDataInput createRandomAccessDataInput(SrcZip srcZip) {
119+
return Quietly.doRuntime(() -> srcZip.isSolid() ? new SolidRandomAccessDataInput(srcZip)
120+
: new SplitRandomAccessDataInput(srcZip));
111121
}
112122

113123
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package ru.olegcherednik.zip4jvm.engine.unzip;
20+
21+
import ru.olegcherednik.zip4jvm.model.ZipModel;
22+
import ru.olegcherednik.zip4jvm.model.entry.ZipEntry;
23+
import ru.olegcherednik.zip4jvm.model.password.PasswordProvider;
24+
import ru.olegcherednik.zip4jvm.utils.quitely.Quietly;
25+
import ru.olegcherednik.zip4jvm.utils.quitely.functions.RunnableWithException;
26+
27+
import org.apache.commons.collections4.CollectionUtils;
28+
29+
import java.nio.file.Path;
30+
import java.util.Iterator;
31+
import java.util.LinkedList;
32+
import java.util.List;
33+
import java.util.Set;
34+
import java.util.concurrent.CompletableFuture;
35+
import java.util.concurrent.Executor;
36+
import java.util.concurrent.ExecutorService;
37+
import java.util.concurrent.ForkJoinPool;
38+
import java.util.concurrent.ForkJoinWorkerThread;
39+
import java.util.concurrent.atomic.AtomicInteger;
40+
41+
/**
42+
* @author Oleg Cherednik
43+
* @since 28.12.2024
44+
*/
45+
public class UnzipExtractAsyncEngine extends UnzipExtractEngine {
46+
47+
protected final int totalThreads;
48+
49+
public UnzipExtractAsyncEngine(PasswordProvider passwordProvider, ZipModel zipModel, int totalThreads) {
50+
super(passwordProvider, zipModel);
51+
this.totalThreads = totalThreads <= 0 ? Runtime.getRuntime().availableProcessors() : totalThreads;
52+
}
53+
54+
// ---------- UnzipExtractEngine ----------
55+
56+
@Override
57+
protected void extractAllEntries(Path dstDir) {
58+
List<CompletableFuture<Void>> tasks = new LinkedList<>();
59+
Iterator<ZipEntry> it = zipModel.absOffsAscIterator();
60+
61+
ConsecutiveAccessDataInputHolder dataInputHolder =
62+
new ConsecutiveAccessDataInputHolder(this::createConsecutiveDataInput);
63+
ExecutorService executor = createExecutor();
64+
65+
try {
66+
while (it.hasNext()) {
67+
ZipEntry zipEntry = it.next();
68+
Path file = dstDir.resolve(zipEntry.getFileName());
69+
70+
CompletableFuture<Void> task = createCompletableFuture(
71+
() -> extractEntry(file, zipEntry, dataInputHolder.get()), executor);
72+
73+
tasks.add(task);
74+
}
75+
76+
tasks.forEach(CompletableFuture::join);
77+
} finally {
78+
executor.shutdown();
79+
dataInputHolder.release();
80+
}
81+
}
82+
83+
@Override
84+
protected void extractEntryByPrefix(Path dstDir, Set<String> prefixes) {
85+
assert CollectionUtils.isNotEmpty(prefixes);
86+
87+
List<CompletableFuture<Void>> tasks = new LinkedList<>();
88+
Iterator<ZipEntry> it = zipModel.absOffsAscIterator();
89+
90+
ConsecutiveAccessDataInputHolder dataInputHolder =
91+
new ConsecutiveAccessDataInputHolder(this::createConsecutiveDataInput);
92+
ExecutorService executor = createExecutor();
93+
94+
try {
95+
while (it.hasNext()) {
96+
ZipEntry zipEntry = it.next();
97+
String fileName = getFileName(zipEntry, prefixes);
98+
99+
if (fileName != null) {
100+
Path file = dstDir.resolve(fileName);
101+
CompletableFuture<Void> task = createCompletableFuture(
102+
() -> extractEntry(file, zipEntry, dataInputHolder.get()), executor);
103+
104+
tasks.add(task);
105+
}
106+
}
107+
108+
tasks.forEach(CompletableFuture::join);
109+
} finally {
110+
dataInputHolder.release();
111+
executor.shutdown();
112+
}
113+
}
114+
115+
// ----------
116+
117+
protected ExecutorService createExecutor() {
118+
AtomicInteger counter = new AtomicInteger();
119+
String format = String.format("zip4jvm-extract-%%0%dd", String.valueOf(totalThreads).length());
120+
121+
ForkJoinPool.ForkJoinWorkerThreadFactory factory = pool -> {
122+
ForkJoinWorkerThread thread = ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool);
123+
thread.setName(String.format(format, counter.incrementAndGet()));
124+
return thread;
125+
};
126+
127+
return new ForkJoinPool(totalThreads, factory, null, false);
128+
}
129+
130+
protected CompletableFuture<Void> createCompletableFuture(RunnableWithException task, Executor executor) {
131+
return CompletableFuture.runAsync(() -> Quietly.doRuntime(task), executor);
132+
}
133+
134+
}

0 commit comments

Comments
 (0)