Skip to content

Commit

Permalink
IGNITE-24238 Fix flaky testCreationZoneWhenDataNodesAreDeletedIsNotSu…
Browse files Browse the repository at this point in the history
…ccessful (#5086)
  • Loading branch information
sashapolo authored Jan 21, 2025
1 parent 46b7a33 commit 2d20b60
Showing 1 changed file with 57 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.stream.Stream;
import org.apache.ignite.configuration.validation.Validator;
import org.apache.ignite.internal.BaseIgniteRestartTest;
Expand Down Expand Up @@ -184,7 +187,7 @@ public class ItIgniteDistributionZoneManagerNodeRestartTest extends BaseIgniteRe
* @param idx Node index.
* @return Partial node.
*/
private PartialNode startPartialNode(int idx) {
private PartialNode startPartialNode(int idx, Consumer<MetaStorageManager> metaStorageMocker) {
String name = testNodeName(testInfo, idx);

Path dir = workDir.resolve(name);
Expand Down Expand Up @@ -260,6 +263,8 @@ private PartialNode startPartialNode(int idx) {

metastore = spy(StandaloneMetaStorageManager.create(storage, clock, readOperationForCompactionTracker));

metaStorageMocker.accept(metastore);

blockMetaStorageUpdates(metastore);

var revisionUpdater = new MetaStorageRevisionListenerRegistry(metastore);
Expand Down Expand Up @@ -350,11 +355,17 @@ private PartialNode startPartialNode(int idx) {
clock
);

assertThat(catalogManager.catalogInitializationFuture(), willCompleteSuccessfully());

partialNodes.add(partialNode);

return partialNode;
}

private PartialNode startPartialNode(int idx) {
return startPartialNode(idx, metaStorageManager -> {});
}

@AfterEach
public void afterTest() {
startScaleUpBlocking = false;
Expand Down Expand Up @@ -499,8 +510,6 @@ public void testLogicalTopologyInterruptedEventRestoredAfterRestart() throws Exc
TIMEOUT_MILLIS
);

metastore = findComponent(node.startedComponents(), MetaStorageManager.class);

startGlobalStateUpdateBlocking = true;
startScaleUpBlocking = true;

Expand Down Expand Up @@ -606,21 +615,60 @@ public void testFirstLogicalTopologyUpdateInterruptedEventRestoredAfterRestart()

@Test
public void testCreationZoneWhenDataNodesAreDeletedIsNotSuccessful() throws Exception {
PartialNode node = startPartialNode(0);
var imitateConcurrentRemoval = new AtomicBoolean();

var dataNodeKey = new AtomicReference<ByteArray>();

PartialNode node = startPartialNode(0, metaStorageManager -> {
// In this mock we catch invocation of DistributionZoneManager.initDataNodesAndTriggerKeysInMetaStorage, where condition is
// based on presence of data node key in ms. After that we make this data node as a tombstone, so when logic of creation of a
// zone is run, there won't be any initialisation of data nodes keys. We try to imitate concurrent removal of a zone.
doAnswer(invocation -> {
ByteArray dataNodeKeyForZone = dataNodeKey.get();

// Here we remove data nodes value for newly created zone, so it is tombstone
assertThat(
metaStorageManager.put(dataNodeKeyForZone, DataNodesMapSerializer.serialize(toDataNodesMap(emptySet()))),
willCompleteSuccessfully()
);

assertThat(
metaStorageManager.remove(dataNodeKeyForZone),
willCompleteSuccessfully()
);

return invocation.callRealMethod();
}).when(metaStorageManager).invoke(argThat(iif -> {
if (!imitateConcurrentRemoval.get()) {
return false;
}

If iif1 = MetaStorageWriteHandler.toIf(iif);

byte[][] keysFromIf = iif1.cond().keys();

Optional<ByteArray> dataNodeKeyOptional = Arrays.stream(keysFromIf)
.filter(op -> startsWith(op, zoneDataNodesKey().bytes()))
.findFirst()
.map(ByteArray::new);

dataNodeKeyOptional.ifPresent(dataNodeKey::set);

return dataNodeKeyOptional.isPresent();
}));
});

node.logicalTopology().putNode(A);
node.logicalTopology().putNode(B);

node.logicalTopology().putNode(C);

Set<NodeWithAttributes> logicalTopology = Stream.of(A, B, C)
.map(n -> new NodeWithAttributes(n.name(), n.id(), n.userAttributes(), n.storageProfiles()))
.collect(toSet());

DistributionZoneManager distributionZoneManager = getDistributionZoneManager(node);
DistributionZoneManager finalDistributionZoneManager = distributionZoneManager;

assertTrue(waitForCondition(() -> logicalTopology.equals(finalDistributionZoneManager.logicalTopology()), TIMEOUT_MILLIS));
assertTrue(waitForCondition(() -> logicalTopology.equals(distributionZoneManager.logicalTopology()), TIMEOUT_MILLIS));

int zoneId = getDefaultZoneId(node);

Expand All @@ -632,40 +680,12 @@ public void testCreationZoneWhenDataNodesAreDeletedIsNotSuccessful() throws Exce
TIMEOUT_MILLIS
);

metastore = findComponent(node.startedComponents(), MetaStorageManager.class);

byte[][] dataNodeKey = new byte[1][1];

// In this mock we catch invocation of DistributionZoneManager.initDataNodesAndTriggerKeysInMetaStorage, where condition is based
// on presence of data node key in ms. After that we make this data node as a tombstone, so when logic of creation of a zone is
// run, there won't be any initialisation of data nodes keys. We try to imitate concurrent removal of a zone.
doAnswer(invocation -> {
ByteArray dataNodeKeyForZone = new ByteArray(dataNodeKey[0]);

// Here we remove data nodes value for newly created zone, so it is tombstone
metastore.put(dataNodeKeyForZone, DataNodesMapSerializer.serialize(toDataNodesMap(emptySet()))).get();

metastore.remove(dataNodeKeyForZone).get();

return invocation.callRealMethod();
}).when(metastore).invoke(argThat(iif -> {
If iif1 = MetaStorageWriteHandler.toIf(iif);

byte[][] keysFromIf = iif1.cond().keys();

Optional<byte[]> dataNodeKeyOptional = Arrays.stream(keysFromIf)
.filter(op -> startsWith(op, zoneDataNodesKey().bytes()))
.findFirst();

dataNodeKeyOptional.ifPresent(bytes -> dataNodeKey[0] = bytes);

return dataNodeKeyOptional.isPresent();
}));
imitateConcurrentRemoval.set(true);

createZone(getCatalogManager(node), "zone1", INFINITE_TIMER_VALUE, INFINITE_TIMER_VALUE, null, DEFAULT_STORAGE_PROFILE);

// Assert that after creation of a zone, data nodes are still tombstone, but not the logical topology, as for default zone.
assertThat(metastore.get(new ByteArray(dataNodeKey[0])).thenApply(Entry::tombstone), willBe(true));
assertThat(metastore.get(dataNodeKey.get()).thenApply(Entry::tombstone), willBe(true));
}

private Set<NodeWithAttributes> deserializeLogicalTopologySet(byte[] bytes) {
Expand Down Expand Up @@ -768,8 +788,6 @@ public void testScaleUpTimerIsRestoredAfterRestart(boolean defaultZone) throws E
assertDataNodesFromManager(distributionZoneManager, metastore::appliedRevision, catalogManager::latestCatalogVersion, zoneId,
Set.of(A, B, C), TIMEOUT_MILLIS);

metastore = findComponent(node.startedComponents(), MetaStorageManager.class);

assertValueInStorage(
metastore,
zoneDataNodesKey(zoneId),
Expand Down Expand Up @@ -829,8 +847,6 @@ public void testScaleDownTimerIsRestoredAfterRestart(boolean defaultZone) throws
TIMEOUT_MILLIS
);

metastore = findComponent(node.startedComponents(), MetaStorageManager.class);

assertValueInStorage(
metastore,
zoneDataNodesKey(zoneId),
Expand Down

0 comments on commit 2d20b60

Please sign in to comment.