Skip to content

Commit 340d06b

Browse files
IGNITE-28242 Add reproducer, add assertion for cp read lock
1 parent aeaf9d6 commit 340d06b

3 files changed

Lines changed: 202 additions & 0 deletions

File tree

modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,8 @@ protected GridDhtLocalPartition localPartition() {
457457
boolean deferred = false;
458458
GridCacheVersion ver0 = null;
459459

460+
assert !checkExpire || cctx.shared().database().checkpointLockIsHeldByThread();
461+
460462
lockEntry();
461463

462464
try {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.ignite.internal.processors.cache.persistence.db;
19+
20+
import java.util.concurrent.atomic.AtomicBoolean;
21+
import java.util.concurrent.atomic.AtomicInteger;
22+
import java.util.concurrent.locks.ReentrantReadWriteLock;
23+
import org.apache.ignite.IgniteCache;
24+
import org.apache.ignite.cache.CacheAtomicityMode;
25+
import org.apache.ignite.cluster.ClusterState;
26+
import org.apache.ignite.configuration.CacheConfiguration;
27+
import org.apache.ignite.configuration.DataRegionConfiguration;
28+
import org.apache.ignite.configuration.DataStorageConfiguration;
29+
import org.apache.ignite.configuration.IgniteConfiguration;
30+
import org.apache.ignite.failure.StopNodeFailureHandler;
31+
import org.apache.ignite.internal.IgniteEx;
32+
import org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager;
33+
import org.apache.ignite.internal.processors.cache.persistence.IgniteCacheDatabaseSharedManager;
34+
import org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointManager;
35+
import org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointTimeoutLock;
36+
import org.apache.ignite.testframework.GridTestUtils;
37+
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
38+
import org.apache.ignite.transactions.Transaction;
39+
import org.junit.Test;
40+
41+
import static org.apache.ignite.transactions.TransactionConcurrency.PESSIMISTIC;
42+
import static org.apache.ignite.transactions.TransactionIsolation.READ_COMMITTED;
43+
44+
/**
45+
* Verifies that there is no deadlock between GridCacheMapEntry.unswap() in a tx put operation and a checkpointer requesting the cp write lock
46+
* with a parallel tx get operation.
47+
* <p>
48+
* Root cause of the deadlock was a wrong locking order: the cp read lock was acquired under a GridCacheMapEntry instance already locked
49+
* by the first tx put op.
50+
*/
51+
public class TxPutTxGetCheckpointerDeadlockTest extends GridCommonAbstractTest {
52+
/** */
53+
private final AtomicBoolean deadlockDetected = new AtomicBoolean(false);
54+
55+
/** */
56+
private final AtomicBoolean testFinished = new AtomicBoolean(false);
57+
58+
/** */
59+
private static final String CP_WRITE_LOCK_SWITCHING_THREAD_NAME = "cp-write-lock-switching-runner";
60+
61+
/** {@inheritDoc} */
62+
@Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
63+
IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
64+
65+
cfg.setDataStorageConfiguration(
66+
new DataStorageConfiguration()
67+
.setDefaultDataRegionConfiguration(
68+
new DataRegionConfiguration()
69+
.setPersistenceEnabled(true)
70+
)
71+
);
72+
73+
cfg.setFailureHandler(new StopNodeFailureHandler());
74+
75+
return cfg;
76+
}
77+
78+
/** {@inheritDoc} */
79+
@Override protected void beforeTest() throws Exception {
80+
stopAllGrids();
81+
82+
cleanPersistenceDir();
83+
}
84+
85+
/** {@inheritDoc} */
86+
@SuppressWarnings({"deprecation"})
87+
@Override protected void afterTest() throws Exception {
88+
if (deadlockDetected.get()) {
89+
Thread.getAllStackTraces().keySet().stream()
90+
.filter(t -> t.getName().startsWith(CP_WRITE_LOCK_SWITCHING_THREAD_NAME))
91+
.forEach(Thread::interrupt);
92+
}
93+
94+
testFinished.set(true);
95+
96+
stopAllGrids();
97+
98+
cleanPersistenceDir();
99+
}
100+
101+
/**
102+
* Tests for the absence of a deadlock between transactional cache operations and checkpointer write lock acquisition.
103+
* <p>
104+
* This test simulates a scenario where:
105+
* <ul>
106+
* <li>One thread performs continuous transactional {@code put} operations on a cache entry.</li>
107+
* <li>Another thread performs continuous transactional {@code get} operations on the same entry, potentially
108+
* triggering unswapping of the entry under lock.</li>
109+
* <li>A third thread repeatedly acquires and releases the checkpoint write lock, simulating checkpointer activity.</li>
110+
* </ul>
111+
* </p>
112+
* <p>
113+
* The primary purpose is to verify that there is no deadlock caused by incorrect lock ordering,
114+
* specifically when a transactional {@code put} holds a lock on a {@link org.apache.ignite.internal.processors.cache.GridCacheMapEntry}
115+
* while attempting to acquire a checkpoint read lock, at the same time as the checkpointer
116+
* tries to acquire checkpoint write lock.
117+
* </p>
118+
*
119+
* @throws Exception If failed.
120+
*/
121+
@Test
122+
public void testDeadlock() throws Exception {
123+
IgniteEx ignite = startGrid(0);
124+
125+
ignite.cluster().state(ClusterState.ACTIVE);
126+
127+
IgniteCache<Object, Object> cache = ignite
128+
.getOrCreateCache(new CacheConfiguration<>("test").setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL));
129+
130+
AtomicInteger cnt = new AtomicInteger(-1);
131+
132+
GridTestUtils.runAsync(
133+
() -> {
134+
while (!testFinished.get()) {
135+
try (Transaction tx = ignite.transactions().txStart(PESSIMISTIC, READ_COMMITTED)) {
136+
cache.put(0, cnt.incrementAndGet());
137+
tx.commit();
138+
}
139+
}
140+
},
141+
"write-tx-runner"
142+
);
143+
144+
GridTestUtils.runAsync(
145+
() -> {
146+
while (!testFinished.get()) {
147+
try (Transaction tx = ignite.transactions().txStart(PESSIMISTIC, READ_COMMITTED)) {
148+
cache.get(0);
149+
}
150+
}
151+
},
152+
"read-tx-runner"
153+
);
154+
155+
GridTestUtils.runAsync(
156+
() -> {
157+
IgniteCacheDatabaseSharedManager db = ignite.context().cache().context().database();
158+
CheckpointManager cpMgr = ((GridCacheDatabaseSharedManager)db).getCheckpointManager();
159+
CheckpointTimeoutLock timeoutLock = cpMgr.checkpointTimeoutLock();
160+
ReentrantReadWriteLock.WriteLock cpWriteLock = GridTestUtils.getFieldValue(timeoutLock,
161+
"checkpointReadWriteLock", "checkpointLock", "writeLock");
162+
163+
while (!testFinished.get()) {
164+
// an interruptible version of lock method is used to allow end the deadlock at the end of the test
165+
cpWriteLock.lockInterruptibly();
166+
cpWriteLock.unlock();
167+
}
168+
},
169+
CP_WRITE_LOCK_SWITCHING_THREAD_NAME
170+
);
171+
172+
GridTestUtils.runAsync(
173+
() -> {
174+
int prevVal = -1;
175+
176+
while (!testFinished.get()) {
177+
int curVal = cnt.get();
178+
179+
if (curVal != -1 && curVal == prevVal) {
180+
deadlockDetected.set(true);
181+
182+
return;
183+
}
184+
else {
185+
prevVal = curVal;
186+
187+
doSleep(200);
188+
}
189+
}
190+
},
191+
"progress-monitor"
192+
);
193+
194+
assertTrue(GridTestUtils.waitForCondition(() -> cnt.get() != -1, 10_000L));
195+
assertFalse("Unexpected deadlock detected", GridTestUtils.waitForCondition(deadlockDetected::get, 10_000L));
196+
}
197+
}

modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.Collection;
2222
import java.util.List;
2323
import org.apache.ignite.internal.processors.cache.persistence.EagerTtlTest;
24+
import org.apache.ignite.internal.processors.cache.persistence.db.TxPutTxGetCheckpointerDeadlockTest;
2425
import org.apache.ignite.internal.processors.cache.persistence.wal.WalRotatedIdPartRecordTest;
2526
import org.apache.ignite.testframework.GridTestUtils;
2627
import org.apache.ignite.testframework.junits.DynamicSuite;
@@ -47,6 +48,8 @@ public static List<Class<?>> suite(Collection<Class> ignoredTests) {
4748

4849
GridTestUtils.addTestIfNeeded(suite, WalRotatedIdPartRecordTest.class, ignoredTests);
4950

51+
GridTestUtils.addTestIfNeeded(suite, TxPutTxGetCheckpointerDeadlockTest.class, ignoredTests);
52+
5053
return suite;
5154
}
5255
}

0 commit comments

Comments
 (0)