001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions Copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2015-2016 ForgeRock AS.
015 */
016package org.opends.server.types;
017
018import java.util.Iterator;
019import java.util.LinkedList;
020import java.util.concurrent.TimeUnit;
021import java.util.concurrent.atomic.AtomicInteger;
022import java.util.concurrent.locks.Lock;
023import java.util.concurrent.locks.ReentrantReadWriteLock;
024
025import org.forgerock.opendj.ldap.DN;
026import org.forgerock.util.Reject;
027
028/**
029 * A lock manager coordinates directory update operations so that the DIT structure remains in a
030 * consistent state, as well as providing repeatable read isolation. When accessing entries
031 * components need to ensure that they have the appropriate lock:
032 * <ul>
033 * <li>repeatable reads: repeatable read isolation is rarely needed in practice, since all backend
034 * reads are guaranteed to be performed with read-committed isolation, which is normally sufficient.
035 * Specifically, read-only operations such as compare and search do not require any additional
036 * locking. If repeatable read isolation is required then lock the entry using
037 * {@link #tryReadLockEntry(DN)}
038 * <li>modifying an entry: acquire an entry write-lock for the target entry using
039 * {@link #tryWriteLockEntry(DN)}. Updates are typically performed using a read-modify-write cycle,
040 * so the write lock should be acquired before performing the initial read in order to ensure
041 * consistency
042 * <li>adding an entry: client code must acquire an entry write-lock for the target entry using
043 * {@link #tryWriteLockEntry(DN)}. The parent entry will automatically be protected from deletion by
044 * an implicit subtree read lock on the parent
045 * <li>deleting an entry: client code must acquire a subtree write lock for the target entry using
046 * {@link #tryWriteLockSubtree(DN)}
047 * <li>renaming an entry: client code must acquire a subtree write lock for the old entry, and a
048 * subtree write lock for the new entry using {@link #tryWriteLockSubtree(DN)}. Care should be taken
049 * to avoid deadlocks, e.g. by locking the DN which sorts first.
050 * </ul>
051 * In addition, backend implementations may choose to use their own lock manager for enforcing
052 * atomicity and isolation. This is typically the case for backends which cannot take advantage of
053 * atomicity guarantees provided by an underlying DB (the task backend is one such example).
054 * <p>
055 * <b>Implementation Notes</b>
056 * <p>
057 * The lock table is conceptually a cache of locks keyed on DN, i.e. a {@code Map<DN, DNLock>}.
058 * Locks must be kept in the cache while they are locked, but may be removed once they are no longer
059 * locked by any threads. Locks are represented using a pair of read-write locks: the first lock is
060 * the "subtree" lock and the second is the "entry" lock.
061 * <p>
062 * In order to lock an entry for read or write a <b>subtree</b> read lock is first acquired on each
063 * of the parent entries from the root DN down to the immediate parent of the entry to be locked.
064 * Then the appropriate read or write <b>entry</b> lock is acquired for the target entry. Subtree
065 * write locking is performed by acquiring a <b>subtree</b> read lock on each of the parent entries
066 * from the root DN down to the immediate parent of the subtree to be locked. Then a <b>subtree</b>
067 * write lock is acquired for the target subtree.
068 * <p>
069 * The lock table itself is not represented using a {@code ConcurrentHashMap} because the JDK6/7
070 * APIs do not provide the ability to atomically add-and-lock or unlock-and-remove locks (this
071 * capability is provided in JDK8). Instead, we provide our own implementation comprising of a fixed
072 * number of buckets, a bucket being a {@code LinkedList} of {@code DNLock}s. In addition, it is
073 * important to be able to efficiently iterate up and down a chain of hierarchically related locks,
074 * so each lock maintains a reference to its parent lock. Modern directories tend to have a flat
075 * structure so it is also important to avoid contention on "hot" parent DNs. Typically, a lock
076 * attempt against a DN will involve a cache miss for the target DN and a cache hit for the parent,
077 * but the parent will be the same parent for all lock requests, resulting in a lot of contention on
078 * the same lock bucket. To avoid this the lock manager maintains a small-thread local cache of
079 * locks, so that parent locks can be acquired using a lock-free algorithm.
080 * <p>
081 * Since the thread local cache may reference locks which are not actively locked by anyone, a
082 * reference counting mechanism is used in order to prevent cached locks from being removed from the
083 * underlying lock table. The reference counting mechanism is also used for references between a
084 * lock and its parent lock. To summarize, locking a DN involves the following steps:
085 * <ul>
086 * <li>get the lock from the thread local cache. If the lock was not in the thread local cache then
087 * try fetching it from the lock table:
088 * <ul>
089 * <li><i>found</i> - store it in the thread local cache and bump the reference count
090 * <li><i>not found</i> - create a new lock. First fetch the parent lock using the same process,
091 * i.e. looking in the thread local cache, etc. Then create a new lock referencing the parent lock
092 * (bumps the reference count for the parent lock), and store it in the lock table and the thread
093 * local cache with a reference count of 1.
094 * </ul>
095 * <li>return the lock to the application and increment its reference count since the application
096 * now also has a reference to the lock.
097 * </ul>
098 * Locks are dereferenced when they are unlocked, when they are evicted from a thread local cache,
099 * and when a child lock's reference count reaches zero. A lock is completely removed from the lock
100 * table once its reference count reaches zero.
101 */
102@org.opends.server.types.PublicAPI(stability = org.opends.server.types.StabilityLevel.UNCOMMITTED,
103    mayInstantiate = false, mayExtend = false, mayInvoke = true)
104public final class LockManager
105{
106  /** A lock on an entry or subtree. A lock can only be unlocked once. */
107  public final class DNLock
108  {
109    private final DNLockHolder lock;
110    private final Lock subtreeLock;
111    private final Lock entryLock;
112    private boolean isLocked = true;
113
114    private DNLock(final DNLockHolder lock, final Lock subtreeLock, final Lock entryLock)
115    {
116      this.lock = lock;
117      this.subtreeLock = subtreeLock;
118      this.entryLock = entryLock;
119    }
120
121    @Override
122    public String toString()
123    {
124      return lock.toString();
125    }
126
127    /**
128     * Unlocks this lock and releases any blocked threads.
129     *
130     * @throws IllegalStateException
131     *           If this lock has already been unlocked.
132     */
133    public void unlock()
134    {
135      if (!isLocked)
136      {
137        throw new IllegalStateException("Already unlocked");
138      }
139      lock.releaseParentSubtreeReadLock();
140      subtreeLock.unlock();
141      entryLock.unlock();
142      dereference(lock);
143      isLocked = false;
144    }
145
146    /** For unit testing. */
147    int refCount()
148    {
149      return lock.refCount.get();
150    }
151  }
152
153  /** Lock implementation. */
154  private final class DNLockHolder
155  {
156    private final AtomicInteger refCount = new AtomicInteger();
157    private final DNLockHolder parent;
158    private final DN dn;
159    private final int dnHashCode;
160    private final ReentrantReadWriteLock subtreeLock = new ReentrantReadWriteLock();
161    private final ReentrantReadWriteLock entryLock = new ReentrantReadWriteLock();
162
163    DNLockHolder(final DNLockHolder parent, final DN dn, final int dnHashCode)
164    {
165      this.parent = parent;
166      this.dn = dn;
167      this.dnHashCode = dnHashCode;
168    }
169
170    @Override
171    public String toString()
172    {
173      return "\"" + dn + "\" : " + refCount;
174    }
175
176    /** Unlocks the subtree read lock from the parent of this lock up to the root. */
177    void releaseParentSubtreeReadLock()
178    {
179      for (DNLockHolder lock = parent; lock != null; lock = lock.parent)
180      {
181        lock.subtreeLock.readLock().unlock();
182      }
183    }
184
185    DNLock tryReadLockEntry()
186    {
187      return tryLock(subtreeLock.readLock(), entryLock.readLock());
188    }
189
190    DNLock tryWriteLockEntry()
191    {
192      return tryLock(subtreeLock.readLock(), entryLock.writeLock());
193    }
194
195    DNLock tryWriteLockSubtree()
196    {
197      return tryLock(subtreeLock.writeLock(), entryLock.writeLock());
198    }
199
200    /** Locks the subtree read lock from the root down to the parent of this lock. */
201    private boolean tryAcquireParentSubtreeReadLock()
202    {
203      // First lock the parents of the parent.
204      if (parent == null)
205      {
206        return true;
207      }
208
209      if (!parent.tryAcquireParentSubtreeReadLock())
210      {
211        return false;
212      }
213
214      // Then lock the parent of this lock
215      if (tryLockWithTimeout(parent.subtreeLock.readLock()))
216      {
217        return true;
218      }
219
220      // Failed to grab the parent lock within the timeout, so roll-back the other locks.
221      releaseParentSubtreeReadLock();
222      return false;
223    }
224
225    private DNLock tryLock(final Lock subtreeLock, final Lock entryLock)
226    {
227      if (tryAcquireParentSubtreeReadLock())
228      {
229        if (tryLockWithTimeout(subtreeLock))
230        {
231          if (tryLockWithTimeout(entryLock))
232          {
233            return new DNLock(this, subtreeLock, entryLock);
234          }
235          subtreeLock.unlock();
236        }
237        releaseParentSubtreeReadLock();
238      }
239      // Failed to acquire all the necessary locks within the time out.
240      dereference(this);
241      return null;
242    }
243
244    private boolean tryLockWithTimeout(final Lock lock)
245    {
246      try
247      {
248        return lock.tryLock(lockTimeout, lockTimeoutUnits);
249      }
250      catch (final InterruptedException e)
251      {
252        // Unable to handle interrupts here.
253        Thread.currentThread().interrupt();
254        return false;
255      }
256    }
257  }
258
259  private static final long DEFAULT_LOCK_TIMEOUT = 9;
260  private static final TimeUnit DEFAULT_LOCK_TIMEOUT_UNITS = TimeUnit.SECONDS;
261  private static final int MINIMUM_NUMBER_OF_BUCKETS = 64;
262  private static final int THREAD_LOCAL_CACHE_SIZE = 8;
263
264  private final int numberOfBuckets;
265  private final LinkedList<DNLockHolder>[] lockTable;
266  private final long lockTimeout;
267  private final TimeUnit lockTimeoutUnits;
268
269  /** Avoid sub-classing in order to workaround class leaks in app servers. */
270  private final ThreadLocal<LinkedList<DNLockHolder>> threadLocalCache = new ThreadLocal<>();
271
272  /**
273   * Creates a new lock manager with a lock timeout of 9 seconds and an automatically chosen number
274   * of lock table buckets based on the number of processors.
275   */
276  public LockManager()
277  {
278    this(DEFAULT_LOCK_TIMEOUT, DEFAULT_LOCK_TIMEOUT_UNITS);
279  }
280
281  /**
282   * Creates a new lock manager with the specified lock timeout and an automatically chosen number
283   * of lock table buckets based on the number of processors.
284   *
285   * @param lockTimeout
286   *          The lock timeout.
287   * @param lockTimeoutUnit
288   *          The lock timeout units.
289   */
290  public LockManager(final long lockTimeout, final TimeUnit lockTimeoutUnit)
291  {
292    this(lockTimeout, lockTimeoutUnit, Runtime.getRuntime().availableProcessors() * 8);
293  }
294
295  /**
296   * Creates a new lock manager with the provided configuration.
297   *
298   * @param lockTimeout
299   *          The lock timeout.
300   * @param lockTimeoutUnit
301   *          The lock timeout units.
302   * @param numberOfBuckets
303   *          The number of buckets to use in the lock table. The minimum number of buckets is 64.
304   */
305  @SuppressWarnings("unchecked")
306  private LockManager(final long lockTimeout, final TimeUnit lockTimeoutUnit, final int numberOfBuckets)
307  {
308    Reject.ifFalse(lockTimeout >= 0, "lockTimeout must be a non-negative integer");
309    Reject.ifNull(lockTimeoutUnit, "lockTimeoutUnit must be non-null");
310    Reject.ifFalse(numberOfBuckets > 0, "numberOfBuckets must be a positive integer");
311
312    this.lockTimeout = lockTimeout;
313    this.lockTimeoutUnits = lockTimeoutUnit;
314    this.numberOfBuckets = getNumberOfBuckets(numberOfBuckets);
315    this.lockTable = new LinkedList[this.numberOfBuckets];
316    for (int i = 0; i < this.numberOfBuckets; i++)
317    {
318      this.lockTable[i] = new LinkedList<>();
319    }
320  }
321
322  @Override
323  public String toString()
324  {
325    final StringBuilder builder = new StringBuilder();
326    for (int i = 0; i < numberOfBuckets; i++)
327    {
328      final LinkedList<DNLockHolder> bucket = lockTable[i];
329      synchronized (bucket)
330      {
331        for (final DNLockHolder lock : bucket)
332        {
333          builder.append(lock);
334          builder.append('\n');
335        }
336      }
337    }
338    return builder.toString();
339  }
340
341  /**
342   * Acquires the read lock for the specified entry. This method will block if the entry is already
343   * write locked or if the entry, or any of its parents, have the subtree write lock taken.
344   *
345   * @param entry
346   *          The entry whose read lock is required.
347   * @return The lock, or {@code null} if the lock attempt timed out.
348   */
349  public DNLock tryReadLockEntry(final DN entry)
350  {
351    return acquireLockFromCache(entry).tryReadLockEntry();
352  }
353
354  /**
355   * Acquires the write lock for the specified entry. This method will block if the entry is already
356   * read or write locked or if the entry, or any of its parents, have the subtree write lock taken.
357   *
358   * @param entry
359   *          The entry whose write lock is required.
360   * @return The lock, or {@code null} if the lock attempt timed out.
361   */
362  public DNLock tryWriteLockEntry(final DN entry)
363  {
364    return acquireLockFromCache(entry).tryWriteLockEntry();
365  }
366
367  /**
368   * Acquires the write lock for the specified subtree. This method will block if any entry or
369   * subtree within the subtree is already read or write locked or if any of the parent entries of
370   * the subtree have the subtree write lock taken.
371   *
372   * @param subtree
373   *          The subtree whose write lock is required.
374   * @return The lock, or {@code null} if the lock attempt timed out.
375   */
376  public DNLock tryWriteLockSubtree(final DN subtree)
377  {
378    return acquireLockFromCache(subtree).tryWriteLockSubtree();
379  }
380
381  /** For unit testing. */
382  int getLockTableRefCountFor(final DN dn)
383  {
384    final int dnHashCode = dn.hashCode();
385    final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
386    synchronized (bucket)
387    {
388      for (final DNLockHolder lock : bucket)
389      {
390        if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
391        {
392          return lock.refCount.get();
393        }
394      }
395      return -1;
396    }
397  }
398
399  /** For unit testing. */
400  int getThreadLocalCacheRefCountFor(final DN dn)
401  {
402    final LinkedList<DNLockHolder> cache = threadLocalCache.get();
403    if (cache == null)
404    {
405      return -1;
406    }
407    final int dnHashCode = dn.hashCode();
408    for (final DNLockHolder lock : cache)
409    {
410      if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
411      {
412        return lock.refCount.get();
413      }
414    }
415    return -1;
416  }
417
418  private DNLockHolder acquireLockFromCache(final DN dn)
419  {
420    LinkedList<DNLockHolder> cache = threadLocalCache.get();
421    if (cache == null)
422    {
423      cache = new LinkedList<>();
424      threadLocalCache.set(cache);
425    }
426    return acquireLockFromCache0(dn, cache);
427  }
428
429  private DNLockHolder acquireLockFromCache0(final DN dn, final LinkedList<DNLockHolder> cache)
430  {
431    final int dnHashCode = dn.hashCode();
432    DNLockHolder lock = removeLock(cache, dn, dnHashCode);
433    if (lock == null)
434    {
435      lock = acquireLockFromLockTable(dn, dnHashCode, cache);
436      if (cache.size() >= THREAD_LOCAL_CACHE_SIZE)
437      {
438        // Cache too big: evict oldest entry.
439        dereference(cache.removeLast());
440      }
441    }
442    cache.addFirst(lock); // optimize for LRU
443    lock.refCount.incrementAndGet();
444    return lock;
445  }
446
447  private DNLockHolder acquireLockFromLockTable(final DN dn, final int dnHashCode, final LinkedList<DNLockHolder> cache)
448  {
449    /*
450     * The lock doesn't exist yet so we'll have to create a new one referencing its parent lock. The
451     * parent lock may not yet exist in the lock table either so acquire it before locking the
452     * bucket in order to avoid deadlocks resulting from reentrant bucket locks. Note that we
453     * pre-emptively fetch the parent lock because experiments show that the requested child lock is
454     * almost never in the lock-table. Specifically, this method is only called if we are already on
455     * the slow path due to a cache miss in the thread-local cache.
456     */
457    final DN parentDN = dn.parent();
458    final DNLockHolder parentLock = parentDN != null ? acquireLockFromCache0(parentDN, cache) : null;
459    boolean parentLockWasUsed = false;
460    try
461    {
462      final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
463      synchronized (bucket)
464      {
465        DNLockHolder lock = removeLock(bucket, dn, dnHashCode);
466        if (lock == null)
467        {
468          lock = new DNLockHolder(parentLock, dn, dnHashCode);
469          parentLockWasUsed = true;
470        }
471        bucket.addFirst(lock); // optimize for LRU
472        lock.refCount.incrementAndGet();
473        return lock;
474      }
475    }
476    finally
477    {
478      if (!parentLockWasUsed && parentLock != null)
479      {
480        dereference(parentLock);
481      }
482    }
483  }
484
485  private void dereference(final DNLockHolder lock)
486  {
487    if (lock.refCount.decrementAndGet() <= 0)
488    {
489      final LinkedList<DNLockHolder> bucket = getBucket(lock.dnHashCode);
490      boolean lockWasRemoved = false;
491      synchronized (bucket)
492      {
493        // Double check: another thread could have acquired the lock since we decremented it to zero.
494        if (lock.refCount.get() <= 0)
495        {
496          removeLock(bucket, lock.dn, lock.dnHashCode);
497          lockWasRemoved = true;
498        }
499      }
500
501      /*
502       * Dereference the parent outside of the bucket lock to avoid potential deadlocks due to
503       * reentrant bucket locks.
504       */
505      if (lockWasRemoved && lock.parent != null)
506      {
507        dereference(lock.parent);
508      }
509    }
510  }
511
512  private LinkedList<DNLockHolder> getBucket(final int dnHashCode)
513  {
514    return lockTable[dnHashCode & numberOfBuckets - 1];
515  }
516
517  /**
518   * Ensure that the number of buckets is a power of 2 in order to make it easier to map hash codes
519   * to bucket indexes.
520   */
521  private int getNumberOfBuckets(final int buckets)
522  {
523    final int roundedNumberOfBuckets = Math.min(buckets, MINIMUM_NUMBER_OF_BUCKETS);
524    int powerOf2 = 1;
525    while (powerOf2 < roundedNumberOfBuckets)
526    {
527      powerOf2 <<= 1;
528    }
529    return powerOf2;
530  }
531
532  private DNLockHolder removeLock(final LinkedList<DNLockHolder> lockList, final DN dn, final int dnHashCode)
533  {
534    final Iterator<DNLockHolder> iterator = lockList.iterator();
535    while (iterator.hasNext())
536    {
537      final DNLockHolder lock = iterator.next();
538      if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
539      {
540        // Found: remove the lock because it will be moved to the front of the list.
541        iterator.remove();
542        return lock;
543      }
544    }
545    return null;
546  }
547}