4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Module implementing the Ganeti locking code."""
23 # pylint: disable=W0212
25 # W0212 since e.g. LockSet methods use (a lot) the internals of
38 from ganeti import errors
39 from ganeti import utils
40 from ganeti import compat
41 from ganeti import query
44 _EXCLUSIVE_TEXT = "exclusive"
45 _SHARED_TEXT = "shared"
46 _DELETED_TEXT = "deleted"
51 def ssynchronized(mylock, shared=0):
52 """Shared Synchronization decorator.
54 Calls the function holding the given lock, either in exclusive or shared
55 mode. It requires the passed lock to be a SharedLock (or support its
58 @type mylock: lockable object or string
59 @param mylock: lock to acquire or class member name of the lock to acquire
63 def sync_function(*args, **kwargs):
64 if isinstance(mylock, basestring):
65 assert args, "cannot ssynchronize on non-class method: self not found"
67 lock = getattr(args[0], mylock)
70 lock.acquire(shared=shared)
72 return fn(*args, **kwargs)
79 class _SingleNotifyPipeConditionWaiter(object):
80 """Helper class for SingleNotifyPipeCondition
88 def __init__(self, poller, fd):
89 """Constructor for _SingleNotifyPipeConditionWaiter
91 @type poller: select.poll
92 @param poller: Poller object
94 @param fd: File descriptor to wait for
101 def __call__(self, timeout):
102 """Wait for something to happen on the pipe.
104 @type timeout: float or None
105 @param timeout: Timeout for waiting (can be None)
108 running_timeout = utils.RunningTimeout(timeout, True)
111 remaining_time = running_timeout.Remaining()
113 if remaining_time is not None:
114 if remaining_time < 0.0:
117 # Our calculation uses seconds, poll() wants milliseconds
118 remaining_time *= 1000
121 result = self._poller.poll(remaining_time)
122 except EnvironmentError, err:
123 if err.errno != errno.EINTR:
127 # Check whether we were notified
128 if result and result[0][0] == self._fd:
132 class _BaseCondition(object):
133 """Base class containing common code for conditions.
135 Some of this code is taken from python's threading module.
147 def __init__(self, lock):
148 """Constructor for _BaseCondition.
150 @type lock: threading.Lock
151 @param lock: condition base lock
154 object.__init__(self)
157 self._release_save = lock._release_save
158 except AttributeError:
159 self._release_save = self._base_release_save
161 self._acquire_restore = lock._acquire_restore
162 except AttributeError:
163 self._acquire_restore = self._base_acquire_restore
165 self._is_owned = lock._is_owned
166 except AttributeError:
167 self._is_owned = self._base_is_owned
171 # Export the lock's acquire() and release() methods
172 self.acquire = lock.acquire
173 self.release = lock.release
175 def _base_is_owned(self):
176 """Check whether lock is owned by current thread.
179 if self._lock.acquire(0):
184 def _base_release_save(self):
187 def _base_acquire_restore(self, _):
190 def _check_owned(self):
191 """Raise an exception if the current thread doesn't own the lock.
194 if not self._is_owned():
195 raise RuntimeError("cannot work with un-aquired lock")
198 class SingleNotifyPipeCondition(_BaseCondition):
199 """Condition which can only be notified once.
201 This condition class uses pipes and poll, internally, to be able to wait for
202 notification with a timeout, without resorting to polling. It is almost
203 compatible with Python's threading.Condition, with the following differences:
204 - notifyAll can only be called once, and no wait can happen after that
205 - notify is not supported, only notifyAll
217 _waiter_class = _SingleNotifyPipeConditionWaiter
219 def __init__(self, lock):
220 """Constructor for SingleNotifyPipeCondition
223 _BaseCondition.__init__(self, lock)
225 self._notified = False
227 self._write_fd = None
230 def _check_unnotified(self):
231 """Throws an exception if already notified.
235 raise RuntimeError("cannot use already notified condition")
238 """Cleanup open file descriptors, if any.
241 if self._read_fd is not None:
242 os.close(self._read_fd)
245 if self._write_fd is not None:
246 os.close(self._write_fd)
247 self._write_fd = None
250 def wait(self, timeout):
251 """Wait for a notification.
253 @type timeout: float or None
254 @param timeout: Waiting timeout (can be None)
258 self._check_unnotified()
262 if self._poller is None:
263 (self._read_fd, self._write_fd) = os.pipe()
264 self._poller = select.poll()
265 self._poller.register(self._read_fd, select.POLLHUP)
267 wait_fn = self._waiter_class(self._poller, self._read_fd)
268 state = self._release_save()
270 # Wait for notification
274 self._acquire_restore(state)
277 if self._nwaiters == 0:
280 def notifyAll(self): # pylint: disable=C0103
281 """Close the writing side of the pipe to notify all waiters.
285 self._check_unnotified()
286 self._notified = True
287 if self._write_fd is not None:
288 os.close(self._write_fd)
289 self._write_fd = None
292 class PipeCondition(_BaseCondition):
293 """Group-only non-polling condition with counters.
295 This condition class uses pipes and poll, internally, to be able to wait for
296 notification with a timeout, without resorting to polling. It is almost
297 compatible with Python's threading.Condition, but only supports notifyAll and
298 non-recursive locks. As an additional feature, it is able to report whether
299 there are any waiting threads.
307 _single_condition_class = SingleNotifyPipeCondition
309 def __init__(self, lock):
310 """Initializes this class.
313 _BaseCondition.__init__(self, lock)
314 self._waiters = set()
315 self._single_condition = self._single_condition_class(self._lock)
317 def wait(self, timeout):
318 """Wait for a notification.
320 @type timeout: float or None
321 @param timeout: Waiting timeout (can be None)
326 # Keep local reference to the pipe. It could be replaced by another thread
327 # notifying while we're waiting.
328 cond = self._single_condition
330 self._waiters.add(threading.currentThread())
335 self._waiters.remove(threading.currentThread())
337 def notifyAll(self): # pylint: disable=C0103
338 """Notify all currently waiting threads.
342 self._single_condition.notifyAll()
343 self._single_condition = self._single_condition_class(self._lock)
345 def get_waiting(self):
346 """Returns a list of all waiting threads.
353 def has_waiting(self):
354 """Returns whether there are active waiters.
359 return bool(self._waiters)
362 return ("<%s.%s waiters=%s at %#x>" %
363 (self.__class__.__module__, self.__class__.__name__,
364 self._waiters, id(self)))
367 class _PipeConditionWithMode(PipeCondition):
372 def __init__(self, lock, shared):
373 """Initializes this class.
377 PipeCondition.__init__(self, lock)
380 class SharedLock(object):
381 """Implements a shared lock.
383 Multiple threads can acquire the lock in a shared way by calling
384 C{acquire(shared=1)}. In order to acquire the lock in an exclusive way
385 threads can call C{acquire(shared=0)}.
387 Notes on data structures: C{__pending} contains a priority queue (heapq) of
388 all pending acquires: C{[(priority1: prioqueue1), (priority2: prioqueue2),
389 ...]}. Each per-priority queue contains a normal in-order list of conditions
390 to be notified when the lock can be acquired. Shared locks are grouped
391 together by priority and the condition for them is stored in
392 C{__pending_shared} if it already exists. C{__pending_by_prio} keeps
393 references for the per-priority queues indexed by priority for faster access.
396 @ivar name: the name of the lock
412 __condition_class = _PipeConditionWithMode
414 def __init__(self, name, monitor=None, _time_fn=time.time):
415 """Construct a new SharedLock.
417 @param name: the name of the lock
418 @type monitor: L{LockMonitor}
419 @param monitor: Lock monitor with which to register
422 object.__init__(self)
426 # Used for unittesting
427 self.__time_fn = _time_fn
430 self.__lock = threading.Lock()
432 # Queue containing waiting acquires
434 self.__pending_by_prio = {}
435 self.__pending_shared = {}
437 # Current lock holders
441 # is this lock in the deleted state?
442 self.__deleted = False
444 # Register with lock monitor
446 logging.debug("Adding lock %s to monitor", name)
447 monitor.RegisterLock(self)
450 return ("<%s.%s name=%s at %#x>" %
451 (self.__class__.__module__, self.__class__.__name__,
452 self.name, id(self)))
454 def GetLockInfo(self, requested):
455 """Retrieves information for querying locks.
458 @param requested: Requested information, see C{query.LQ_*}
461 self.__lock.acquire()
463 # Note: to avoid unintentional race conditions, no references to
464 # modifiable objects should be returned unless they were created in this
469 if query.LQ_MODE in requested:
472 assert not (self.__exc or self.__shr)
474 mode = _EXCLUSIVE_TEXT
478 # Current owner(s) are wanted
479 if query.LQ_OWNER in requested:
486 assert not self.__deleted
487 owner_names = [i.getName() for i in owner]
489 # Pending acquires are wanted
490 if query.LQ_PENDING in requested:
493 # Sorting instead of copying and using heapq functions for simplicity
494 for (_, prioqueue) in sorted(self.__pending):
495 for cond in prioqueue:
497 pendmode = _SHARED_TEXT
499 pendmode = _EXCLUSIVE_TEXT
501 # List of names will be sorted in L{query._GetLockPending}
502 pending.append((pendmode, [i.getName()
503 for i in cond.get_waiting()]))
507 return [(self.name, mode, owner_names, pending)]
509 self.__lock.release()
511 def __check_deleted(self):
512 """Raises an exception if the lock has been deleted.
516 raise errors.LockError("Deleted lock %s" % self.name)
518 def __is_sharer(self):
519 """Is the current thread sharing the lock at this time?
522 return threading.currentThread() in self.__shr
524 def __is_exclusive(self):
525 """Is the current thread holding the lock exclusively at this time?
528 return threading.currentThread() == self.__exc
530 def __is_owned(self, shared=-1):
531 """Is the current thread somehow owning the lock at this time?
533 This is a private version of the function, which presumes you're holding
538 return self.__is_sharer() or self.__is_exclusive()
540 return self.__is_sharer()
542 return self.__is_exclusive()
544 def _is_owned(self, shared=-1):
545 """Is the current thread somehow owning the lock at this time?
548 - < 0: check for any type of ownership (default)
549 - 0: check for exclusive ownership
550 - > 0: check for shared ownership
553 self.__lock.acquire()
555 return self.__is_owned(shared=shared)
557 self.__lock.release()
561 def _count_pending(self):
562 """Returns the number of pending acquires.
567 self.__lock.acquire()
569 return sum(len(prioqueue) for (_, prioqueue) in self.__pending)
571 self.__lock.release()
573 def _check_empty(self):
574 """Checks whether there are any pending acquires.
579 self.__lock.acquire()
581 # Order is important: __find_first_pending_queue modifies __pending
582 (_, prioqueue) = self.__find_first_pending_queue()
584 return not (prioqueue or
586 self.__pending_by_prio or
587 self.__pending_shared)
589 self.__lock.release()
591 def __do_acquire(self, shared):
592 """Actually acquire the lock.
596 self.__shr.add(threading.currentThread())
598 self.__exc = threading.currentThread()
600 def __can_acquire(self, shared):
601 """Determine whether lock can be acquired.
605 return self.__exc is None
607 return len(self.__shr) == 0 and self.__exc is None
609 def __find_first_pending_queue(self):
610 """Tries to find the topmost queued entry with pending acquires.
612 Removes empty entries while going through the list.
615 while self.__pending:
616 (priority, prioqueue) = self.__pending[0]
619 return (priority, prioqueue)
622 heapq.heappop(self.__pending)
623 del self.__pending_by_prio[priority]
624 assert priority not in self.__pending_shared
628 def __is_on_top(self, cond):
629 """Checks whether the passed condition is on top of the queue.
631 The caller must make sure the queue isn't empty.
634 (_, prioqueue) = self.__find_first_pending_queue()
636 return cond == prioqueue[0]
638 def __acquire_unlocked(self, shared, timeout, priority):
639 """Acquire a shared lock.
641 @param shared: whether to acquire in shared mode; by default an
642 exclusive lock will be acquired
643 @param timeout: maximum waiting time before giving up
644 @type priority: integer
645 @param priority: Priority for acquiring lock
648 self.__check_deleted()
650 # We cannot acquire the lock if we already have it
651 assert not self.__is_owned(), ("double acquire() on a non-recursive lock"
654 # Remove empty entries from queue
655 self.__find_first_pending_queue()
657 # Check whether someone else holds the lock or there are pending acquires.
658 if not self.__pending and self.__can_acquire(shared):
659 # Apparently not, can acquire lock directly.
660 self.__do_acquire(shared)
663 prioqueue = self.__pending_by_prio.get(priority, None)
666 # Try to re-use condition for shared acquire
667 wait_condition = self.__pending_shared.get(priority, None)
668 assert (wait_condition is None or
669 (wait_condition.shared and wait_condition in prioqueue))
671 wait_condition = None
673 if wait_condition is None:
674 if prioqueue is None:
675 assert priority not in self.__pending_by_prio
678 heapq.heappush(self.__pending, (priority, prioqueue))
679 self.__pending_by_prio[priority] = prioqueue
681 wait_condition = self.__condition_class(self.__lock, shared)
682 prioqueue.append(wait_condition)
685 # Keep reference for further shared acquires on same priority. This is
686 # better than trying to find it in the list of pending acquires.
687 assert priority not in self.__pending_shared
688 self.__pending_shared[priority] = wait_condition
690 wait_start = self.__time_fn()
694 # Wait until we become the topmost acquire in the queue or the timeout
697 if self.__is_on_top(wait_condition) and self.__can_acquire(shared):
698 self.__do_acquire(shared)
702 # A lot of code assumes blocking acquires always succeed, therefore we
703 # can never return False for a blocking acquire
704 if (timeout is not None and
705 utils.TimeoutExpired(wait_start, timeout, _time_fn=self.__time_fn)):
708 # Wait for notification
709 wait_condition.wait(timeout)
710 self.__check_deleted()
712 # Remove condition from queue if there are no more waiters
713 if not wait_condition.has_waiting():
714 prioqueue.remove(wait_condition)
715 if wait_condition.shared:
716 # Remove from list of shared acquires if it wasn't while releasing
717 # (e.g. on lock deletion)
718 self.__pending_shared.pop(priority, None)
722 def acquire(self, shared=0, timeout=None, priority=None,
724 """Acquire a shared lock.
726 @type shared: integer (0/1) used as a boolean
727 @param shared: whether to acquire in shared mode; by default an
728 exclusive lock will be acquired
730 @param timeout: maximum waiting time before giving up
731 @type priority: integer
732 @param priority: Priority for acquiring lock
733 @type test_notify: callable or None
734 @param test_notify: Special callback function for unittesting
738 priority = _DEFAULT_PRIORITY
740 self.__lock.acquire()
742 # We already got the lock, notify now
743 if __debug__ and callable(test_notify):
746 return self.__acquire_unlocked(shared, timeout, priority)
748 self.__lock.release()
751 """Changes the lock mode from exclusive to shared.
753 Pending acquires in shared mode on the same priority will go ahead.
756 self.__lock.acquire()
758 assert self.__is_owned(), "Lock must be owned"
760 if self.__is_exclusive():
761 # Do nothing if the lock is already acquired in shared mode
765 # Important: pending shared acquires should only jump ahead if there
766 # was a transition from exclusive to shared, otherwise an owner of a
767 # shared lock can keep calling this function to push incoming shared
769 (priority, prioqueue) = self.__find_first_pending_queue()
771 # Is there a pending shared acquire on this priority?
772 cond = self.__pending_shared.pop(priority, None)
775 assert cond in prioqueue
777 # Ensure shared acquire is on top of queue
778 if len(prioqueue) > 1:
779 prioqueue.remove(cond)
780 prioqueue.insert(0, cond)
785 assert not self.__is_exclusive()
786 assert self.__is_sharer()
790 self.__lock.release()
793 """Release a Shared Lock.
795 You must have acquired the lock, either in shared or in exclusive mode,
796 before calling this function.
799 self.__lock.acquire()
801 assert self.__is_exclusive() or self.__is_sharer(), \
802 "Cannot release non-owned lock"
804 # Autodetect release type
805 if self.__is_exclusive():
809 self.__shr.remove(threading.currentThread())
810 notify = not self.__shr
812 # Notify topmost condition in queue if there are no owners left (for
815 self.__notify_topmost()
817 self.__lock.release()
819 def __notify_topmost(self):
820 """Notifies topmost condition in queue of pending acquires.
823 (priority, prioqueue) = self.__find_first_pending_queue()
828 # Prevent further shared acquires from sneaking in while waiters are
830 self.__pending_shared.pop(priority, None)
832 def _notify_topmost(self):
833 """Exported version of L{__notify_topmost}.
836 self.__lock.acquire()
838 return self.__notify_topmost()
840 self.__lock.release()
842 def delete(self, timeout=None, priority=None):
843 """Delete a Shared Lock.
845 This operation will declare the lock for removal. First the lock will be
846 acquired in exclusive mode if you don't already own it, then the lock
847 will be put in a state where any future and pending acquire() fail.
850 @param timeout: maximum waiting time before giving up
851 @type priority: integer
852 @param priority: Priority for acquiring lock
856 priority = _DEFAULT_PRIORITY
858 self.__lock.acquire()
860 assert not self.__is_sharer(), "Cannot delete() a lock while sharing it"
862 self.__check_deleted()
864 # The caller is allowed to hold the lock exclusively already.
865 acquired = self.__is_exclusive()
868 acquired = self.__acquire_unlocked(0, timeout, priority)
871 assert self.__is_exclusive() and not self.__is_sharer(), \
872 "Lock wasn't acquired in exclusive mode"
874 self.__deleted = True
877 assert not (self.__exc or self.__shr), "Found owner during deletion"
879 # Notify all acquires. They'll throw an error.
880 for (_, prioqueue) in self.__pending:
881 for cond in prioqueue:
884 assert self.__deleted
888 self.__lock.release()
890 def _release_save(self):
891 shared = self.__is_sharer()
895 def _acquire_restore(self, shared):
896 self.acquire(shared=shared)
899 # Whenever we want to acquire a full LockSet we pass None as the value
900 # to acquire. Hide this behind this nicely named constant.
904 class _AcquireTimeout(Exception):
905 """Internal exception to abort an acquire on a timeout.
911 """Implements a set of locks.
913 This abstraction implements a set of shared locks for the same resource type,
914 distinguished by name. The user can lock a subset of the resources and the
915 LockSet will take care of acquiring the locks always in the same order, thus
918 All the locks needed in the same set must be acquired together, though.
921 @ivar name: the name of the lockset
924 def __init__(self, members, name, monitor=None):
925 """Constructs a new LockSet.
927 @type members: list of strings
928 @param members: initial members of the set
929 @type monitor: L{LockMonitor}
930 @param monitor: Lock monitor with which to register member locks
933 assert members is not None, "members parameter is not a list"
937 self.__monitor = monitor
939 # Used internally to guarantee coherency
940 self.__lock = SharedLock(self._GetLockName("[lockset]"), monitor=monitor)
942 # The lockdict indexes the relationship name -> lock
943 # The order-of-locking is implied by the alphabetical order of names
946 for mname in members:
947 self.__lockdict[mname] = SharedLock(self._GetLockName(mname),
950 # The owner dict contains the set of locks each thread owns. For
951 # performance each thread can access its own key without a global lock on
952 # this structure. It is paramount though that *no* other type of access is
953 # done to this structure (eg. no looping over its keys). *_owner helper
954 # functions are defined to guarantee access is correct, but in general never
955 # do anything different than __owners[threading.currentThread()], or there
959 def _GetLockName(self, mname):
960 """Returns the name for a member lock.
963 return "%s/%s" % (self.name, mname)
966 """Returns the lockset-internal lock.
971 def _get_lockdict(self):
972 """Returns the lockset-internal lock dictionary.
974 Accessing this structure is only safe in single-thread usage or when the
975 lockset-internal lock is held.
978 return self.__lockdict
981 """Is the current thread a current level owner?"""
982 return threading.currentThread() in self.__owners
984 def _add_owned(self, name=None):
985 """Note the current thread owns the given lock"""
987 if not self._is_owned():
988 self.__owners[threading.currentThread()] = set()
991 self.__owners[threading.currentThread()].add(name)
993 self.__owners[threading.currentThread()] = set([name])
995 def _del_owned(self, name=None):
996 """Note the current thread no longer owns the given lock"""
998 assert not (name is None and self.__lock._is_owned()), \
999 "Cannot hold internal lock when deleting owner status"
1001 if name is not None:
1002 self.__owners[threading.currentThread()].remove(name)
1004 # Only remove the key if we don't hold the set-lock as well
1005 if (not self.__lock._is_owned() and
1006 not self.__owners[threading.currentThread()]):
1007 del self.__owners[threading.currentThread()]
1009 def _list_owned(self):
1010 """Get the set of resource names owned by the current thread"""
1011 if self._is_owned():
1012 return self.__owners[threading.currentThread()].copy()
1016 def _release_and_delete_owned(self):
1017 """Release and delete all resources owned by the current thread"""
1018 for lname in self._list_owned():
1019 lock = self.__lockdict[lname]
1020 if lock._is_owned():
1022 self._del_owned(name=lname)
1025 """Return the current set of names.
1027 Only call this function while holding __lock and don't iterate on the
1028 result after releasing the lock.
1031 return self.__lockdict.keys()
1034 """Return a copy of the current set of elements.
1036 Used only for debugging purposes.
1039 # If we don't already own the set-level lock,
1040 # we'll get it and note we need to release it later.
1041 release_lock = False
1042 if not self.__lock._is_owned():
1044 self.__lock.acquire(shared=1)
1046 result = self.__names()
1049 self.__lock.release()
1052 def acquire(self, names, timeout=None, shared=0, priority=None,
1054 """Acquire a set of resource locks.
1056 @type names: list of strings (or string)
1057 @param names: the names of the locks which shall be acquired
1058 (special lock names, or instance/node names)
1059 @type shared: integer (0/1) used as a boolean
1060 @param shared: whether to acquire in shared mode; by default an
1061 exclusive lock will be acquired
1062 @type timeout: float or None
1063 @param timeout: Maximum time to acquire all locks
1064 @type priority: integer
1065 @param priority: Priority for acquiring locks
1066 @type test_notify: callable or None
1067 @param test_notify: Special callback function for unittesting
1069 @return: Set of all locks successfully acquired or None in case of timeout
1071 @raise errors.LockError: when any lock we try to acquire has
1072 been deleted before we succeed. In this case none of the
1073 locks requested will be acquired.
1076 assert timeout is None or timeout >= 0.0
1078 # Check we don't already own locks at this level
1079 assert not self._is_owned(), ("Cannot acquire locks in the same set twice"
1080 " (lockset %s)" % self.name)
1082 if priority is None:
1083 priority = _DEFAULT_PRIORITY
1085 # We need to keep track of how long we spent waiting for a lock. The
1086 # timeout passed to this function is over all lock acquires.
1087 running_timeout = utils.RunningTimeout(timeout, False)
1090 if names is not None:
1091 # Support passing in a single resource to acquire rather than many
1092 if isinstance(names, basestring):
1095 return self.__acquire_inner(names, False, shared, priority,
1096 running_timeout.Remaining, test_notify)
1099 # If no names are given, acquire the whole set by preventing new names
1100 # from being added before we release, and getting the current list of names.
1101 # Some of them may then be deleted later, but we'll cope with this.
1103 # We'd like to acquire this lock in a shared way, as it's nice if
1104 # everybody else can use the instances at the same time. If we are
1105 # acquiring them exclusively though they won't be able to do this
1106 # anyway, though, so we'll get the list lock exclusively as well in
1107 # order to be able to do add() on the set while owning it.
1108 if not self.__lock.acquire(shared=shared, priority=priority,
1109 timeout=running_timeout.Remaining()):
1110 raise _AcquireTimeout()
1112 # note we own the set-lock
1115 return self.__acquire_inner(self.__names(), True, shared, priority,
1116 running_timeout.Remaining, test_notify)
1118 # We shouldn't have problems adding the lock to the owners list, but
1119 # if we did we'll try to release this lock and re-raise exception.
1120 # Of course something is going to be really wrong, after this.
1121 self.__lock.release()
1125 except _AcquireTimeout:
1128 def __acquire_inner(self, names, want_all, shared, priority,
1129 timeout_fn, test_notify):
1130 """Inner logic for acquiring a number of locks.
1132 @param names: Names of the locks to be acquired
1133 @param want_all: Whether all locks in the set should be acquired
1134 @param shared: Whether to acquire in shared mode
1135 @param timeout_fn: Function returning remaining timeout
1136 @param priority: Priority for acquiring locks
1137 @param test_notify: Special callback function for unittesting
1142 # First we look the locks up on __lockdict. We have no way of being sure
1143 # they will still be there after, but this makes it a lot faster should
1144 # just one of them be already wrong. Using a sorted sequence to prevent
1146 for lname in sorted(utils.UniqueSequence(names)):
1148 lock = self.__lockdict[lname] # raises KeyError if lock is not there
1151 # We are acquiring all the set, it doesn't matter if this particular
1152 # element is not there anymore.
1155 raise errors.LockError("Non-existing lock %s in set %s (it may have"
1156 " been removed)" % (lname, self.name))
1158 acquire_list.append((lname, lock))
1160 # This will hold the locknames we effectively acquired.
1164 # Now acquire_list contains a sorted list of resources and locks we
1165 # want. In order to get them we loop on this (private) list and
1166 # acquire() them. We have no real guarantee they will still exist till
1167 # this is done but .acquire() itself is safe and will alert us if the
1168 # lock gets deleted.
1169 for (lname, lock) in acquire_list:
1170 if __debug__ and callable(test_notify):
1171 test_notify_fn = lambda: test_notify(lname)
1173 test_notify_fn = None
1175 timeout = timeout_fn()
1178 # raises LockError if the lock was deleted
1179 acq_success = lock.acquire(shared=shared, timeout=timeout,
1181 test_notify=test_notify_fn)
1182 except errors.LockError:
1184 # We are acquiring all the set, it doesn't matter if this
1185 # particular element is not there anymore.
1188 raise errors.LockError("Non-existing lock %s in set %s (it may"
1189 " have been removed)" % (lname, self.name))
1192 # Couldn't get lock or timeout occurred
1194 # This shouldn't happen as SharedLock.acquire(timeout=None) is
1196 raise errors.LockError("Failed to get lock %s (set %s)" %
1199 raise _AcquireTimeout()
1202 # now the lock cannot be deleted, we have it!
1203 self._add_owned(name=lname)
1207 # We shouldn't have problems adding the lock to the owners list, but
1208 # if we did we'll try to release this lock and re-raise exception.
1209 # Of course something is going to be really wrong after this.
1210 if lock._is_owned():
1215 # Release all owned locks
1216 self._release_and_delete_owned()
1221 def downgrade(self, names=None):
1222 """Downgrade a set of resource locks from exclusive to shared mode.
1224 The locks must have been acquired in exclusive mode.
1227 assert self._is_owned(), ("downgrade on lockset %s while not owning any"
1228 " lock" % self.name)
1230 # Support passing in a single resource to downgrade rather than many
1231 if isinstance(names, basestring):
1234 owned = self._list_owned()
1240 assert owned.issuperset(names), \
1241 ("downgrade() on unheld resources %s (set %s)" %
1242 (names.difference(owned), self.name))
1244 for lockname in names:
1245 self.__lockdict[lockname].downgrade()
1247 # Do we own the lockset in exclusive mode?
1248 if self.__lock._is_owned(shared=0):
1249 # Have all locks been downgraded?
1250 if not compat.any(lock._is_owned(shared=0)
1251 for lock in self.__lockdict.values()):
1252 self.__lock.downgrade()
1253 assert self.__lock._is_owned(shared=1)
1257 def release(self, names=None):
1258 """Release a set of resource locks, at the same level.
1260 You must have acquired the locks, either in shared or in exclusive mode,
1261 before releasing them.
1263 @type names: list of strings, or None
1264 @param names: the names of the locks which shall be released
1265 (defaults to all the locks acquired at that level).
1268 assert self._is_owned(), ("release() on lock set %s while not owner" %
1271 # Support passing in a single resource to release rather than many
1272 if isinstance(names, basestring):
1276 names = self._list_owned()
1279 assert self._list_owned().issuperset(names), (
1280 "release() on unheld resources %s (set %s)" %
1281 (names.difference(self._list_owned()), self.name))
1283 # First of all let's release the "all elements" lock, if set.
1284 # After this 'add' can work again
1285 if self.__lock._is_owned():
1286 self.__lock.release()
1289 for lockname in names:
1290 # If we are sure the lock doesn't leave __lockdict without being
1291 # exclusively held we can do this...
1292 self.__lockdict[lockname].release()
1293 self._del_owned(name=lockname)
def add(self, names, acquired=0, shared=0):
  """Create new locks and insert them into this lock set.

  The set-level lock is held while the set is modified. If the calling
  thread does not own it yet, it is acquired here and released again before
  returning; otherwise it is left untouched.

  @type names: list of strings
  @param names: names of the new elements to add; a single string is
      accepted as well
  @type acquired: integer (0/1) used as a boolean
  @param acquired: whether the new locks are handed over already acquired
  @type shared: integer (0/1) used as a boolean
  @param shared: whether that pre-acquisition is done in shared mode
  @raise errors.LockError: if one of the names already exists in the set
  @return: True

  """
  # Either nothing is owned at this level, or the whole set is owned
  # exclusively; anything in between is a caller error
  assert not self._is_owned() or self.__lock._is_owned(shared=0), \
    ("Cannot add locks if the set %s is only partially owned, or shared" %
     self.name)

  # A lone name is handled like a one-element list
  if isinstance(names, basestring):
    names = [names]

  # Take the set-level lock unless the caller already owns it, and remember
  # whether it has to be given back once we are done
  release_lock = not self.__lock._is_owned()
  if release_lock:
    self.__lock.acquire()

  try:
    duplicates = set(self.__names()).intersection(names)
    if duplicates:
      # Deliberately a real exception instead of an assertion: assertions
      # vanish under optimization, and concurrent callers can run into this
      # without doing anything wrong themselves
      raise errors.LockError("duplicate add(%s) on lockset %s" %
                             (duplicates, self.name))

    for lockname in names:
      new_lock = SharedLock(self._GetLockName(lockname),
                            monitor=self.__monitor)

      if acquired:
        # The lock was created a moment ago and is not published yet, so
        # neither a priority nor a timeout is needed
        new_lock.acquire(shared=shared)
        # From here on the lock cannot be deleted under us
        try:
          self._add_owned(name=lockname)
        except:
          # Recording the ownership is not expected to fail. Should it do
          # so anyway, hand the lock back before propagating the error;
          # as the lock is not in __lockdict yet nobody can be waiting on
          # it, hence this is purely a safety net.
          new_lock.release()
          raise

      self.__lockdict[lockname] = new_lock

  finally:
    # Give the set-level lock back only if it was taken in this call
    if release_lock:
      self.__lock.release()

  return True
def remove(self, names):
  """Delete locks from this lock set.

  The caller must either hold nothing in the lock set, or already hold,
  exclusively, a superset of the elements to be deleted.

  @type names: list of strings
  @param names: names of the resources to remove; a single string is
      accepted as well

  @return: list with the names of the locks this call removed; names whose
      deletion failed (unknown name or L{errors.LockError}) are left out

  """
  # A lone name is handled like a one-element list
  if isinstance(names, basestring):
    names = [names]

  # Owning only part of what is to be deleted is a caller error. Whether the
  # ownership is exclusive is verified by the individual locks.
  assert not self._is_owned() or self._list_owned().issuperset(names), (
    "remove() on acquired lockset %s while not owning all elements" %
    self.name)

  deleted = []

  for lockname in names:
    # delete() takes the lock exclusively when it is not owned yet and makes
    # every pending or later acquire fail. Ordering does not matter here:
    # delete() implies release(), and the assertion above ensures we hold
    # either all of the locks to be deleted or none of them.
    try:
      self.__lockdict[lockname].delete()
      deleted.append(lockname)
    except (KeyError, errors.LockError):
      # Impossible while holding the lock ourselves, so double-check
      assert not self._is_owned(), ("remove failed while holding lockset %s"
                                    % self.name)
      # Whoever actually deleted the lock is in charge of the cleanup
      continue

    # No error means this call deleted the lock. Dropping it from the
    # dictionary is safe, as any other delete() or acquire(), pending or
    # future, is going to fail and nobody can have held the lock since our
    # delete() call.
    del self.__lockdict[lockname]
    # Forget about it in our own bookkeeping too if we were the owner
    if self._is_owned():
      self._del_owned(name=lockname)

  return deleted
# Locking levels, must be acquired in increasing order.
# Current rules are:
#   - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL), which must be
#     acquired before performing any operation, either in shared or in
#     exclusive mode. Acquiring the BGL in exclusive mode is discouraged and
#     should be avoided.
#   - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
#     If you need more than one node, or more than one instance, acquire them
#     at the same time.
#
# NOTE: each level's value must equal its position in LEVELS; the level
# ordering check in GanetiLockManager slices LEVELS by level value.
LEVEL_CLUSTER = 0
LEVEL_INSTANCE = 1
LEVEL_NODEGROUP = 2
LEVEL_NODE = 3

LEVELS = [
  LEVEL_CLUSTER,
  LEVEL_INSTANCE,
  LEVEL_NODEGROUP,
  LEVEL_NODE,
  ]

# Lock levels which are modifiable
LEVELS_MOD = [LEVEL_NODE, LEVEL_NODEGROUP, LEVEL_INSTANCE]

# Human-readable name of every level
LEVEL_NAMES = {
  LEVEL_CLUSTER: "cluster",
  LEVEL_INSTANCE: "instance",
  LEVEL_NODEGROUP: "nodegroup",
  LEVEL_NODE: "node",
  }

# Constant for the big ganeti lock
BGL = "BGL"
class GanetiLockManager:
  """Central entry point for locking in a Ganeti cluster.

  One L{LockSet} is kept per locking level. Every operation is forwarded to
  the lock set of the requested level after asserting that the calling
  thread respects the level ordering and, where needed, owns the Big Ganeti
  Lock, which gives a dynamic check against possible deadlocks. Keeping all
  of this in one place should also ease a transition to a different lock
  type, should Python threads ever be abandoned.

  """
  # The single manager instance, recorded by the constructor
  _instance = None

  def __init__(self, nodes, nodegroups, instances):
    """Builds the lock sets for all levels.

    Only one GanetiLockManager may exist at any time; creating a second one
    trips an assertion.

    @param nodes: list of node names
    @param nodegroups: list of nodegroup uuids
    @param instances: list of instance names

    """
    assert self.__class__._instance is None, \
           "double GanetiLockManager instance"

    self.__class__._instance = self

    monitor = LockMonitor()
    self._monitor = monitor

    # One lock set per level, all of them reporting to the same monitor
    self.__keyring = {
      LEVEL_CLUSTER: LockSet([BGL], "BGL", monitor=monitor),
      LEVEL_NODE: LockSet(nodes, "nodes", monitor=monitor),
      LEVEL_NODEGROUP: LockSet(nodegroups, "nodegroups", monitor=monitor),
      LEVEL_INSTANCE: LockSet(instances, "instances", monitor=monitor),
      }

  def AddToLockMonitor(self, provider):
    """Hands a lock information provider to the lock monitor.

    See L{LockMonitor.RegisterLock}.

    """
    return self._monitor.RegisterLock(provider)

  def QueryLocks(self, fields):
    """Returns information about all monitored locks.

    See L{LockMonitor.QueryLocks}.

    """
    return self._monitor.QueryLocks(fields)

  def OldStyleQueryLocks(self, fields):
    """Returns information about all monitored locks as old-style data.

    See L{LockMonitor.OldStyleQueryLocks}.

    """
    return self._monitor.OldStyleQueryLocks(fields)

  def _names(self, level):
    """Returns the names of the locks existing at a level.

    Meant for debugging and testing.

    @param level: the level whose list of locks to get

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    lockset = self.__keyring[level]
    return lockset._names()

  def _is_owned(self, level):
    """Tells whether the caller owns any lock at the given level.

    """
    lockset = self.__keyring[level]
    return lockset._is_owned()

  is_owned = _is_owned

  def _list_owned(self, level):
    """Returns the set of locks the caller owns at the given level.

    """
    lockset = self.__keyring[level]
    return lockset._list_owned()

  list_owned = _list_owned

  def _upper_owned(self, level):
    """Tells whether any lock at a level above the given one is owned.

    """
    # Slicing by the level value is only correct while LEVELS[i] == i
    higher_levels = LEVELS[level + 1:]
    return compat.any((self._is_owned(lvl) for lvl in higher_levels))

  def _BGL_owned(self): # pylint: disable=C0103
    """Tells whether the current thread owns the BGL.

    It does not matter whether it is held in exclusive or in shared mode.

    """
    cluster_owned = self.__keyring[LEVEL_CLUSTER]._list_owned()
    return BGL in cluster_owned

  @staticmethod
  def _contains_BGL(level, names): # pylint: disable=C0103
    """Tells whether an operation touches the BGL.

    This is the case when acting on the cluster level with either no names
    at all (C{None}) or with a set of names that includes the Big Ganeti
    Lock.

    """
    if level != LEVEL_CLUSTER:
      return False
    return names is None or BGL in names

  def acquire(self, level, names, timeout=None, shared=0, priority=None):
    """Acquires a set of resource locks, all at the same level.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be acquired
    @type names: list of strings (or string)
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names)
    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default
        an exclusive lock will be acquired
    @type timeout: float
    @param timeout: Maximum time to acquire all locks
    @type priority: integer
    @param priority: Priority for acquiring lock

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Either this call is about the Big Ganeti Lock itself, or the BGL must
    # be owned already. "Legacy" opcodes rely on running non-concurrently,
    # hence even migrated code has to at least share the BGL to stay
    # compatible with them. Owning the BGL exclusively makes further locks
    # pointless, except maybe while an opcode is only half migrated.
    assert (self._contains_BGL(level, names) or self._BGL_owned()), (
            "You must own the Big Ganeti Lock before acquiring any other")

    # Nothing at a higher level may be owned at this point
    assert not self._upper_owned(level), ("Cannot acquire locks at a level"
           " while owning some at a greater one")

    # Let the lock set of this level do the actual work
    lockset = self.__keyring[level]
    return lockset.acquire(names, shared=shared, timeout=timeout,
                           priority=priority)

  def downgrade(self, level, names=None):
    """Turns exclusively held resource locks into shared ones.

    The locks must have been acquired in exclusive mode.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be downgraded
    @type names: list of strings, or None
    @param names: the names of the locks which shall be downgraded
        (defaults to all the locks acquired at the level)

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    lockset = self.__keyring[level]
    return lockset.downgrade(names=names)

  def release(self, level, names=None):
    """Releases a set of resource locks, all at the same level.

    The locks must have been acquired before, no matter whether in shared
    or in exclusive mode.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be released
    @type names: list of strings, or None
    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level)

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    # The BGL has to outlive every lock held above the cluster level
    assert not (self._contains_BGL(level, names) and
                self._upper_owned(LEVEL_CLUSTER)), (
            "Cannot release the Big Ganeti Lock while holding something"
            " at upper levels (%r)" %
            (utils.CommaJoin(["%s=%r" % (LEVEL_NAMES[lvl],
                                         self._list_owned(lvl))
                              for lvl in self.__keyring.keys()]), ))

    # The lock set complains by itself about locks which are not owned
    lockset = self.__keyring[level]
    return lockset.release(names)

  def add(self, level, names, acquired=0, shared=0):
    """Adds locks at the given level.

    @type level: member of locking.LEVELS_MOD
    @param level: the level at which the locks shall be added
    @type names: list of strings
    @param names: names of the locks to acquire
    @type acquired: integer (0/1) used as a boolean
    @param acquired: whether to acquire the newly added locks
    @type shared: integer (0/1) used as a boolean
    @param shared: whether the acquisition will be shared

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " operations")
    assert not self._upper_owned(level), ("Cannot add locks at a level"
           " while owning some at a greater one")

    lockset = self.__keyring[level]
    return lockset.add(names, acquired=acquired, shared=shared)

  def remove(self, level, names):
    """Removes locks from the given level.

    The caller must either own locks at this level already or own nothing
    at any higher level; the lock set then verifies that all locks to be
    removed are owned, and owned exclusively.

    @type level: member of locking.LEVELS_MOD
    @param level: the level at which the locks shall be removed
    @type names: list of strings
    @param names: the names of the locks which shall be removed
        (special lock names, or instance/node names)

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " operations")
    # Either the level is owned, or nothing from here upwards is owned.
    # Partial or shared ownership is dealt with by LockSet.remove().
    assert self._is_owned(level) or not self._upper_owned(level), (
           "Cannot remove locks at a level while not owning it or"
           " owning some at a greater one")

    lockset = self.__keyring[level]
    return lockset.remove(names)
def _MonitorSortKey(entry):
  """Sorting key function.

  Sort by name, registration order and then order of information. This
  provides a stable sort order over different providers, even if they return
  the same name.

  The tuple is unpacked inside the body rather than in the signature:
  tuple parameters were removed from the language by PEP 3113, while this
  form is accepted by every Python version and is called the same way.

  @type entry: tuple; (item, idx, num)
  @param entry: C{item} is a four-element lock information tuple whose first
      element is the lock name, C{idx} the position of the item within its
      provider's result and C{num} the provider's registration number
  @return: tuple of (sort key of the name, registration number, position)

  """
  (item, idx, num) = entry
  (name, _, _, _) = item

  # Registration order takes precedence over the position within a provider
  return (utils.NiceSortKey(name), num, idx)
class LockMonitor(object):
  """Registry of lock information providers which can be queried.

  """
  # Attribute holding the lock used by the ssynchronized decorator
  _LOCK_ATTR = "_lock"

  def __init__(self):
    """Sets up an empty monitor.

    """
    self._lock = SharedLock("LockMonitor")

    # Hands out registration numbers, used to keep sorting stable
    self._counter = itertools.count(0)

    # Registered providers mapped to their registration number. Only weak
    # references are kept, avoiding trouble with circular references and
    # deletion.
    self._locks = weakref.WeakKeyDictionary()

  @ssynchronized(_LOCK_ATTR)
  def RegisterLock(self, provider):
    """Adds a lock information provider to the monitor.

    @param provider: Object with a callable method named C{GetLockInfo}, taking
        a single C{set} containing the requested information items
    @note: Receiving just the function which generates the information would
        be nicer, but weak references to bound methods (e.g.
        C{self.GetLockInfo}) are tricky; none of the known workarounds
        plays well with a standard C{WeakKeyDictionary}

    """
    assert provider not in self._locks, "Duplicate registration"

    # Names are intentionally not checked for duplicates: a lock re-created
    # under the same name shortly after its predecessor went away can find
    # the old instance still sitting in the weakref dictionary. Numbering
    # the registrations keeps the sort order stable regardless.

    self._locks[provider] = self._counter.next()

  def _GetLockInfo(self, requested):
    """Collects the lock information of every registered provider.

    @param requested: passed as-is to each provider's C{GetLockInfo}
    @return: list of (info, index within the provider's result, registration
        number) tuples

    """
    # The internal lock is needed only for a consistent snapshot of the
    # registered providers; they are queried after it has been released
    self._lock.acquire(shared=1)
    try:
      registered = self._locks.items()
    finally:
      self._lock.release()

    collected = []
    for (provider, num) in registered:
      for (idx, info) in enumerate(provider.GetLockInfo(requested)):
        collected.append((info, idx, num))

    return collected

  def _Query(self, fields):
    """Prepares a query over all locks.

    @type fields: list of strings
    @param fields: List of fields to return
    @return: tuple of (query object, query data built from the sorted lock
        information)

    """
    qobj = query.Query(query.LOCK_FIELDS, fields)

    # Fetch everything first, then order by name and registration order
    collected = self._GetLockInfo(qobj.RequestedData())
    lockinfo = sorted(collected, key=_MonitorSortKey)

    # Only the information part of each entry goes into the query data
    return (qobj, query.LockQueryData(map(compat.fst, lockinfo)))

  def QueryLocks(self, fields):
    """Returns information about all locks as a query response.

    @type fields: list of strings
    @param fields: List of fields to return

    """
    (qobj, ctx) = self._Query(fields)

    # Wrap the result into a query response
    return query.GetQueryResponse(qobj, ctx)

  def OldStyleQueryLocks(self, fields):
    """Returns information about all locks as old-style data.

    @type fields: list of strings
    @param fields: List of fields to return

    """
    (qobj, ctx) = self._Query(fields)

    return qobj.OldStyleQuery(ctx)