4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Module implementing the Ganeti locking code."""
23 # pylint: disable=W0212
25 # W0212 since e.g. LockSet methods use (a lot) the internals of
37 from ganeti import errors
38 from ganeti import utils
39 from ganeti import compat
40 from ganeti import query
43 _EXCLUSIVE_TEXT = "exclusive"
44 _SHARED_TEXT = "shared"
45 _DELETED_TEXT = "deleted"
50 def ssynchronized(mylock, shared=0):
51 """Shared Synchronization decorator.
53 Calls the function holding the given lock, either in exclusive or shared
54 mode. It requires the passed lock to be a SharedLock (or support its
57 @type mylock: lockable object or string
58 @param mylock: lock to acquire or class member name of the lock to acquire
62 def sync_function(*args, **kwargs):
63 if isinstance(mylock, basestring):
64 assert args, "cannot ssynchronize on non-class method: self not found"
66 lock = getattr(args[0], mylock)
69 lock.acquire(shared=shared)
71 return fn(*args, **kwargs)
78 class _SingleNotifyPipeConditionWaiter(object):
79 """Helper class for SingleNotifyPipeCondition
87 def __init__(self, poller, fd):
88 """Constructor for _SingleNotifyPipeConditionWaiter
90 @type poller: select.poll
91 @param poller: Poller object
93 @param fd: File descriptor to wait for
100 def __call__(self, timeout):
101 """Wait for something to happen on the pipe.
103 @type timeout: float or None
104 @param timeout: Timeout for waiting (can be None)
107 running_timeout = utils.RunningTimeout(timeout, True)
110 remaining_time = running_timeout.Remaining()
112 if remaining_time is not None:
113 if remaining_time < 0.0:
116 # Our calculation uses seconds, poll() wants milliseconds
117 remaining_time *= 1000
120 result = self._poller.poll(remaining_time)
121 except EnvironmentError, err:
122 if err.errno != errno.EINTR:
126 # Check whether we were notified
127 if result and result[0][0] == self._fd:
131 class _BaseCondition(object):
132 """Base class containing common code for conditions.
134 Some of this code is taken from python's threading module.
146 def __init__(self, lock):
147 """Constructor for _BaseCondition.
149 @type lock: threading.Lock
150 @param lock: condition base lock
153 object.__init__(self)
156 self._release_save = lock._release_save
157 except AttributeError:
158 self._release_save = self._base_release_save
160 self._acquire_restore = lock._acquire_restore
161 except AttributeError:
162 self._acquire_restore = self._base_acquire_restore
164 self._is_owned = lock.is_owned
165 except AttributeError:
166 self._is_owned = self._base_is_owned
170 # Export the lock's acquire() and release() methods
171 self.acquire = lock.acquire
172 self.release = lock.release
174 def _base_is_owned(self):
175 """Check whether lock is owned by current thread.
178 if self._lock.acquire(0):
183 def _base_release_save(self):
186 def _base_acquire_restore(self, _):
def _check_owned(self):
    """Raise an exception if the current thread doesn't own the lock.

    Ownership is decided by C{self._is_owned}, which the constructor binds
    either to the wrapped lock's own check or to the generic fallback.

    @raise RuntimeError: if C{self._is_owned()} returns a false value

    """
    if not self._is_owned():
        raise RuntimeError("cannot work with un-aquired lock")
197 class SingleNotifyPipeCondition(_BaseCondition):
198 """Condition which can only be notified once.
200 This condition class uses pipes and poll, internally, to be able to wait for
201 notification with a timeout, without resorting to polling. It is almost
202 compatible with Python's threading.Condition, with the following differences:
203 - notifyAll can only be called once, and no wait can happen after that
204 - notify is not supported, only notifyAll
216 _waiter_class = _SingleNotifyPipeConditionWaiter
218 def __init__(self, lock):
219 """Constructor for SingleNotifyPipeCondition
222 _BaseCondition.__init__(self, lock)
224 self._notified = False
226 self._write_fd = None
def _check_unnotified(self):
    """Throws an exception if already notified.

    @raise RuntimeError: if C{self._notified} is set

    """
    # NOTE(review): the guard line was missing from the listing; restored
    # from the docstring and the C{_notified} flag set by notifyAll - confirm.
    if self._notified:
        raise RuntimeError("cannot use already notified condition")
237 """Cleanup open file descriptors, if any.
240 if self._read_fd is not None:
241 os.close(self._read_fd)
244 if self._write_fd is not None:
245 os.close(self._write_fd)
246 self._write_fd = None
249 def wait(self, timeout):
250 """Wait for a notification.
252 @type timeout: float or None
253 @param timeout: Waiting timeout (can be None)
257 self._check_unnotified()
261 if self._poller is None:
262 (self._read_fd, self._write_fd) = os.pipe()
263 self._poller = select.poll()
264 self._poller.register(self._read_fd, select.POLLHUP)
266 wait_fn = self._waiter_class(self._poller, self._read_fd)
267 state = self._release_save()
269 # Wait for notification
273 self._acquire_restore(state)
276 if self._nwaiters == 0:
279 def notifyAll(self): # pylint: disable=C0103
280 """Close the writing side of the pipe to notify all waiters.
284 self._check_unnotified()
285 self._notified = True
286 if self._write_fd is not None:
287 os.close(self._write_fd)
288 self._write_fd = None
291 class PipeCondition(_BaseCondition):
292 """Group-only non-polling condition with counters.
294 This condition class uses pipes and poll, internally, to be able to wait for
295 notification with a timeout, without resorting to polling. It is almost
296 compatible with Python's threading.Condition, but only supports notifyAll and
297 non-recursive locks. As an additional features it's able to report whether
298 there are any waiting threads.
306 _single_condition_class = SingleNotifyPipeCondition
308 def __init__(self, lock):
309 """Initializes this class.
312 _BaseCondition.__init__(self, lock)
313 self._waiters = set()
314 self._single_condition = self._single_condition_class(self._lock)
316 def wait(self, timeout):
317 """Wait for a notification.
319 @type timeout: float or None
320 @param timeout: Waiting timeout (can be None)
325 # Keep local reference to the pipe. It could be replaced by another thread
326 # notifying while we're waiting.
327 cond = self._single_condition
329 self._waiters.add(threading.currentThread())
334 self._waiters.remove(threading.currentThread())
336 def notifyAll(self): # pylint: disable=C0103
337 """Notify all currently waiting threads.
341 self._single_condition.notifyAll()
342 self._single_condition = self._single_condition_class(self._lock)
344 def get_waiting(self):
345 """Returns a list of all waiting threads.
352 def has_waiting(self):
353 """Returns whether there are active waiters.
358 return bool(self._waiters)
361 return ("<%s.%s waiters=%s at %#x>" %
362 (self.__class__.__module__, self.__class__.__name__,
363 self._waiters, id(self)))
366 class _PipeConditionWithMode(PipeCondition):
371 def __init__(self, lock, shared):
372 """Initializes this class.
376 PipeCondition.__init__(self, lock)
379 class SharedLock(object):
380 """Implements a shared lock.
382 Multiple threads can acquire the lock in a shared way by calling
383 C{acquire(shared=1)}. In order to acquire the lock in an exclusive way
384 threads can call C{acquire(shared=0)}.
386 Notes on data structures: C{__pending} contains a priority queue (heapq) of
387 all pending acquires: C{[(priority1: prioqueue1), (priority2: prioqueue2),
388 ...]}. Each per-priority queue contains a normal in-order list of conditions
389 to be notified when the lock can be acquired. Shared locks are grouped
390 together by priority and the condition for them is stored in
391 C{__pending_shared} if it already exists. C{__pending_by_prio} keeps
392 references for the per-priority queues indexed by priority for faster access.
395 @ivar name: the name of the lock
410 __condition_class = _PipeConditionWithMode
412 def __init__(self, name, monitor=None):
413 """Construct a new SharedLock.
415 @param name: the name of the lock
416 @type monitor: L{LockMonitor}
417 @param monitor: Lock monitor with which to register
420 object.__init__(self)
425 self.__lock = threading.Lock()
427 # Queue containing waiting acquires
429 self.__pending_by_prio = {}
430 self.__pending_shared = {}
432 # Current lock holders
436 # is this lock in the deleted state?
437 self.__deleted = False
439 # Register with lock monitor
441 logging.debug("Adding lock %s to monitor", name)
442 monitor.RegisterLock(self)
445 return ("<%s.%s name=%s at %#x>" %
446 (self.__class__.__module__, self.__class__.__name__,
447 self.name, id(self)))
449 def GetLockInfo(self, requested):
450 """Retrieves information for querying locks.
453 @param requested: Requested information, see C{query.LQ_*}
456 self.__lock.acquire()
458 # Note: to avoid unintentional race conditions, no references to
459 # modifiable objects should be returned unless they were created in this
464 if query.LQ_MODE in requested:
467 assert not (self.__exc or self.__shr)
469 mode = _EXCLUSIVE_TEXT
473 # Current owner(s) are wanted
474 if query.LQ_OWNER in requested:
481 assert not self.__deleted
482 owner_names = [i.getName() for i in owner]
484 # Pending acquires are wanted
485 if query.LQ_PENDING in requested:
488 # Sorting instead of copying and using heaq functions for simplicity
489 for (_, prioqueue) in sorted(self.__pending):
490 for cond in prioqueue:
492 pendmode = _SHARED_TEXT
494 pendmode = _EXCLUSIVE_TEXT
496 # List of names will be sorted in L{query._GetLockPending}
497 pending.append((pendmode, [i.getName()
498 for i in cond.get_waiting()]))
502 return [(self.name, mode, owner_names, pending)]
504 self.__lock.release()
def __check_deleted(self):
    """Raises an exception if the lock has been deleted.

    @raise errors.LockError: if C{self.__deleted} is set; the message
        contains the lock's name

    """
    # NOTE(review): the guard line was missing from the listing; restored
    # from the docstring and the C{__deleted} flag set by delete() - confirm.
    if self.__deleted:
        raise errors.LockError("Deleted lock %s" % self.name)
def __is_sharer(self):
    """Is the current thread sharing the lock at this time?

    Does not take the internal lock itself.

    @rtype: boolean
    @return: whether the calling thread is a member of C{self.__shr}

    """
    return threading.currentThread() in self.__shr
def __is_exclusive(self):
    """Is the current thread holding the lock exclusively at this time?

    Does not take the internal lock itself.

    @rtype: boolean
    @return: whether C{self.__exc} is the calling thread

    """
    return threading.currentThread() == self.__exc
def __is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    This is a private version of the function: it does not acquire the
    internal lock itself, L{is_owned} does that around the call.

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership

    """
    # NOTE(review): the three branch headers were missing from the listing;
    # restored from the mode table documented on is_owned() - confirm.
    if shared < 0:
        return self.__is_sharer() or self.__is_exclusive()
    elif shared:
        return self.__is_sharer()
    else:
        return self.__is_exclusive()
def is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    Public variant: takes the internal lock around the check and always
    releases it again, also when the check raises.

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership

    """
    self.__lock.acquire()
    try:
        return self.__is_owned(shared=shared)
    finally:
        self.__lock.release()
554 #: Necessary to remain compatible with threading.Condition, which tries to
555 #: retrieve a locks' "_is_owned" attribute
def _count_pending(self):
    """Returns the number of pending acquires.

    Sums the lengths of all per-priority queues in C{self.__pending} while
    holding the internal lock.

    @rtype: int

    """
    self.__lock.acquire()
    try:
        return sum(len(prioqueue) for (_, prioqueue) in self.__pending)
    finally:
        self.__lock.release()
570 def _check_empty(self):
571 """Checks whether there are any pending acquires.
576 self.__lock.acquire()
578 # Order is important: __find_first_pending_queue modifies __pending
579 (_, prioqueue) = self.__find_first_pending_queue()
581 return not (prioqueue or
583 self.__pending_by_prio or
584 self.__pending_shared)
586 self.__lock.release()
def __do_acquire(self, shared):
    """Actually acquire the lock.

    Only records the calling thread as holder; it performs no checks and
    no waiting.

    @param shared: true value to register as sharer, false value to
        register as exclusive holder

    """
    # NOTE(review): the "if shared:" / "else:" lines were missing from the
    # listing; restored from the two mutually exclusive assignments - confirm.
    if shared:
        self.__shr.add(threading.currentThread())
    else:
        self.__exc = threading.currentThread()
def __can_acquire(self, shared):
    """Determine whether lock can be acquired.

    @param shared: true value for a shared acquire, false value for an
        exclusive one
    @rtype: boolean

    """
    # NOTE(review): the "if shared:" / "else:" lines were missing from the
    # listing; restored from the two return statements - confirm.
    if shared:
        # A shared acquire only conflicts with an exclusive holder
        return self.__exc is None
    else:
        # An exclusive acquire needs the lock to be entirely free
        return len(self.__shr) == 0 and self.__exc is None
606 def __find_first_pending_queue(self):
607 """Tries to find the topmost queued entry with pending acquires.
609 Removes empty entries while going through the list.
612 while self.__pending:
613 (priority, prioqueue) = self.__pending[0]
616 return (priority, prioqueue)
619 heapq.heappop(self.__pending)
620 del self.__pending_by_prio[priority]
621 assert priority not in self.__pending_shared
def __is_on_top(self, cond):
    """Checks whether the passed condition is on top of the queue.

    The caller must make sure the queue isn't empty.

    @param cond: condition to compare against the first entry of the
        first non-empty priority queue
    @rtype: boolean

    """
    # The helper also drops empty per-priority queues as a side effect
    (_, prioqueue) = self.__find_first_pending_queue()

    return cond == prioqueue[0]
635 def __acquire_unlocked(self, shared, timeout, priority):
636 """Acquire a shared lock.
638 @param shared: whether to acquire in shared mode; by default an
639 exclusive lock will be acquired
640 @param timeout: maximum waiting time before giving up
641 @type priority: integer
642 @param priority: Priority for acquiring lock
645 self.__check_deleted()
647 # We cannot acquire the lock if we already have it
648 assert not self.__is_owned(), ("double acquire() on a non-recursive lock"
651 # Remove empty entries from queue
652 self.__find_first_pending_queue()
654 # Check whether someone else holds the lock or there are pending acquires.
655 if not self.__pending and self.__can_acquire(shared):
656 # Apparently not, can acquire lock directly.
657 self.__do_acquire(shared)
660 prioqueue = self.__pending_by_prio.get(priority, None)
663 # Try to re-use condition for shared acquire
664 wait_condition = self.__pending_shared.get(priority, None)
665 assert (wait_condition is None or
666 (wait_condition.shared and wait_condition in prioqueue))
668 wait_condition = None
670 if wait_condition is None:
671 if prioqueue is None:
672 assert priority not in self.__pending_by_prio
675 heapq.heappush(self.__pending, (priority, prioqueue))
676 self.__pending_by_prio[priority] = prioqueue
678 wait_condition = self.__condition_class(self.__lock, shared)
679 prioqueue.append(wait_condition)
682 # Keep reference for further shared acquires on same priority. This is
683 # better than trying to find it in the list of pending acquires.
684 assert priority not in self.__pending_shared
685 self.__pending_shared[priority] = wait_condition
688 # Wait until we become the topmost acquire in the queue or the timeout
690 # TODO: Decrease timeout with spurious notifications
691 while not (self.__is_on_top(wait_condition) and
692 self.__can_acquire(shared)):
693 # Wait for notification
694 wait_condition.wait(timeout)
695 self.__check_deleted()
697 # A lot of code assumes blocking acquires always succeed. Loop
698 # internally for that case.
699 if timeout is not None:
702 if self.__is_on_top(wait_condition) and self.__can_acquire(shared):
703 self.__do_acquire(shared)
706 # Remove condition from queue if there are no more waiters
707 if not wait_condition.has_waiting():
708 prioqueue.remove(wait_condition)
709 if wait_condition.shared:
710 # Remove from list of shared acquires if it wasn't while releasing
711 # (e.g. on lock deletion)
712 self.__pending_shared.pop(priority, None)
716 def acquire(self, shared=0, timeout=None, priority=None,
718 """Acquire a shared lock.
720 @type shared: integer (0/1) used as a boolean
721 @param shared: whether to acquire in shared mode; by default an
722 exclusive lock will be acquired
724 @param timeout: maximum waiting time before giving up
725 @type priority: integer
726 @param priority: Priority for acquiring lock
727 @type test_notify: callable or None
728 @param test_notify: Special callback function for unittesting
732 priority = _DEFAULT_PRIORITY
734 self.__lock.acquire()
736 # We already got the lock, notify now
737 if __debug__ and callable(test_notify):
740 return self.__acquire_unlocked(shared, timeout, priority)
742 self.__lock.release()
745 """Changes the lock mode from exclusive to shared.
747 Pending acquires in shared mode on the same priority will go ahead.
750 self.__lock.acquire()
752 assert self.__is_owned(), "Lock must be owned"
754 if self.__is_exclusive():
755 # Do nothing if the lock is already acquired in shared mode
759 # Important: pending shared acquires should only jump ahead if there
760 # was a transition from exclusive to shared, otherwise an owner of a
761 # shared lock can keep calling this function to push incoming shared
763 (priority, prioqueue) = self.__find_first_pending_queue()
765 # Is there a pending shared acquire on this priority?
766 cond = self.__pending_shared.pop(priority, None)
769 assert cond in prioqueue
771 # Ensure shared acquire is on top of queue
772 if len(prioqueue) > 1:
773 prioqueue.remove(cond)
774 prioqueue.insert(0, cond)
779 assert not self.__is_exclusive()
780 assert self.__is_sharer()
784 self.__lock.release()
787 """Release a Shared Lock.
789 You must have acquired the lock, either in shared or in exclusive mode,
790 before calling this function.
793 self.__lock.acquire()
795 assert self.__is_exclusive() or self.__is_sharer(), \
796 "Cannot release non-owned lock"
798 # Autodetect release type
799 if self.__is_exclusive():
802 self.__shr.remove(threading.currentThread())
804 # Notify topmost condition in queue
805 (priority, prioqueue) = self.__find_first_pending_queue()
810 # Prevent further shared acquires from sneaking in while waiters are
812 self.__pending_shared.pop(priority, None)
815 self.__lock.release()
817 def delete(self, timeout=None, priority=None):
818 """Delete a Shared Lock.
820 This operation will declare the lock for removal. First the lock will be
821 acquired in exclusive mode if you don't already own it, then the lock
822 will be put in a state where any future and pending acquire() fail.
825 @param timeout: maximum waiting time before giving up
826 @type priority: integer
827 @param priority: Priority for acquiring lock
831 priority = _DEFAULT_PRIORITY
833 self.__lock.acquire()
835 assert not self.__is_sharer(), "Cannot delete() a lock while sharing it"
837 self.__check_deleted()
839 # The caller is allowed to hold the lock exclusively already.
840 acquired = self.__is_exclusive()
843 acquired = self.__acquire_unlocked(0, timeout, priority)
845 assert self.__is_exclusive() and not self.__is_sharer(), \
846 "Lock wasn't acquired in exclusive mode"
849 self.__deleted = True
852 assert not (self.__exc or self.__shr), "Found owner during deletion"
854 # Notify all acquires. They'll throw an error.
855 for (_, prioqueue) in self.__pending:
856 for cond in prioqueue:
859 assert self.__deleted
863 self.__lock.release()
def _release_save(self):
    """Release the lock and return the mode it was held in.

    The return value is what L{_acquire_restore} expects as its argument.

    @return: whether the calling thread held the lock in shared mode

    """
    # The mode must be read before releasing; afterwards it is lost
    shared = self.__is_sharer()
    # NOTE(review): the two lines below were missing from the listing;
    # restored from the save/restore protocol used by the conditions - confirm.
    self.release()
    return shared
def _acquire_restore(self, shared):
    """Re-acquire the lock in the mode saved by L{_release_save}.

    @param shared: mode to acquire in, passed on to L{acquire}

    """
    self.acquire(shared=shared)
874 # Whenever we want to acquire a full LockSet we pass None as the value
875 # to acquire. Hide this behind this nicely named constant.
879 class _AcquireTimeout(Exception):
880 """Internal exception to abort an acquire on a timeout.
886 """Implements a set of locks.
888 This abstraction implements a set of shared locks for the same resource type,
889 distinguished by name. The user can lock a subset of the resources and the
890 LockSet will take care of acquiring the locks always in the same order, thus
893 All the locks needed in the same set must be acquired together, though.
896 @ivar name: the name of the lockset
899 def __init__(self, members, name, monitor=None):
900 """Constructs a new LockSet.
902 @type members: list of strings
903 @param members: initial members of the set
904 @type monitor: L{LockMonitor}
905 @param monitor: Lock monitor with which to register member locks
908 assert members is not None, "members parameter is not a list"
912 self.__monitor = monitor
914 # Used internally to guarantee coherency
915 self.__lock = SharedLock(self._GetLockName("[lockset]"), monitor=monitor)
917 # The lockdict indexes the relationship name -> lock
918 # The order-of-locking is implied by the alphabetical order of names
921 for mname in members:
922 self.__lockdict[mname] = SharedLock(self._GetLockName(mname),
925 # The owner dict contains the set of locks each thread owns. For
926 # performance each thread can access its own key without a global lock on
927 # this structure. It is paramount though that *no* other type of access is
928 # done to this structure (eg. no looping over its keys). *_owner helper
929 # function are defined to guarantee access is correct, but in general never
930 # do anything different than __owners[threading.currentThread()], or there
def _GetLockName(self, mname):
    """Returns the name for a member lock.

    @param mname: name of the member within this set
    @return: the set's name and the member name, joined by a slash

    """
    return "%s/%s" % (self.name, mname)
941 """Returns the lockset-internal lock.
def _get_lockdict(self):
    """Returns the lockset-internal lock dictionary.

    Accessing this structure is only safe in single-thread usage or when the
    lockset-internal lock is held.

    @return: the dictionary itself, not a copy

    """
    return self.__lockdict
956 """Is the current thread a current level owner?
958 @note: Use L{check_owned} to check if a specific lock is held
961 return threading.currentThread() in self.__owners
963 def check_owned(self, names, shared=-1):
964 """Check if locks are owned in a specific mode.
966 @type names: sequence or string
967 @param names: Lock names (or a single lock name)
968 @param shared: See L{SharedLock.is_owned}
970 @note: Use L{is_owned} to check if the current thread holds I{any} lock and
971 L{list_owned} to get the names of all owned locks
974 if isinstance(names, basestring):
977 # Avoid check if no locks are owned anyway
978 if names and self.is_owned():
981 # Gather references to all locks (in case they're deleted in the meantime)
984 lock = self.__lockdict[lname]
986 raise errors.LockError("Non-existing lock '%s' in set '%s' (it may"
987 " have been removed)" % (lname, self.name))
989 candidates.append(lock)
991 return compat.all(lock.is_owned(shared=shared) for lock in candidates)
995 def _add_owned(self, name=None):
996 """Note the current thread owns the given lock"""
998 if not self.is_owned():
999 self.__owners[threading.currentThread()] = set()
1002 self.__owners[threading.currentThread()].add(name)
1004 self.__owners[threading.currentThread()] = set([name])
def _del_owned(self, name=None):
    """Note the current thread no longer owns the given lock.

    @param name: name of the member lock to forget; with C{None} only the
        cleanup of the thread's entry in the owner dictionary is done

    """
    assert not (name is None and self.__lock.is_owned()), \
        "Cannot hold internal lock when deleting owner status"

    if name is not None:
        self.__owners[threading.currentThread()].remove(name)

    # Only remove the key if we don't hold the set-lock as well
    if (not self.__lock.is_owned() and
            not self.__owners[threading.currentThread()]):
        del self.__owners[threading.currentThread()]
1020 def list_owned(self):
1021 """Get the set of resource names owned by the current thread"""
1023 return self.__owners[threading.currentThread()].copy()
1027 def _release_and_delete_owned(self):
1028 """Release and delete all resources owned by the current thread"""
1029 for lname in self.list_owned():
1030 lock = self.__lockdict[lname]
1033 self._del_owned(name=lname)
1036 """Return the current set of names.
1038 Only call this function while holding __lock and don't iterate on the
1039 result after releasing the lock.
1042 return self.__lockdict.keys()
1045 """Return a copy of the current set of elements.
1047 Used only for debugging purposes.
1050 # If we don't already own the set-level lock acquired
1051 # we'll get it and note we need to release it later.
1052 release_lock = False
1053 if not self.__lock.is_owned():
1055 self.__lock.acquire(shared=1)
1057 result = self.__names()
1060 self.__lock.release()
1063 def acquire(self, names, timeout=None, shared=0, priority=None,
1065 """Acquire a set of resource locks.
1067 @type names: list of strings (or string)
1068 @param names: the names of the locks which shall be acquired
1069 (special lock names, or instance/node names)
1070 @type shared: integer (0/1) used as a boolean
1071 @param shared: whether to acquire in shared mode; by default an
1072 exclusive lock will be acquired
1073 @type timeout: float or None
1074 @param timeout: Maximum time to acquire all locks
1075 @type priority: integer
1076 @param priority: Priority for acquiring locks
1077 @type test_notify: callable or None
1078 @param test_notify: Special callback function for unittesting
1080 @return: Set of all locks successfully acquired or None in case of timeout
1082 @raise errors.LockError: when any lock we try to acquire has
1083 been deleted before we succeed. In this case none of the
1084 locks requested will be acquired.
1087 assert timeout is None or timeout >= 0.0
1089 # Check we don't already own locks at this level
1090 assert not self.is_owned(), ("Cannot acquire locks in the same set twice"
1091 " (lockset %s)" % self.name)
1093 if priority is None:
1094 priority = _DEFAULT_PRIORITY
1096 # We need to keep track of how long we spent waiting for a lock. The
1097 # timeout passed to this function is over all lock acquires.
1098 running_timeout = utils.RunningTimeout(timeout, False)
1101 if names is not None:
1102 # Support passing in a single resource to acquire rather than many
1103 if isinstance(names, basestring):
1106 return self.__acquire_inner(names, False, shared, priority,
1107 running_timeout.Remaining, test_notify)
1110 # If no names are given acquire the whole set by not letting new names
1111 # being added before we release, and getting the current list of names.
1112 # Some of them may then be deleted later, but we'll cope with this.
1114 # We'd like to acquire this lock in a shared way, as it's nice if
1115 # everybody else can use the instances at the same time. If we are
1116 # acquiring them exclusively though they won't be able to do this
1117 # anyway, though, so we'll get the list lock exclusively as well in
1118 # order to be able to do add() on the set while owning it.
1119 if not self.__lock.acquire(shared=shared, priority=priority,
1120 timeout=running_timeout.Remaining()):
1121 raise _AcquireTimeout()
1123 # note we own the set-lock
1126 return self.__acquire_inner(self.__names(), True, shared, priority,
1127 running_timeout.Remaining, test_notify)
1129 # We shouldn't have problems adding the lock to the owners list, but
1130 # if we did we'll try to release this lock and re-raise exception.
1131 # Of course something is going to be really wrong, after this.
1132 self.__lock.release()
1136 except _AcquireTimeout:
1139 def __acquire_inner(self, names, want_all, shared, priority,
1140 timeout_fn, test_notify):
1141 """Inner logic for acquiring a number of locks.
1143 @param names: Names of the locks to be acquired
1144 @param want_all: Whether all locks in the set should be acquired
1145 @param shared: Whether to acquire in shared mode
1146 @param timeout_fn: Function returning remaining timeout
1147 @param priority: Priority for acquiring locks
1148 @param test_notify: Special callback function for unittesting
1153 # First we look the locks up on __lockdict. We have no way of being sure
1154 # they will still be there after, but this makes it a lot faster should
1155 # just one of them be the already wrong. Using a sorted sequence to prevent
1157 for lname in sorted(utils.UniqueSequence(names)):
1159 lock = self.__lockdict[lname] # raises KeyError if lock is not there
1162 # We are acquiring all the set, it doesn't matter if this particular
1163 # element is not there anymore.
1166 raise errors.LockError("Non-existing lock %s in set %s (it may have"
1167 " been removed)" % (lname, self.name))
1169 acquire_list.append((lname, lock))
1171 # This will hold the locknames we effectively acquired.
1175 # Now acquire_list contains a sorted list of resources and locks we
1176 # want. In order to get them we loop on this (private) list and
1177 # acquire() them. We gave no real guarantee they will still exist till
1178 # this is done but .acquire() itself is safe and will alert us if the
1179 # lock gets deleted.
1180 for (lname, lock) in acquire_list:
1181 if __debug__ and callable(test_notify):
1182 test_notify_fn = lambda: test_notify(lname)
1184 test_notify_fn = None
1186 timeout = timeout_fn()
1189 # raises LockError if the lock was deleted
1190 acq_success = lock.acquire(shared=shared, timeout=timeout,
1192 test_notify=test_notify_fn)
1193 except errors.LockError:
1195 # We are acquiring all the set, it doesn't matter if this
1196 # particular element is not there anymore.
1199 raise errors.LockError("Non-existing lock %s in set %s (it may"
1200 " have been removed)" % (lname, self.name))
1203 # Couldn't get lock or timeout occurred
1205 # This shouldn't happen as SharedLock.acquire(timeout=None) is
1207 raise errors.LockError("Failed to get lock %s (set %s)" %
1210 raise _AcquireTimeout()
1213 # now the lock cannot be deleted, we have it!
1214 self._add_owned(name=lname)
1218 # We shouldn't have problems adding the lock to the owners list, but
1219 # if we did we'll try to release this lock and re-raise exception.
1220 # Of course something is going to be really wrong after this.
1226 # Release all owned locks
1227 self._release_and_delete_owned()
1232 def downgrade(self, names=None):
1233 """Downgrade a set of resource locks from exclusive to shared mode.
1235 The locks must have been acquired in exclusive mode.
1238 assert self.is_owned(), ("downgrade on lockset %s while not owning any"
1239 " lock" % self.name)
1241 # Support passing in a single resource to downgrade rather than many
1242 if isinstance(names, basestring):
1245 owned = self.list_owned()
1251 assert owned.issuperset(names), \
1252 ("downgrade() on unheld resources %s (set %s)" %
1253 (names.difference(owned), self.name))
1255 for lockname in names:
1256 self.__lockdict[lockname].downgrade()
1258 # Do we own the lockset in exclusive mode?
1259 if self.__lock.is_owned(shared=0):
1260 # Have all locks been downgraded?
1261 if not compat.any(lock.is_owned(shared=0)
1262 for lock in self.__lockdict.values()):
1263 self.__lock.downgrade()
1264 assert self.__lock.is_owned(shared=1)
1268 def release(self, names=None):
1269 """Release a set of resource locks, at the same level.
1271 You must have acquired the locks, either in shared or in exclusive mode,
1272 before releasing them.
1274 @type names: list of strings, or None
1275 @param names: the names of the locks which shall be released
1276 (defaults to all the locks acquired at that level).
1279 assert self.is_owned(), ("release() on lock set %s while not owner" %
1282 # Support passing in a single resource to release rather than many
1283 if isinstance(names, basestring):
1287 names = self.list_owned()
1290 assert self.list_owned().issuperset(names), (
1291 "release() on unheld resources %s (set %s)" %
1292 (names.difference(self.list_owned()), self.name))
1294 # First of all let's release the "all elements" lock, if set.
1295 # After this 'add' can work again
1296 if self.__lock.is_owned():
1297 self.__lock.release()
1300 for lockname in names:
1301 # If we are sure the lock doesn't leave __lockdict without being
1302 # exclusively held we can do this...
1303 self.__lockdict[lockname].release()
1304 self._del_owned(name=lockname)
def add(self, names, acquired=0, shared=0):
  """Create new locks and insert them into the set.

  The set-level lock is held while the new elements are inserted; it is
  only released again if it was acquired by this call.

  @type names: list of strings
  @param names: names of the new elements to add
  @type acquired: integer (0/1) used as a boolean
  @param acquired: pre-acquire the new resource?
  @type shared: integer (0/1) used as a boolean
  @param shared: is the pre-acquisition shared?
  @raise errors.LockError: if one of the names is already in the set
  @return: True

  """
  # Either nothing is held at this level, or the whole set is held
  # exclusively; anything in between is refused
  assert not self.is_owned() or self.__lock.is_owned(shared=0), \
    ("Cannot add locks if the set %s is only partially owned, or shared" %
     self.name)

  # A lone name is accepted in place of a list of names
  if isinstance(names, basestring):
    names = [names]

  # Take the set-level lock unless the caller has it already, remembering
  # whether it has to be given back at the end
  release_lock = not self.__lock.is_owned()
  if release_lock:
    self.__lock.acquire()

  try:
    invalid_names = set(self.__names()).intersection(names)
    if invalid_names:
      # This must be an explicit raise, not an assert, because assert is
      # turned off when using optimization, and this can happen because of
      # concurrency even if the user doesn't want it.
      raise errors.LockError("duplicate add(%s) on lockset %s" %
                             (invalid_names, self.name))

    for new_name in names:
      new_lock = SharedLock(self._GetLockName(new_name),
                            monitor=self.__monitor)

      if acquired:
        # Neither priority nor timeout is needed, the lock was created just
        # now
        new_lock.acquire(shared=shared)
        # From here on the lock cannot be deleted, we are holding it
        try:
          self._add_owned(name=new_name)
        except:
          # Recording the ownership is not expected to fail. If it does,
          # something is seriously wrong; the lock is released as a safety
          # measure (it is not in __lockdict yet, so no other thread should
          # be pending on it) and the exception is passed on.
          new_lock.release()
          raise

      self.__lockdict[new_name] = new_lock

  finally:
    # Only release __lock if we were not holding it previously.
    if release_lock:
      self.__lock.release()

  return True
def remove(self, names):
  """Delete elements from the lock set.

  The caller must either hold nothing in the lockset, or already hold,
  exclusively, a superset of the elements to delete.

  @type names: list of strings
  @param names: names of the resource to remove.

  @return: a list of locks which we removed; the list is always
      equal to the names list if we were holding all the locks
      exclusively

  """
  # A lone name is accepted in place of a list of names
  if isinstance(names, basestring):
    names = [names]

  # Whatever part of the set is owned has to cover everything that is to be
  # deleted. Exclusive ownership is required too, but the lock itself
  # verifies that.
  assert not self.is_owned() or self.list_owned().issuperset(names), (
    "remove() on acquired lockset %s while not owning all elements" %
    self.name)

  removed = []

  for victim in names:
    # delete() takes the lock exclusively unless it is held already and
    # makes all pending and later acquires fail. Calling it out of order is
    # fine as delete() implies release(), and per the assertion above we
    # hold either everything we want to delete or nothing at all.
    try:
      self.__lockdict[victim].delete()
    except (KeyError, errors.LockError):
      # This cannot happen if we were already holding it, verify:
      assert not self.is_owned(), ("remove failed while holding lockset %s" %
                                   self.name)
    else:
      # No error means this call is the one which deleted the lock, hence it
      # can be dropped from lockdict safely: any further or pending delete()
      # or acquire() will fail (and nobody can have the lock since before
      # our call to delete()).
      #
      # When an exception was thrown, the cleanup is the job of whoever
      # actually deleted the lock.
      removed.append(victim)
      del self.__lockdict[victim]
      # Forget about the lock in our private list too, if we owned it.
      if self.is_owned():
        self._del_owned(name=victim)

  return removed
# Locking levels, must be acquired in increasing order.
# Current rules are:
#   - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL), which must be
#     acquired before performing any operation, either in shared or in
#     exclusive mode. Acquiring the BGL in exclusive mode is discouraged and
#     should be avoided.
#   - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
#     If you need more than one node, or more than one instance, acquire them
#     at the same time.
1450 # Lock levels which are modifiable
1451 LEVELS_MOD = frozenset([
1458 #: Lock level names (make sure to use singular form)
1460 LEVEL_CLUSTER: "cluster",
1461 LEVEL_INSTANCE: "instance",
1462 LEVEL_NODEGROUP: "nodegroup",
1464 LEVEL_NODE_RES: "node-res",
1467 # Constant for the big ganeti lock
class GanetiLockManager(object):
  """The Ganeti Locking Library

  The purpose of this small library is to manage locking for ganeti clusters
  in a central place, while at the same time doing dynamic checks against
  possible deadlocks. It will also make it easier to transition to a different
  lock type should we migrate away from python threads.

  """
  # The one and only instance, recorded by __init__
  _instance = None

  def __init__(self, nodes, nodegroups, instances):
    """Constructs a new GanetiLockManager object.

    There should be only a GanetiLockManager object at any time; creating a
    second one trips an assertion.

    @param nodes: list of node names
    @param nodegroups: list of nodegroup uuids
    @param instances: list of instance names

    """
    assert self.__class__._instance is None, \
           "double GanetiLockManager instance"

    self.__class__._instance = self

    self._monitor = LockMonitor()

    # The keyring contains all the locks, at their level and in the correct
    # locking order.
    self.__keyring = {
      LEVEL_CLUSTER: LockSet([BGL], "cluster", monitor=self._monitor),
      LEVEL_NODE: LockSet(nodes, "node", monitor=self._monitor),
      LEVEL_NODE_RES: LockSet(nodes, "node-res", monitor=self._monitor),
      LEVEL_NODEGROUP: LockSet(nodegroups, "nodegroup", monitor=self._monitor),
      LEVEL_INSTANCE: LockSet(instances, "instance",
                              monitor=self._monitor),
      }

    # Every lock set must be named after its level
    assert compat.all(ls.name == LEVEL_NAMES[level]
                      for (level, ls) in self.__keyring.items())

  def AddToLockMonitor(self, provider):
    """Registers a new lock with the monitor.

    See L{LockMonitor.RegisterLock}.

    """
    return self._monitor.RegisterLock(provider)

  def QueryLocks(self, fields):
    """Queries information from all locks.

    See L{LockMonitor.QueryLocks}.

    """
    return self._monitor.QueryLocks(fields)

  def OldStyleQueryLocks(self, fields):
    """Queries information from all locks, returning old-style data.

    See L{LockMonitor.OldStyleQueryLocks}.

    """
    return self._monitor.OldStyleQueryLocks(fields)

  def _names(self, level):
    """List the lock names at the given level.

    This can be used for debugging/testing purposes.

    @param level: the level whose list of locks to get

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    return self.__keyring[level]._names()

  def is_owned(self, level):
    """Check whether we are owning locks at the given level

    @param level: the level to check

    """
    return self.__keyring[level].is_owned()

  def list_owned(self, level):
    """Get the set of owned locks at the given level

    @param level: the level whose owned locks to list

    """
    return self.__keyring[level].list_owned()

  def check_owned(self, level, names, shared=-1):
    """Check if locks at a certain level are owned in a specific mode.

    @see: L{LockSet.check_owned}

    """
    return self.__keyring[level].check_owned(names, shared=shared)

  def _upper_owned(self, level):
    """Check whether we own any lock at a level greater than the given one.

    @param level: the level above which to look
    @return: True if at least one lock is owned at a greater level, False
        if none is

    """
    # This way of checking only works if LEVELS[i] = i, which we check for in
    # unittests
    return compat.any((self.is_owned(l) for l in LEVELS[level + 1:]))

  def _BGL_owned(self): # pylint: disable=C0103
    """Check if the current thread owns the BGL.

    Both an exclusive or a shared acquisition work.

    """
    return BGL in self.__keyring[LEVEL_CLUSTER].list_owned()

  @staticmethod
  def _contains_BGL(level, names): # pylint: disable=C0103
    """Check if the level contains the BGL.

    Check if acting on the given level and set of names will change
    the status of the Big Ganeti Lock.

    @param level: the level which is acted upon
    @param names: the lock names which are acted upon; None stands for all
        locks of the level and therefore includes the BGL at LEVEL_CLUSTER

    """
    return level == LEVEL_CLUSTER and (names is None or BGL in names)

  def acquire(self, level, names, timeout=None, shared=0, priority=None):
    """Acquire a set of resource locks, at the same level.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be acquired
    @type names: list of strings (or string)
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names)
    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default
        an exclusive lock will be acquired
    @type timeout: float
    @param timeout: Maximum time to acquire all locks
    @type priority: integer
    @param priority: Priority for acquiring lock
    @return: whatever L{LockSet.acquire} returns for that level

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Check that we are either acquiring the Big Ganeti Lock or we already own
    # it. Some "legacy" opcodes need to be sure they are run non-concurrently
    # so even if we've migrated we need to at least share the BGL to be
    # compatible with them. Of course if we own the BGL exclusively there's no
    # point in acquiring any other lock, unless perhaps we are half way through
    # the migration of the current opcode.
    assert (self._contains_BGL(level, names) or self._BGL_owned()), (
      "You must own the Big Ganeti Lock before acquiring any other")

    # Check we don't own locks at upper levels (the level itself is not
    # covered by this check).
    assert not self._upper_owned(level), ("Cannot acquire locks at a level"
                                          " while owning some at a greater one")

    # Acquire the locks in the set.
    return self.__keyring[level].acquire(names, shared=shared, timeout=timeout,
                                         priority=priority)

  def downgrade(self, level, names=None):
    """Downgrade a set of resource locks from exclusive to shared mode.

    You must have acquired the locks in exclusive mode.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be downgraded
    @type names: list of strings, or None
    @param names: the names of the locks which shall be downgraded
        (defaults to all the locks acquired at the level)

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    return self.__keyring[level].downgrade(names=names)

  def release(self, level, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive
    mode, before releasing them.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be released
    @type names: list of strings, or None
    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level)

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    # The BGL has to be the last lock to go
    assert (not self._contains_BGL(level, names) or
            not self._upper_owned(LEVEL_CLUSTER)), (
      "Cannot release the Big Ganeti Lock while holding something"
      " at upper levels (%r)" %
      (utils.CommaJoin(["%s=%r" % (LEVEL_NAMES[i], self.list_owned(i))
                        for i in self.__keyring.keys()]), ))

    # Release will complain if we don't own the locks already
    return self.__keyring[level].release(names)

  def add(self, level, names, acquired=0, shared=0):
    """Add locks at the specified level.

    @type level: member of locking.LEVELS_MOD
    @param level: the level at which the locks shall be added
    @type names: list of strings
    @param names: names of the locks to add
    @type acquired: integer (0/1) used as a boolean
    @param acquired: whether to acquire the newly added locks
    @type shared: integer (0/1) used as a boolean
    @param shared: whether the acquisition will be shared

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
                               " operations")
    assert not self._upper_owned(level), ("Cannot add locks at a level"
                                          " while owning some at a greater one")
    return self.__keyring[level].add(names, acquired=acquired, shared=shared)

  def remove(self, level, names):
    """Remove locks from the specified level.

    You must either already own the locks you are trying to remove
    exclusively or not own any lock at an upper level.

    @type level: member of locking.LEVELS_MOD
    @param level: the level at which the locks shall be removed
    @type names: list of strings
    @param names: the names of the locks which shall be removed
        (special lock names, or instance/node names)

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
                               " operations")
    # Check we either own the level or don't own anything from here
    # up. LockSet.remove() will check the case in which we don't own
    # all the needed resources, or we have a shared ownership.
    assert self.is_owned(level) or not self._upper_owned(level), (
      "Cannot remove locks at a level while not owning it or"
      " owning some at a greater one")
    return self.__keyring[level].remove(names)
def _MonitorSortKey(entry):
  """Sorting key function.

  Sort by name, registration order and then order of information. This provides
  a stable sort order over different providers, even if they return the same
  name.

  @type entry: tuple
  @param entry: C{(item, idx, num)}; C{item} is a four-element tuple starting
      with the lock name, C{idx} is the position of the item within its
      provider's output and C{num} is the provider's registration number
  @return: tuple usable as sort key

  """
  # Unpacked here instead of in the signature: tuple parameters are not
  # accepted by newer Python versions, while this form works everywhere
  (item, idx, num) = entry
  (name, _, _, _) = item

  return (utils.NiceSortKey(name), num, idx)
class LockMonitor(object):
  """Registry of lock information providers.

  Providers are kept in a weak-key dictionary, together with the order in
  which they were registered, and are asked for their lock information when
  a query is made.

  """
  _LOCK_ATTR = "_lock"

  def __init__(self):
    """Sets up an empty monitor.

    """
    # Protects the dictionary of tracked locks
    self._lock = SharedLock("LockMonitor")

    # Source of registration numbers, needed for stable sorting
    self._counter = itertools.count(0)

    # Tracked locks. Weak references are used to avoid issues with circular
    # references and deletion.
    self._locks = weakref.WeakKeyDictionary()

  @ssynchronized(_LOCK_ATTR)
  def RegisterLock(self, provider):
    """Starts tracking a lock.

    @param provider: Object with a callable method named C{GetLockInfo}, taking
        a single C{set} containing the requested information items
    @note: It would be nicer to only receive the function generating the
        requested information but, as it turns out, weak references to bound
        methods (e.g. C{self.GetLockInfo}) are tricky; there are several
        workarounds, but none of the ones I found works properly in combination
        with a standard C{WeakKeyDictionary}

    """
    assert provider not in self._locks, "Duplicate registration"

    # Duplicate names are deliberately not checked for. When a lock is
    # re-created with the same name within a very short timeframe, the
    # previous instance might still be in the weakref dictionary. Recording
    # the order of incoming registrations keeps the sort order stable anyway.
    registration_number = self._counter.next()
    self._locks[provider] = registration_number

  def _GetLockInfo(self, requested):
    """Collects information from all tracked locks.

    @param requested: passed on to each provider's C{GetLockInfo}
    @return: list of C{(info, idx, num)} tuples; C{idx} is the position of
        C{info} in the provider's output, C{num} the provider's registration
        number

    """
    # The internal lock is only needed for taking a consistent snapshot of
    # the tracked items
    self._lock.acquire(shared=1)
    try:
      tracked = self._locks.items()
    finally:
      self._lock.release()

    collected = []
    for (provider, num) in tracked:
      for (idx, info) in enumerate(provider.GetLockInfo(requested)):
        collected.append((info, idx, num))

    return collected

  def _Query(self, fields):
    """Prepares a query over all tracked locks.

    @type fields: list of strings
    @param fields: List of fields to return
    @return: tuple of query object and query data

    """
    qobj = query.Query(query.LOCK_FIELDS, fields)

    # Collect the data, then order it by name and incoming order
    lockinfo = self._GetLockInfo(qobj.RequestedData())
    lockinfo.sort(key=_MonitorSortKey)

    # Only the lock information itself goes into the query data
    infos = [compat.fst(entry) for entry in lockinfo]

    return (qobj, query.LockQueryData(infos))

  def QueryLocks(self, fields):
    """Queries information from all locks.

    @type fields: list of strings
    @param fields: List of fields to return

    """
    (lock_query, data) = self._Query(fields)

    # Prepare query response
    return query.GetQueryResponse(lock_query, data)

  def OldStyleQueryLocks(self, fields):
    """Queries information from all locks, returning old-style data.

    @type fields: list of strings
    @param fields: List of fields to return

    """
    (lock_query, data) = self._Query(fields)

    return lock_query.OldStyleQuery(data)