4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Module implementing the Ganeti locking code."""
23 # pylint: disable-msg=W0212
25 # W0212 since e.g. LockSet methods use (a lot) the internals of
36 from ganeti import errors
37 from ganeti import utils
38 from ganeti import compat
41 _EXCLUSIVE_TEXT = "exclusive"
42 _SHARED_TEXT = "shared"
45 def ssynchronized(mylock, shared=0):
46 """Shared Synchronization decorator.
48 Calls the function holding the given lock, either in exclusive or shared
49 mode. It requires the passed lock to be a SharedLock (or support its
52 @type mylock: lockable object or string
53 @param mylock: lock to acquire or class member name of the lock to acquire
57 def sync_function(*args, **kwargs):
58 if isinstance(mylock, basestring):
59 assert args, "cannot ssynchronize on non-class method: self not found"
61 lock = getattr(args[0], mylock)
64 lock.acquire(shared=shared)
66 return fn(*args, **kwargs)
73 class RunningTimeout(object):
74 """Class to calculate remaining timeout when doing several operations.
84 def __init__(self, timeout, allow_negative, _time_fn=time.time):
85 """Initializes this class.
88 @param timeout: Timeout duration
89 @type allow_negative: bool
90 @param allow_negative: Whether to return values below zero
91 @param _time_fn: Time function for unittests
96 if timeout is not None and timeout < 0.0:
97 raise ValueError("Timeout must not be negative")
99 self._timeout = timeout
100 self._allow_negative = allow_negative
101 self._time_fn = _time_fn
103 self._start_time = None
106 """Returns the remaining timeout.
109 if self._timeout is None:
112 # Get start time on first calculation
113 if self._start_time is None:
114 self._start_time = self._time_fn()
116 # Calculate remaining time
117 remaining_timeout = self._start_time + self._timeout - self._time_fn()
119 if not self._allow_negative:
120 # Ensure timeout is always >= 0
121 return max(0.0, remaining_timeout)
123 return remaining_timeout
126 class _SingleNotifyPipeConditionWaiter(object):
127 """Helper class for SingleNotifyPipeCondition
135 def __init__(self, poller, fd):
136 """Constructor for _SingleNotifyPipeConditionWaiter
138 @type poller: select.poll
139 @param poller: Poller object
141 @param fd: File descriptor to wait for
144 object.__init__(self)
145 self._poller = poller
148 def __call__(self, timeout):
149 """Wait for something to happen on the pipe.
151 @type timeout: float or None
152 @param timeout: Timeout for waiting (can be None)
155 running_timeout = RunningTimeout(timeout, True)
158 remaining_time = running_timeout.Remaining()
160 if remaining_time is not None:
161 if remaining_time < 0.0:
164 # Our calculation uses seconds, poll() wants milliseconds
165 remaining_time *= 1000
168 result = self._poller.poll(remaining_time)
169 except EnvironmentError, err:
170 if err.errno != errno.EINTR:
174 # Check whether we were notified
175 if result and result[0][0] == self._fd:
179 class _BaseCondition(object):
180 """Base class containing common code for conditions.
182 Some of this code is taken from python's threading module.
194 def __init__(self, lock):
195 """Constructor for _BaseCondition.
197 @type lock: threading.Lock
198 @param lock: condition base lock
201 object.__init__(self)
204 self._release_save = lock._release_save
205 except AttributeError:
206 self._release_save = self._base_release_save
208 self._acquire_restore = lock._acquire_restore
209 except AttributeError:
210 self._acquire_restore = self._base_acquire_restore
212 self._is_owned = lock._is_owned
213 except AttributeError:
214 self._is_owned = self._base_is_owned
218 # Export the lock's acquire() and release() methods
219 self.acquire = lock.acquire
220 self.release = lock.release
222 def _base_is_owned(self):
223 """Check whether lock is owned by current thread.
226 if self._lock.acquire(0):
231 def _base_release_save(self):
234 def _base_acquire_restore(self, _):
237 def _check_owned(self):
238 """Raise an exception if the current thread doesn't own the lock.
241 if not self._is_owned():
242 raise RuntimeError("cannot work with un-aquired lock")
245 class SingleNotifyPipeCondition(_BaseCondition):
246 """Condition which can only be notified once.
248 This condition class uses pipes and poll, internally, to be able to wait for
249 notification with a timeout, without resorting to polling. It is almost
250 compatible with Python's threading.Condition, with the following differences:
251 - notifyAll can only be called once, and no wait can happen after that
252 - notify is not supported, only notifyAll
264 _waiter_class = _SingleNotifyPipeConditionWaiter
266 def __init__(self, lock):
267 """Constructor for SingleNotifyPipeCondition
270 _BaseCondition.__init__(self, lock)
272 self._notified = False
274 self._write_fd = None
277 def _check_unnotified(self):
278 """Throws an exception if already notified.
282 raise RuntimeError("cannot use already notified condition")
285 """Cleanup open file descriptors, if any.
288 if self._read_fd is not None:
289 os.close(self._read_fd)
292 if self._write_fd is not None:
293 os.close(self._write_fd)
294 self._write_fd = None
297 def wait(self, timeout=None):
298 """Wait for a notification.
300 @type timeout: float or None
301 @param timeout: Waiting timeout (can be None)
305 self._check_unnotified()
309 if self._poller is None:
310 (self._read_fd, self._write_fd) = os.pipe()
311 self._poller = select.poll()
312 self._poller.register(self._read_fd, select.POLLHUP)
314 wait_fn = self._waiter_class(self._poller, self._read_fd)
315 state = self._release_save()
317 # Wait for notification
321 self._acquire_restore(state)
324 if self._nwaiters == 0:
327 def notifyAll(self): # pylint: disable-msg=C0103
328 """Close the writing side of the pipe to notify all waiters.
332 self._check_unnotified()
333 self._notified = True
334 if self._write_fd is not None:
335 os.close(self._write_fd)
336 self._write_fd = None
339 class PipeCondition(_BaseCondition):
340 """Group-only non-polling condition with counters.
342 This condition class uses pipes and poll, internally, to be able to wait for
343 notification with a timeout, without resorting to polling. It is almost
344 compatible with Python's threading.Condition, but only supports notifyAll and
345 non-recursive locks. As an additional feature it's able to report whether
346 there are any waiting threads.
354 _single_condition_class = SingleNotifyPipeCondition
356 def __init__(self, lock):
357 """Initializes this class.
360 _BaseCondition.__init__(self, lock)
361 self._waiters = set()
362 self._single_condition = self._single_condition_class(self._lock)
364 def wait(self, timeout=None):
365 """Wait for a notification.
367 @type timeout: float or None
368 @param timeout: Waiting timeout (can be None)
373 # Keep local reference to the pipe. It could be replaced by another thread
374 # notifying while we're waiting.
375 cond = self._single_condition
377 self._waiters.add(threading.currentThread())
382 self._waiters.remove(threading.currentThread())
384 def notifyAll(self): # pylint: disable-msg=C0103
385 """Notify all currently waiting threads.
389 self._single_condition.notifyAll()
390 self._single_condition = self._single_condition_class(self._lock)
392 def get_waiting(self):
393 """Returns a list of all waiting threads.
400 def has_waiting(self):
401 """Returns whether there are active waiters.
406 return bool(self._waiters)
409 class SharedLock(object):
410 """Implements a shared lock.
412 Multiple threads can acquire the lock in a shared way by calling
413 C{acquire(shared=1)}. In order to acquire the lock in an exclusive way
414 threads can call C{acquire(shared=0)}.
416 The lock prevents starvation but does not guarantee that threads will acquire
417 the shared lock in the order they queued for it, just that they will
421 @ivar name: the name of the lock
436 __condition_class = PipeCondition
438 def __init__(self, name, monitor=None):
439 """Construct a new SharedLock.
441 @param name: the name of the lock
442 @type monitor: L{LockMonitor}
443 @param monitor: Lock monitor with which to register
446 object.__init__(self)
451 self.__lock = threading.Lock()
453 # Queue containing waiting acquires
456 # Active and inactive conditions for shared locks
457 self.__active_shr_c = self.__condition_class(self.__lock)
458 self.__inactive_shr_c = self.__condition_class(self.__lock)
460 # Current lock holders
464 # is this lock in the deleted state?
465 self.__deleted = False
467 # Register with lock monitor
469 monitor.RegisterLock(self)
471 def GetInfo(self, fields):
472 """Retrieves information for querying locks.
474 @type fields: list of strings
475 @param fields: List of fields to return
478 self.__lock.acquire()
482 # Note: to avoid unintentional race conditions, no references to
483 # modifiable objects should be returned unless they were created in this
487 info.append(self.name)
488 elif fname == "mode":
490 info.append("deleted")
491 assert not (self.__exc or self.__shr)
493 info.append(_EXCLUSIVE_TEXT)
495 info.append(_SHARED_TEXT)
498 elif fname == "owner":
505 assert not self.__deleted
506 info.append([i.getName() for i in owner])
509 elif fname == "pending":
512 for cond in self.__pending:
513 if cond in (self.__active_shr_c, self.__inactive_shr_c):
516 mode = _EXCLUSIVE_TEXT
518 # This function should be fast as it runs with the lock held. Hence
519 # not using utils.NiceSort.
520 data.append((mode, sorted([i.getName()
521 for i in cond.get_waiting()])))
525 raise errors.OpExecError("Invalid query field '%s'" % fname)
529 self.__lock.release()
531 def __check_deleted(self):
532 """Raises an exception if the lock has been deleted.
536 raise errors.LockError("Deleted lock %s" % self.name)
538 def __is_sharer(self):
539 """Is the current thread sharing the lock at this time?
542 return threading.currentThread() in self.__shr
544 def __is_exclusive(self):
545 """Is the current thread holding the lock exclusively at this time?
548 return threading.currentThread() == self.__exc
550 def __is_owned(self, shared=-1):
551 """Is the current thread somehow owning the lock at this time?
553 This is a private version of the function, which presumes you're holding
558 return self.__is_sharer() or self.__is_exclusive()
560 return self.__is_sharer()
562 return self.__is_exclusive()
564 def _is_owned(self, shared=-1):
565 """Is the current thread somehow owning the lock at this time?
568 - < 0: check for any type of ownership (default)
569 - 0: check for exclusive ownership
570 - > 0: check for shared ownership
573 self.__lock.acquire()
575 return self.__is_owned(shared=shared)
577 self.__lock.release()
579 def _count_pending(self):
580 """Returns the number of pending acquires.
585 self.__lock.acquire()
587 return len(self.__pending)
589 self.__lock.release()
591 def __do_acquire(self, shared):
592 """Actually acquire the lock.
596 self.__shr.add(threading.currentThread())
598 self.__exc = threading.currentThread()
600 def __can_acquire(self, shared):
601 """Determine whether lock can be acquired.
605 return self.__exc is None
607 return len(self.__shr) == 0 and self.__exc is None
609 def __is_on_top(self, cond):
610 """Checks whether the passed condition is on top of the queue.
612 The caller must make sure the queue isn't empty.
615 return self.__pending[0] == cond
617 def __acquire_unlocked(self, shared, timeout):
618 """Acquire a shared lock.
620 @param shared: whether to acquire in shared mode; by default an
621 exclusive lock will be acquired
622 @param timeout: maximum waiting time before giving up
625 self.__check_deleted()
627 # We cannot acquire the lock if we already have it
628 assert not self.__is_owned(), ("double acquire() on a non-recursive lock"
631 # Check whether someone else holds the lock or there are pending acquires.
632 if not self.__pending and self.__can_acquire(shared):
633 # Apparently not, can acquire lock directly.
634 self.__do_acquire(shared)
638 wait_condition = self.__active_shr_c
640 # Check if we're not yet in the queue
641 if wait_condition not in self.__pending:
642 self.__pending.append(wait_condition)
644 wait_condition = self.__condition_class(self.__lock)
645 # Always add to queue
646 self.__pending.append(wait_condition)
649 # Wait until we become the topmost acquire in the queue or the timeout
651 while not (self.__is_on_top(wait_condition) and
652 self.__can_acquire(shared)):
653 # Wait for notification
654 wait_condition.wait(timeout)
655 self.__check_deleted()
657 # A lot of code assumes blocking acquires always succeed. Loop
658 # internally for that case.
659 if timeout is not None:
662 if self.__is_on_top(wait_condition) and self.__can_acquire(shared):
663 self.__do_acquire(shared)
666 # Remove condition from queue if there are no more waiters
667 if not wait_condition.has_waiting() and not self.__deleted:
668 self.__pending.remove(wait_condition)
672 def acquire(self, shared=0, timeout=None, test_notify=None):
673 """Acquire a shared lock.
675 @type shared: integer (0/1) used as a boolean
676 @param shared: whether to acquire in shared mode; by default an
677 exclusive lock will be acquired
679 @param timeout: maximum waiting time before giving up
680 @type test_notify: callable or None
681 @param test_notify: Special callback function for unittesting
684 self.__lock.acquire()
686 # We already got the lock, notify now
687 if __debug__ and callable(test_notify):
690 return self.__acquire_unlocked(shared, timeout)
692 self.__lock.release()
695 """Release a Shared Lock.
697 You must have acquired the lock, either in shared or in exclusive mode,
698 before calling this function.
701 self.__lock.acquire()
703 assert self.__is_exclusive() or self.__is_sharer(), \
704 "Cannot release non-owned lock"
706 # Autodetect release type
707 if self.__is_exclusive():
710 self.__shr.remove(threading.currentThread())
712 # Notify topmost condition in queue
714 first_condition = self.__pending[0]
715 first_condition.notifyAll()
717 if first_condition == self.__active_shr_c:
718 self.__active_shr_c = self.__inactive_shr_c
719 self.__inactive_shr_c = first_condition
722 self.__lock.release()
724 def delete(self, timeout=None):
725 """Delete a Shared Lock.
727 This operation will declare the lock for removal. First the lock will be
728 acquired in exclusive mode if you don't already own it, then the lock
729 will be put in a state where any future and pending acquire() fail.
732 @param timeout: maximum waiting time before giving up
735 self.__lock.acquire()
737 assert not self.__is_sharer(), "Cannot delete() a lock while sharing it"
739 self.__check_deleted()
741 # The caller is allowed to hold the lock exclusively already.
742 acquired = self.__is_exclusive()
745 acquired = self.__acquire_unlocked(0, timeout)
747 assert self.__is_exclusive() and not self.__is_sharer(), \
748 "Lock wasn't acquired in exclusive mode"
751 self.__deleted = True
754 assert not (self.__exc or self.__shr), "Found owner during deletion"
756 # Notify all acquires. They'll throw an error.
757 while self.__pending:
758 self.__pending.pop().notifyAll()
762 self.__lock.release()
764 def _release_save(self):
765 shared = self.__is_sharer()
769 def _acquire_restore(self, shared):
770 self.acquire(shared=shared)
773 # Whenever we want to acquire a full LockSet we pass None as the value
774 # to acquire. Hide this behind this nicely named constant.
778 class _AcquireTimeout(Exception):
779 """Internal exception to abort an acquire on a timeout.
785 """Implements a set of locks.
787 This abstraction implements a set of shared locks for the same resource type,
788 distinguished by name. The user can lock a subset of the resources and the
789 LockSet will take care of acquiring the locks always in the same order, thus
792 All the locks needed in the same set must be acquired together, though.
795 @ivar name: the name of the lockset
798 def __init__(self, members, name, monitor=None):
799 """Constructs a new LockSet.
801 @type members: list of strings
802 @param members: initial members of the set
803 @type monitor: L{LockMonitor}
804 @param monitor: Lock monitor with which to register member locks
807 assert members is not None, "members parameter is not a list"
811 self.__monitor = monitor
813 # Used internally to guarantee coherency.
814 self.__lock = SharedLock(name)
816 # The lockdict indexes the relationship name -> lock
817 # The order-of-locking is implied by the alphabetical order of names
820 for mname in members:
821 self.__lockdict[mname] = SharedLock(self._GetLockName(mname),
824 # The owner dict contains the set of locks each thread owns. For
825 # performance each thread can access its own key without a global lock on
826 # this structure. It is paramount though that *no* other type of access is
827 # done to this structure (eg. no looping over its keys). *_owned helper
828 # functions are defined to guarantee access is correct, but in general never
829 # do anything different than __owners[threading.currentThread()], or there
833 def _GetLockName(self, mname):
834 """Returns the name for a member lock.
837 return "%s/%s" % (self.name, mname)
840 """Is the current thread a current level owner?"""
841 return threading.currentThread() in self.__owners
843 def _add_owned(self, name=None):
844 """Note the current thread owns the given lock"""
846 if not self._is_owned():
847 self.__owners[threading.currentThread()] = set()
850 self.__owners[threading.currentThread()].add(name)
852 self.__owners[threading.currentThread()] = set([name])
854 def _del_owned(self, name=None):
855 """Note the current thread no longer owns the given lock"""
857 assert not (name is None and self.__lock._is_owned()), \
858 "Cannot hold internal lock when deleting owner status"
861 self.__owners[threading.currentThread()].remove(name)
863 # Only remove the key if we don't hold the set-lock as well
864 if (not self.__lock._is_owned() and
865 not self.__owners[threading.currentThread()]):
866 del self.__owners[threading.currentThread()]
868 def _list_owned(self):
869 """Get the set of resource names owned by the current thread"""
871 return self.__owners[threading.currentThread()].copy()
875 def _release_and_delete_owned(self):
876 """Release and delete all resources owned by the current thread"""
877 for lname in self._list_owned():
878 lock = self.__lockdict[lname]
881 self._del_owned(name=lname)
884 """Return the current set of names.
886 Only call this function while holding __lock and don't iterate on the
887 result after releasing the lock.
890 return self.__lockdict.keys()
893 """Return a copy of the current set of elements.
895 Used only for debugging purposes.
898 # If we don't already own the set-level lock acquired
899 # we'll get it and note we need to release it later.
901 if not self.__lock._is_owned():
903 self.__lock.acquire(shared=1)
905 result = self.__names()
908 self.__lock.release()
911 def acquire(self, names, timeout=None, shared=0, test_notify=None):
912 """Acquire a set of resource locks.
914 @type names: list of strings (or string)
915 @param names: the names of the locks which shall be acquired
916 (special lock names, or instance/node names)
917 @type shared: integer (0/1) used as a boolean
918 @param shared: whether to acquire in shared mode; by default an
919 exclusive lock will be acquired
920 @type timeout: float or None
921 @param timeout: Maximum time to acquire all locks
922 @type test_notify: callable or None
923 @param test_notify: Special callback function for unittesting
925 @return: Set of all locks successfully acquired or None in case of timeout
927 @raise errors.LockError: when any lock we try to acquire has
928 been deleted before we succeed. In this case none of the
929 locks requested will be acquired.
932 assert timeout is None or timeout >= 0.0
934 # Check we don't already own locks at this level
935 assert not self._is_owned(), ("Cannot acquire locks in the same set twice"
936 " (lockset %s)" % self.name)
938 # We need to keep track of how long we spent waiting for a lock. The
939 # timeout passed to this function is over all lock acquires.
940 running_timeout = RunningTimeout(timeout, False)
943 if names is not None:
944 # Support passing in a single resource to acquire rather than many
945 if isinstance(names, basestring):
948 return self.__acquire_inner(names, False, shared,
949 running_timeout.Remaining, test_notify)
952 # If no names are given acquire the whole set by not letting new names
953 # being added before we release, and getting the current list of names.
954 # Some of them may then be deleted later, but we'll cope with this.
956 # We'd like to acquire this lock in a shared way, as it's nice if
957 # everybody else can use the instances at the same time. If we are
958 # acquiring them exclusively, though, they won't be able to do this
959 # anyway, so we'll get the list lock exclusively as well in
960 # order to be able to do add() on the set while owning it.
961 if not self.__lock.acquire(shared=shared,
962 timeout=running_timeout.Remaining()):
963 raise _AcquireTimeout()
965 # note we own the set-lock
968 return self.__acquire_inner(self.__names(), True, shared,
969 running_timeout.Remaining, test_notify)
971 # We shouldn't have problems adding the lock to the owners list, but
972 # if we did we'll try to release this lock and re-raise exception.
973 # Of course something is going to be really wrong, after this.
974 self.__lock.release()
978 except _AcquireTimeout:
981 def __acquire_inner(self, names, want_all, shared, timeout_fn, test_notify):
982 """Inner logic for acquiring a number of locks.
984 @param names: Names of the locks to be acquired
985 @param want_all: Whether all locks in the set should be acquired
986 @param shared: Whether to acquire in shared mode
987 @param timeout_fn: Function returning remaining timeout
988 @param test_notify: Special callback function for unittesting
993 # First we look the locks up on __lockdict. We have no way of being sure
994 # they will still be there after, but this makes it a lot faster should
995 # just one of them already be wrong. Using a sorted sequence to prevent
997 for lname in sorted(utils.UniqueSequence(names)):
999 lock = self.__lockdict[lname] # raises KeyError if lock is not there
1002 # We are acquiring all the set, it doesn't matter if this particular
1003 # element is not there anymore.
1006 raise errors.LockError("Non-existing lock %s in set %s" %
1009 acquire_list.append((lname, lock))
1011 # This will hold the locknames we effectively acquired.
1015 # Now acquire_list contains a sorted list of resources and locks we
1016 # want. In order to get them we loop on this (private) list and
1017 # acquire() them. We have no real guarantee they will still exist till
1018 # this is done but .acquire() itself is safe and will alert us if the
1019 # lock gets deleted.
1020 for (lname, lock) in acquire_list:
1021 if __debug__ and callable(test_notify):
1022 test_notify_fn = lambda: test_notify(lname)
1024 test_notify_fn = None
1026 timeout = timeout_fn()
1029 # raises LockError if the lock was deleted
1030 acq_success = lock.acquire(shared=shared, timeout=timeout,
1031 test_notify=test_notify_fn)
1032 except errors.LockError:
1034 # We are acquiring all the set, it doesn't matter if this
1035 # particular element is not there anymore.
1038 raise errors.LockError("Non-existing lock %s in set %s" %
1042 # Couldn't get lock or timeout occurred
1044 # This shouldn't happen as SharedLock.acquire(timeout=None) is
1046 raise errors.LockError("Failed to get lock %s (set %s)" %
1049 raise _AcquireTimeout()
1052 # now the lock cannot be deleted, we have it!
1053 self._add_owned(name=lname)
1057 # We shouldn't have problems adding the lock to the owners list, but
1058 # if we did we'll try to release this lock and re-raise exception.
1059 # Of course something is going to be really wrong after this.
1060 if lock._is_owned():
1065 # Release all owned locks
1066 self._release_and_delete_owned()
1071 def release(self, names=None):
1072 """Release a set of resource locks, at the same level.
1074 You must have acquired the locks, either in shared or in exclusive mode,
1075 before releasing them.
1077 @type names: list of strings, or None
1078 @param names: the names of the locks which shall be released
1079 (defaults to all the locks acquired at that level).
1082 assert self._is_owned(), ("release() on lock set %s while not owner" %
1085 # Support passing in a single resource to release rather than many
1086 if isinstance(names, basestring):
1090 names = self._list_owned()
1093 assert self._list_owned().issuperset(names), (
1094 "release() on unheld resources %s (set %s)" %
1095 (names.difference(self._list_owned()), self.name))
1097 # First of all let's release the "all elements" lock, if set.
1098 # After this 'add' can work again
1099 if self.__lock._is_owned():
1100 self.__lock.release()
1103 for lockname in names:
1104 # If we are sure the lock doesn't leave __lockdict without being
1105 # exclusively held we can do this...
1106 self.__lockdict[lockname].release()
1107 self._del_owned(name=lockname)
1109 def add(self, names, acquired=0, shared=0):
1110 """Add a new set of elements to the set
1112 @type names: list of strings
1113 @param names: names of the new elements to add
1114 @type acquired: integer (0/1) used as a boolean
1115 @param acquired: pre-acquire the new resource?
1116 @type shared: integer (0/1) used as a boolean
1117 @param shared: is the pre-acquisition shared?
1120 # Check we don't already own locks at this level
1121 assert not self._is_owned() or self.__lock._is_owned(shared=0), \
1122 ("Cannot add locks if the set %s is only partially owned, or shared" %
1125 # Support passing in a single resource to add rather than many
1126 if isinstance(names, basestring):
1129 # If we don't already own the set-level lock acquired in an exclusive way
1130 # we'll get it and note we need to release it later.
1131 release_lock = False
1132 if not self.__lock._is_owned():
1134 self.__lock.acquire()
1137 invalid_names = set(self.__names()).intersection(names)
1139 # This must be an explicit raise, not an assert, because assert is
1140 # turned off when using optimization, and this can happen because of
1141 # concurrency even if the user doesn't want it.
1142 raise errors.LockError("duplicate add(%s) on lockset %s" %
1143 (invalid_names, self.name))
1145 for lockname in names:
1146 lock = SharedLock(self._GetLockName(lockname), monitor=self.__monitor)
1149 lock.acquire(shared=shared)
1150 # now the lock cannot be deleted, we have it!
1152 self._add_owned(name=lockname)
1154 # We shouldn't have problems adding the lock to the owners list,
1155 # but if we did we'll try to release this lock and re-raise
1156 # exception. Of course something is going to be really wrong,
1157 # after this. On the other hand the lock hasn't been added to the
1158 # __lockdict yet so no other threads should be pending on it. This
1159 # release is just a safety measure.
1163 self.__lockdict[lockname] = lock
1166 # Only release __lock if we were not holding it previously.
1168 self.__lock.release()
1172 def remove(self, names):
1173 """Remove elements from the lock set.
1175 You can either not hold anything in the lockset or already hold a superset
1176 of the elements you want to delete, exclusively.
1178 @type names: list of strings
1179 @param names: names of the resource to remove.
1181 @return: a list of locks which we removed; the list is always
1182 equal to the names list if we were holding all the locks
1186 # Support passing in a single resource to remove rather than many
1187 if isinstance(names, basestring):
1190 # If we own any subset of this lock it must be a superset of what we want
1191 # to delete. The ownership must also be exclusive, but that will be checked
1192 # by the lock itself.
1193 assert not self._is_owned() or self._list_owned().issuperset(names), (
1194 "remove() on acquired lockset %s while not owning all elements" %
1200 # Calling delete() acquires the lock exclusively if we don't already own
1201 # it, and causes all pending and subsequent lock acquires to fail. It's
1202 # fine to call it out of order because delete() also implies release(),
1203 # and the assertion above guarantees that we either already hold
1204 # everything we want to delete, or we hold none.
1206 self.__lockdict[lname].delete()
1207 removed.append(lname)
1208 except (KeyError, errors.LockError):
1209 # This cannot happen if we were already holding it, verify:
1210 assert not self._is_owned(), ("remove failed while holding lockset %s"
1213 # If no LockError was raised we are the ones who deleted the lock.
1214 # This means we can safely remove it from lockdict, as any further or
1215 # pending delete() or acquire() will fail (and nobody can have the lock
1216 # since before our call to delete()).
1218 # This is done in an else clause because if the exception was thrown
1219 # it's the job of the one who actually deleted it.
1220 del self.__lockdict[lname]
1221 # And let's remove it from our private list if we owned it.
1222 if self._is_owned():
1223 self._del_owned(name=lname)
1228 # Locking levels, must be acquired in increasing order.
1229 # Current rules are:
1230 # - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be
1231 # acquired before performing any operation, either in shared or in exclusive
1232 # mode. acquiring the BGL in exclusive mode is discouraged and should be
1234 # - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
1235 # If you need more than one node, or more than one instance, acquire them at
1241 LEVELS = [LEVEL_CLUSTER,
1245 # Lock levels which are modifiable
1246 LEVELS_MOD = [LEVEL_NODE, LEVEL_INSTANCE]
1249 LEVEL_CLUSTER: "cluster",
1250 LEVEL_INSTANCE: "instance",
1254 # Constant for the big ganeti lock
1258 class GanetiLockManager:
1259 """The Ganeti Locking Library
1261 The purpose of this small library is to manage locking for ganeti clusters
1262 in a central place, while at the same time doing dynamic checks against
1263 possible deadlocks. It will also make it easier to transition to a different
1264 lock type should we migrate away from python threads.
1269 def __init__(self, nodes=None, instances=None):
1270 """Constructs a new GanetiLockManager object.
1272 There should be only one GanetiLockManager object at any time, so this
1273 function raises an error if this is not the case.
1275 @param nodes: list of node names
1276 @param instances: list of instance names
1279 assert self.__class__._instance is None, \
1280 "double GanetiLockManager instance"
1282 self.__class__._instance = self
1284 self._monitor = LockMonitor()
1286 # The keyring contains all the locks, at their level and in the correct
1289 LEVEL_CLUSTER: LockSet([BGL], "BGL", monitor=self._monitor),
1290 LEVEL_NODE: LockSet(nodes, "nodes", monitor=self._monitor),
1291 LEVEL_INSTANCE: LockSet(instances, "instances",
1292 monitor=self._monitor),
1295 def QueryLocks(self, fields, sync):
1296 """Queries information from all locks.
1298 See L{LockMonitor.QueryLocks}.
1301 return self._monitor.QueryLocks(fields, sync)
1303 def _names(self, level):
1304 """List the lock names at the given level.
1306 This can be used for debugging/testing purposes.
1308 @param level: the level whose list of locks to get
1311 assert level in LEVELS, "Invalid locking level %s" % level
1312 return self.__keyring[level]._names()
1314 def _is_owned(self, level):
1315 """Check whether we are owning locks at the given level
1318 return self.__keyring[level]._is_owned()
1320 is_owned = _is_owned
1322 def _list_owned(self, level):
1323 """Get the set of owned locks at the given level
1326 return self.__keyring[level]._list_owned()
1328 def _upper_owned(self, level):
1329 """Check that we don't own any lock at a level greater than the given one.
1332 # This way of checking only works if LEVELS[i] = i, which we check for in
1334 return compat.any((self._is_owned(l) for l in LEVELS[level + 1:]))
1336 def _BGL_owned(self): # pylint: disable-msg=C0103
1337 """Check if the current thread owns the BGL.
1339 Both an exclusive or a shared acquisition work.
1342 return BGL in self.__keyring[LEVEL_CLUSTER]._list_owned()
1345 def _contains_BGL(level, names): # pylint: disable-msg=C0103
1346 """Check if the level contains the BGL.
1348 Check if acting on the given level and set of names will change
1349 the status of the Big Ganeti Lock.
1352 return level == LEVEL_CLUSTER and (names is None or BGL in names)
1354 def acquire(self, level, names, timeout=None, shared=0):
1355 """Acquire a set of resource locks, at the same level.
1357 @type level: member of locking.LEVELS
1358 @param level: the level at which the locks shall be acquired
1359 @type names: list of strings (or string)
1360 @param names: the names of the locks which shall be acquired
1361 (special lock names, or instance/node names)
1362 @type shared: integer (0/1) used as a boolean
1363 @param shared: whether to acquire in shared mode; by default
1364 an exclusive lock will be acquired
1365 @type timeout: float
1366 @param timeout: Maximum time to acquire all locks
1369 assert level in LEVELS, "Invalid locking level %s" % level
1371 # Check that we are either acquiring the Big Ganeti Lock or we already own
1372 # it. Some "legacy" opcodes need to be sure they are run non-concurrently
1373 # so even if we've migrated we need to at least share the BGL to be
1374 # compatible with them. Of course if we own the BGL exclusively there's no
1375 # point in acquiring any other lock, unless perhaps we are half way through
1376 # the migration of the current opcode.
1377 assert (self._contains_BGL(level, names) or self._BGL_owned()), (
1378 "You must own the Big Ganeti Lock before acquiring any other")
1380 # Check we don't own locks at the same or upper levels.
1381 assert not self._upper_owned(level), ("Cannot acquire locks at a level"
1382 " while owning some at a greater one")
1384 # Acquire the locks in the set.
1385 return self.__keyring[level].acquire(names, shared=shared, timeout=timeout)
1387 def release(self, level, names=None):
1388 """Release a set of resource locks, at the same level.
1390 You must have acquired the locks, either in shared or in exclusive
1391 mode, before releasing them.
1393 @type level: member of locking.LEVELS
1394 @param level: the level at which the locks shall be released
1395 @type names: list of strings, or None
1396 @param names: the names of the locks which shall be released
1397 (defaults to all the locks acquired at that level)
1400 assert level in LEVELS, "Invalid locking level %s" % level
1401 assert (not self._contains_BGL(level, names) or
1402 not self._upper_owned(LEVEL_CLUSTER)), (
1403 "Cannot release the Big Ganeti Lock while holding something"
1404 " at upper levels (%r)" %
1405 (utils.CommaJoin(["%s=%r" % (LEVEL_NAMES[i], self._list_owned(i))
1406 for i in self.__keyring.keys()]), ))
1408 # Release will complain if we don't own the locks already
1409 return self.__keyring[level].release(names)
1411 def add(self, level, names, acquired=0, shared=0):
1412 """Add locks at the specified level.
1414 @type level: member of locking.LEVELS_MOD
1415 @param level: the level at which the locks shall be added
1416 @type names: list of strings
1417 @param names: names of the locks to acquire
1418 @type acquired: integer (0/1) used as a boolean
1419 @param acquired: whether to acquire the newly added locks
1420 @type shared: integer (0/1) used as a boolean
1421 @param shared: whether the acquisition will be shared
1424 assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
1425 assert self._BGL_owned(), ("You must own the BGL before performing other"
1427 assert not self._upper_owned(level), ("Cannot add locks at a level"
1428 " while owning some at a greater one")
1429 return self.__keyring[level].add(names, acquired=acquired, shared=shared)
1431 def remove(self, level, names):
1432 """Remove locks from the specified level.
1434 You must either already own the locks you are trying to remove
1435 exclusively or not own any lock at an upper level.
1437 @type level: member of locking.LEVELS_MOD
1438 @param level: the level at which the locks shall be removed
1439 @type names: list of strings
1440 @param names: the names of the locks which shall be removed
1441 (special lock names, or instance/node names)
1444 assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
1445 assert self._BGL_owned(), ("You must own the BGL before performing other"
1447 # Check we either own the level or don't own anything from here
1448 # up. LockSet.remove() will check the case in which we don't own
1449 # all the needed resources, or we have a shared ownership.
1450 assert self._is_owned(level) or not self._upper_owned(level), (
1451 "Cannot remove locks at a level while not owning it or"
1452 " owning some at a greater one")
1453 return self.__keyring[level].remove(names)
class LockMonitor(object):
  """Keeps track of registered locks and reports information about them.

  Locks are added with L{RegisterLock} and are only weakly referenced;
  L{QueryLocks} gathers the requested fields from all tracked locks.

  """
  # Name of the attribute holding the lock used by the ssynchronized methods
  _LOCK_ATTR = "_lock"

  def __init__(self):
    """Initializes this class.

    """
    self._lock = SharedLock("LockMonitor")

    # Tracked locks. Weak references are used to avoid issues with circular
    # references and deletion.
    self._locks = weakref.WeakKeyDictionary()

  @ssynchronized(_LOCK_ATTR)
  def RegisterLock(self, lock):
    """Registers a new lock.

    @param lock: the lock to track; neither the object nor its C{name} may
        already be registered

    """
    logging.debug("Registering lock %s", lock.name)
    assert lock not in self._locks, "Duplicate lock registration"
    assert not compat.any(lock.name == i.name for i in self._locks.keys()), \
           "Found duplicate lock name"
    # Only the key matters; the dictionary is used as a weak set
    self._locks[lock] = None

  @ssynchronized(_LOCK_ATTR)
  def _GetLockInfo(self, fields):
    """Get information from all locks while the monitor lock is held.

    @param fields: passed unchanged to each lock's C{GetInfo}
    @return: dictionary mapping each lock name to that lock's C{GetInfo}
        result

    """
    result = {}

    for lock in self._locks.keys():
      assert lock.name not in result, "Found duplicate lock name"
      result[lock.name] = lock.GetInfo(fields)

    return result

  def QueryLocks(self, fields, sync):
    """Queries information from all locks.

    @type fields: list of strings
    @param fields: List of fields to return
    @type sync: boolean
    @param sync: Whether to operate in synchronous mode
    @return: list with one C{GetInfo} result per lock, ordered by lock name
    @raise NotImplementedError: if synchronous mode is requested

    """
    if sync:
      raise NotImplementedError("Synchronous queries are not implemented")

    # Get all data without sorting
    result = self._GetLockInfo(fields)

    # Sort by name
    return [result[name] for name in utils.NiceSort(result.keys())]