# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the Ganeti locking code."""

# pylint: disable-msg=W0613,W0201
import threading

# Wouldn't it be better to define LockingError in the locking module?
# Well, for now that's how the rest of the code does it...
from ganeti import errors
from ganeti import utils
def ssynchronized(lock, shared=0):
  """Shared Synchronization decorator.

  Calls the function holding the given lock, either in exclusive or shared
  mode. It requires the passed lock to be a SharedLock (or support its
  semantics).

  @param lock: object exposing acquire(shared=...) and release()
  @param shared: value handed to lock.acquire(); the default (0) asks for
      the exclusive mode
  @return: a decorator; the decorated function runs with the lock held and
      the lock is released when it returns or raises

  """
  def wrap(fn):
    def sync_function(*args, **kwargs):
      lock.acquire(shared=shared)
      try:
        return fn(*args, **kwargs)
      finally:
        # Give the lock back on every exit path, exceptions included.
        lock.release()
    return sync_function
  return wrap
class SharedLock:
  """Implements a shared lock.

  Multiple threads can hold the lock at the same time in shared mode, by
  calling acquire(shared=1). Only one thread at a time can hold it in
  exclusive mode, by calling acquire() with the default shared=0, and only
  while no other thread is sharing it.

  The lock prevents starvation but does not guarantee that threads will acquire
  the shared lock in the order they queued for it, just that they will
  eventually do so.

  The lock is not recursive: a thread that already owns it, in either mode,
  must not call acquire() again.

  """
  def __init__(self):
    """Construct a new SharedLock"""
    # we have two conditions, __turn_shr and __turn_exc, sharing the same lock.
    self.__lock = threading.Lock()
    self.__turn_shr = threading.Condition(self.__lock)
    self.__turn_exc = threading.Condition(self.__lock)

    # current lock holders
    self.__shr = set()
    self.__exc = None

    # lock waiters
    self.__nwait_exc = 0
    self.__nwait_shr = 0
    # sharers which were woken up by an exclusive release and still have to
    # get in before the next exclusive holder may pass
    self.__npass_shr = 0

    # is this lock in the deleted state?
    self.__deleted = False

  def __is_sharer(self):
    """Is the current thread sharing the lock at this time?"""
    return threading.currentThread() in self.__shr

  def __is_exclusive(self):
    """Is the current thread holding the lock exclusively at this time?"""
    return threading.currentThread() == self.__exc

  def __is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    This is a private version of the function, which presumes you're holding
    the internal lock.

    @param shared: same meaning as in _is_owned()

    """
    if shared < 0:
      return self.__is_sharer() or self.__is_exclusive()
    elif shared:
      return self.__is_sharer()
    else:
      return self.__is_exclusive()

  def _is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership
    @return: whether the current thread owns the lock in the requested way

    """
    self.__lock.acquire()
    try:
      result = self.__is_owned(shared=shared)
    finally:
      self.__lock.release()

    return result

  def __wait(self, c):
    """Wait on the given condition, and raise an exception if the current lock
    is declared deleted in the meantime.

    @param c: the condition to wait on
    @raise errors.LockError: if the lock is in the deleted state once the
        wait is over

    """
    c.wait()
    if self.__deleted:
      raise errors.LockError('deleted lock')

  def __exclusive_acquire(self):
    """Acquire the lock exclusively.

    This is a private function that presumes you are already holding the
    internal lock. It's defined separately to avoid code duplication between
    acquire() and delete()

    @raise errors.LockError: if the lock gets deleted while waiting

    """
    self.__nwait_exc += 1
    try:
      # This is to save ourselves from a nasty race condition that could
      # theoretically make the sharers starve.
      if self.__nwait_shr > 0 or self.__nwait_exc > 1:
        self.__wait(self.__turn_exc)

      while len(self.__shr) > 0 or self.__exc is not None:
        self.__wait(self.__turn_exc)

      self.__exc = threading.currentThread()
    finally:
      # We stop being a waiter whether we got the lock or __wait() raised.
      self.__nwait_exc -= 1

    assert self.__npass_shr == 0, "SharedLock: internal fairness violation"

  def acquire(self, blocking=1, shared=0):
    """Acquire a shared lock.

    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported yet)
    @return: True once the lock has been acquired
    @raise NotImplementedError: if blocking is false
    @raise errors.LockError: if the lock is deleted, either already or
        while this thread is waiting for it

    """
    if not blocking:
      # We don't have non-blocking mode for now
      raise NotImplementedError

    self.__lock.acquire()
    try:
      if self.__deleted:
        raise errors.LockError('deleted lock')

      # We cannot acquire the lock if we already have it
      assert not self.__is_owned(), "double acquire() on a non-recursive lock"
      assert self.__npass_shr >= 0, "Internal fairness condition weirdness"

      if shared:
        self.__nwait_shr += 1
        try:
          wait = False
          # If there is an exclusive holder waiting we have to wait.
          # We'll only do this once, though, when we start waiting for
          # the lock. Then we'll just wait while there are no
          # exclusive holders.
          if self.__nwait_exc > 0:
            # TODO: if !blocking...
            wait = True
            self.__wait(self.__turn_shr)

          while self.__exc is not None:
            wait = True
            # TODO: if !blocking...
            self.__wait(self.__turn_shr)

          self.__shr.add(threading.currentThread())

          # If we were waiting note that we passed
          if wait:
            self.__npass_shr -= 1

        finally:
          self.__nwait_shr -= 1

        assert self.__npass_shr >= 0, "Internal fairness condition weirdness"
      else:
        # TODO: if !blocking...
        # (or modify __exclusive_acquire for non-blocking mode)
        self.__exclusive_acquire()

    finally:
      self.__lock.release()

    return True

  def release(self):
    """Release a Shared Lock.

    You must have acquired the lock, either in shared or in exclusive mode,
    before calling this function.

    The kind of release (shared or exclusive) is detected from the way the
    current thread holds the lock; releasing a lock the thread does not own
    trips an assertion.

    """
    self.__lock.acquire()
    try:
      assert self.__npass_shr >= 0, "Internal fairness condition weirdness"
      # Autodetect release type
      if self.__is_exclusive():
        self.__exc = None

        # An exclusive holder has just had the lock, time to put it in shared
        # mode if there are shared holders waiting. Otherwise wake up the next
        # exclusive holder.
        if self.__nwait_shr > 0:
          # Make sure at least the ones which were blocked pass.
          self.__npass_shr = self.__nwait_shr
          self.__turn_shr.notifyAll()
        elif self.__nwait_exc > 0:
          self.__turn_exc.notify()

      elif self.__is_sharer():
        self.__shr.remove(threading.currentThread())

        # If there are shared holders waiting (and not just scheduled to pass)
        # there *must* be an exclusive holder waiting as well; otherwise what
        # were they waiting for?
        assert (self.__nwait_exc > 0 or
                self.__npass_shr == self.__nwait_shr), \
               "Lock sharers waiting while no exclusive is queueing"

        # If there are no more shared holders either in or scheduled to pass,
        # and some exclusive holders are waiting let's wake one up.
        if (len(self.__shr) == 0 and
            self.__nwait_exc > 0 and
            not self.__npass_shr > 0):
          self.__turn_exc.notify()

      else:
        assert False, "Cannot release non-owned lock"

    finally:
      self.__lock.release()

  def delete(self, blocking=1):
    """Delete a Shared Lock.

    This operation will declare the lock for removal. First the lock will be
    acquired in exclusive mode if you don't already own it, then the lock
    will be put in a state where any future and pending acquire() fail.

    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode. this locking mode is not supported
        yet unless you are already holding exclusively the lock.
    @raise NotImplementedError: if blocking is false and the lock is not
        already held exclusively by the current thread
    @raise errors.LockError: if the lock has already been deleted

    """
    self.__lock.acquire()
    try:
      assert not self.__is_sharer(), "cannot delete() a lock while sharing it"

      if self.__deleted:
        raise errors.LockError('deleted lock')

      if not self.__is_exclusive():
        if not blocking:
          # We don't have non-blocking mode for now
          raise NotImplementedError
        self.__exclusive_acquire()

      self.__deleted = True
      # Deleting also gives up our exclusive ownership.
      self.__exc = None
      # Wake up everybody, they will fail acquiring the lock and
      # raise an exception instead.
      self.__turn_exc.notifyAll()
      self.__turn_shr.notifyAll()

    finally:
      self.__lock.release()
# Whenever we want to acquire a full LockSet we pass None as the value
# to acquire. Hide this behind this nicely named constant.
# NOTE(review): the assignment itself is missing from this copy of the file;
# the name ALL_SET is reconstructed -- confirm against callers.
ALL_SET = None
class LockSet:
  """Implements a set of locks.

  This abstraction implements a set of shared locks for the same resource type,
  distinguished by name. The user can lock a subset of the resources and the
  LockSet will take care of acquiring the locks always in the same order, thus
  preventing deadlock.

  All the locks needed in the same set must be acquired together, though.

  """
  def __init__(self, members=None):
    """Constructs a new LockSet.

    @param members: initial members of the set

    """
    # Used internally to guarantee coherency.
    self.__lock = SharedLock()

    # The lockdict indexes the relationship name -> lock
    # The order-of-locking is implied by the alphabetical order of names
    self.__lockdict = {}

    if members is not None:
      for name in members:
        self.__lockdict[name] = SharedLock()

    # The owner dict contains the set of locks each thread owns. For
    # performance each thread can access its own key without a global lock on
    # this structure. It is paramount though that *no* other type of access is
    # done to this structure (eg. no looping over its keys). *_owner helper
    # function are defined to guarantee access is correct, but in general never
    # do anything different than __owners[threading.currentThread()], or there
    # will be trouble.
    self.__owners = {}

  def _is_owned(self):
    """Is the current thread a current level owner?"""
    return threading.currentThread() in self.__owners

  def _add_owned(self, name=None):
    """Note the current thread owns the given lock

    @param name: name of the lock now owned; None only records that the
        thread is an owner at this level (used for the set-level lock)

    """
    if name is None:
      if not self._is_owned():
        self.__owners[threading.currentThread()] = set()
    else:
      if self._is_owned():
        self.__owners[threading.currentThread()].add(name)
      else:
        self.__owners[threading.currentThread()] = set([name])

  def _del_owned(self, name=None):
    """Note the current thread no longer owns the given lock

    @param name: name of the lock to forget; None only drops the thread's
        entry if nothing else is owned at this level

    """
    if name is not None:
      self.__owners[threading.currentThread()].remove(name)

    # Only remove the key if we don't hold the set-lock as well
    if (not self.__lock._is_owned() and
        not self.__owners[threading.currentThread()]):
      del self.__owners[threading.currentThread()]

  def _list_owned(self):
    """Get the set of resource names owned by the current thread"""
    if self._is_owned():
      return self.__owners[threading.currentThread()].copy()
    else:
      return set()

  def __names(self):
    """Return the current set of names.

    Only call this function while holding __lock and don't iterate on the
    result after releasing the lock.

    """
    return self.__lockdict.keys()

  def _names(self):
    """Return a copy of the current set of elements.

    Used only for debugging purposes.

    """
    # If we don't already own the set-level lock acquired
    # we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock._is_owned():
      release_lock = True
      self.__lock.acquire(shared=1)
    try:
      result = self.__names()
    finally:
      if release_lock:
        self.__lock.release()
    return set(result)

  def acquire(self, names, blocking=1, shared=0):
    """Acquire a set of resource locks.

    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names); None acquires
        the whole set
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported yet)

    @return: the set of lock names effectively acquired; when acquiring
        the whole set, names deleted in the meantime are left out

    @raise NotImplementedError: if blocking is false
    @raise errors.LockError: when any lock we try to acquire has
        been deleted before we succeed. In this case none of the
        locks requested will be acquired.

    """
    if not blocking:
      # We don't have non-blocking mode for now
      raise NotImplementedError

    # Check we don't already own locks at this level
    assert not self._is_owned(), "Cannot acquire locks in the same set twice"

    if names is None:
      # If no names are given acquire the whole set by not letting new names
      # being added before we release, and getting the current list of names.
      # Some of them may then be deleted later, but we'll cope with this.
      #
      # We'd like to acquire this lock in a shared way, as it's nice if
      # everybody else can use the instances at the same time. If are acquiring
      # them exclusively though they won't be able to do this anyway, though,
      # so we'll get the list lock exclusively as well in order to be able to
      # do add() on the set while owning it.
      self.__lock.acquire(shared=shared)
      try:
        # note we own the set-lock
        self._add_owned()
        names = self.__names()
      except:
        # We shouldn't have problems adding the lock to the owners list, but
        # if we did we'll try to release this lock and re-raise exception.
        # Of course something is going to be really wrong, after this.
        self.__lock.release()
        raise

    try:
      # Support passing in a single resource to acquire rather than many
      if isinstance(names, basestring):
        names = [names]
      else:
        names = sorted(names)

      acquire_list = []
      # First we look the locks up on __lockdict. We have no way of being sure
      # they will still be there after, but this makes it a lot faster should
      # just one of them be the already wrong
      for lname in utils.UniqueSequence(names):
        try:
          lock = self.__lockdict[lname] # raises KeyError if lock is not there
          acquire_list.append((lname, lock))
        except KeyError:
          if self.__lock._is_owned():
            # We are acquiring all the set, it doesn't matter if this
            # particular element is not there anymore.
            continue
          else:
            raise errors.LockError('non-existing lock in set (%s)' % lname)

      # This will hold the locknames we effectively acquired.
      acquired = set()
      # Now acquire_list contains a sorted list of resources and locks we want.
      # In order to get them we loop on this (private) list and acquire() them.
      # We gave no real guarantee they will still exist till this is done but
      # .acquire() itself is safe and will alert us if the lock gets deleted.
      for (lname, lock) in acquire_list:
        try:
          lock.acquire(shared=shared) # raises LockError if the lock is deleted
          # now the lock cannot be deleted, we have it!
          self._add_owned(name=lname)
          acquired.add(lname)
        except errors.LockError:
          if self.__lock._is_owned():
            # We are acquiring all the set, it doesn't matter if this
            # particular element is not there anymore.
            continue
          else:
            # Remember the culprit: lname is reused by the rollback loop.
            name_fail = lname
            for lname in self._list_owned():
              self.__lockdict[lname].release()
              self._del_owned(name=lname)
            raise errors.LockError('non-existing lock in set (%s)' % name_fail)
        except:
          # We shouldn't have problems adding the lock to the owners list, but
          # if we did we'll try to release this lock and re-raise exception.
          # Of course something is going to be really wrong, after this.
          if lock._is_owned():
            lock.release()
          raise

    except:
      # If something went wrong and we had the set-lock let's release it...
      if self.__lock._is_owned():
        self.__lock.release()
      raise

    return acquired

  def release(self, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive mode,
    before releasing them.

    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level).

    """
    assert self._is_owned(), "release() on lock set while not owner"

    # Support passing in a single resource to release rather than many
    if isinstance(names, basestring):
      names = [names]

    if names is None:
      names = self._list_owned()
    else:
      names = set(names)
      assert self._list_owned().issuperset(names), (
               "release() on unheld resources %s" %
               names.difference(self._list_owned()))

    # First of all let's release the "all elements" lock, if set.
    # After this 'add' can work again
    if self.__lock._is_owned():
      self.__lock.release()
      self._del_owned()

    for lockname in names:
      # If we are sure the lock doesn't leave __lockdict without being
      # exclusively held we can do this...
      self.__lockdict[lockname].release()
      self._del_owned(name=lockname)

  def add(self, names, acquired=0, shared=0):
    """Add a new set of elements to the set

    @param names: names of the new elements to add
    @param acquired: pre-acquire the new resource?
    @param shared: is the pre-acquisition shared?
    @return: True once all the names have been added
    @raise errors.LockError: if any of the names is already in the set

    """
    # Check we don't already own locks at this level
    assert not self._is_owned() or self.__lock._is_owned(shared=0), \
      "Cannot add locks if the set is only partially owned, or shared"

    # Support passing in a single resource to add rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we don't already own the set-level lock acquired in an exclusive way
    # we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock._is_owned():
      release_lock = True
      self.__lock.acquire()

    try:
      invalid_names = set(self.__names()).intersection(names)
      if invalid_names:
        # This must be an explicit raise, not an assert, because assert is
        # turned off when using optimization, and this can happen because of
        # concurrency even if the user doesn't want it.
        raise errors.LockError("duplicate add() (%s)" % invalid_names)

      for lockname in names:
        lock = SharedLock()

        if acquired:
          lock.acquire(shared=shared)
          # now the lock cannot be deleted, we have it!
          try:
            self._add_owned(name=lockname)
          except:
            # We shouldn't have problems adding the lock to the owners list,
            # but if we did we'll try to release this lock and re-raise
            # exception. Of course something is going to be really wrong,
            # after this. On the other hand the lock hasn't been added to the
            # __lockdict yet so no other threads should be pending on it. This
            # release is just a safety measure.
            lock.release()
            raise

        self.__lockdict[lockname] = lock

    finally:
      # Only release __lock if we were not holding it previously.
      if release_lock:
        self.__lock.release()

    return True

  def remove(self, names, blocking=1):
    """Remove elements from the lock set.

    You can either not hold anything in the lockset or already hold a superset
    of the elements you want to delete, exclusively.

    @param names: names of the resource to remove.
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported
        yet unless you are already holding exclusively the locks)

    @return: a list of locks which we removed; the list is always
        equal to the names list if we were holding all the locks
        exclusively
    @raise NotImplementedError: if blocking is false and the current thread
        owns nothing in the set

    """
    if not blocking and not self._is_owned():
      # We don't have non-blocking mode for now
      raise NotImplementedError

    # Support passing in a single resource to remove rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we own any subset of this lock it must be a superset of what we want
    # to delete. The ownership must also be exclusive, but that will be checked
    # by the lock itself.
    assert not self._is_owned() or self._list_owned().issuperset(names), (
      "remove() on acquired lockset while not owning all elements")

    removed = []

    for lname in names:
      # Calling delete() acquires the lock exclusively if we don't already own
      # it, and causes all pending and subsequent lock acquires to fail. It's
      # fine to call it out of order because delete() also implies release(),
      # and the assertion above guarantees that if we either already hold
      # everything we want to delete, or we hold none.
      try:
        self.__lockdict[lname].delete()
        removed.append(lname)
      except (KeyError, errors.LockError):
        # This cannot happen if we were already holding it, verify:
        assert not self._is_owned(), "remove failed while holding lockset"
      else:
        # If no LockError was raised we are the ones who deleted the lock.
        # This means we can safely remove it from lockdict, as any further or
        # pending delete() or acquire() will fail (and nobody can have the lock
        # since before our call to delete()).
        #
        # This is done in an else clause because if the exception was thrown
        # it's the job of the one who actually deleted it.
        del self.__lockdict[lname]
        # And let's remove it from our private list if we owned it.
        if self._is_owned():
          self._del_owned(name=lname)

    return removed
# Locking levels, must be acquired in increasing order.
# Current rules are:
#   - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be
#   acquired before performing any operation, either in shared or in exclusive
#   mode. acquiring the BGL in exclusive mode is discouraged and should be
#   avoided.
#   - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
#   If you need more than one node, or more than one instance, acquire them at
#   the same time.
# NOTE(review): the three assignments below are missing from this copy of the
# file. The values must satisfy LEVELS[i] == i (GanetiLockManager._upper_owned
# relies on it); the instance-before-node order is reconstructed -- confirm.
LEVEL_CLUSTER = 0
LEVEL_INSTANCE = 1
LEVEL_NODE = 2

LEVELS = [LEVEL_CLUSTER,
          LEVEL_INSTANCE,
          LEVEL_NODE]

# Lock levels which are modifiable
LEVELS_MOD = [LEVEL_NODE, LEVEL_INSTANCE]

# Constant for the big ganeti lock
BGL = 'BGL'
class GanetiLockManager:
  """The Ganeti Locking Library

  The purpose of this small library is to manage locking for ganeti clusters
  in a central place, while at the same time doing dynamic checks against
  possible deadlocks. It will also make it easier to transition to a different
  lock type should we migrate away from python threads.

  """
  # The one and only instance, recorded by __init__.
  _instance = None

  def __init__(self, nodes=None, instances=None):
    """Constructs a new GanetiLockManager object.

    There should be only a GanetiLockManager object at any time, so this
    function raises an error if this is not the case.

    @param nodes: list of node names
    @param instances: list of instance names

    """
    assert self.__class__._instance is None, \
           "double GanetiLockManager instance"

    self.__class__._instance = self

    # The keyring contains all the locks, at their level and in the correct
    # locking order.
    self.__keyring = {
      LEVEL_CLUSTER: LockSet([BGL]),
      LEVEL_NODE: LockSet(nodes),
      LEVEL_INSTANCE: LockSet(instances),
    }

  def _names(self, level):
    """List the lock names at the given level.

    This can be used for debugging/testing purposes.

    @param level: the level whose list of locks to get

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    return self.__keyring[level]._names()

  def _is_owned(self, level):
    """Check whether we are owning locks at the given level

    @param level: the level to check

    """
    return self.__keyring[level]._is_owned()

  # Public name for the same check.
  is_owned = _is_owned

  def _list_owned(self, level):
    """Get the set of owned locks at the given level

    @param level: the level whose owned locks to list

    """
    return self.__keyring[level]._list_owned()

  def _upper_owned(self, level):
    """Check whether we own any lock at a level greater than the given one.

    @param level: the reference level
    @return: true if the current thread owns locks at any level above it

    """
    # This way of checking only works if LEVELS[i] = i, which we check for in
    # the test cases.
    return utils.any((self._is_owned(l) for l in LEVELS[level + 1:]))

  def _BGL_owned(self):
    """Check if the current thread owns the BGL.

    Both an exclusive or a shared acquisition work.

    """
    return BGL in self.__keyring[LEVEL_CLUSTER]._list_owned()

  def _contains_BGL(self, level, names):
    """Check if the level contains the BGL.

    Check if acting on the given level and set of names will change
    the status of the Big Ganeti Lock.

    @param level: the level being acted upon
    @param names: the lock names being acted upon; None stands for all
        the locks at that level

    """
    return level == LEVEL_CLUSTER and (names is None or BGL in names)

  def acquire(self, level, names, blocking=1, shared=0):
    """Acquire a set of resource locks, at the same level.

    @param level: the level at which the locks shall be acquired;
        it must be a member of LEVELS.
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names)
    @param shared: whether to acquire in shared mode; by default
        an exclusive lock will be acquired
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported yet)
    @return: whatever LockSet.acquire() returns for that level

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Check that we are either acquiring the Big Ganeti Lock or we already own
    # it. Some "legacy" opcodes need to be sure they are run non-concurrently
    # so even if we've migrated we need to at least share the BGL to be
    # compatible with them. Of course if we own the BGL exclusively there's no
    # point in acquiring any other lock, unless perhaps we are half way through
    # the migration of the current opcode.
    assert (self._contains_BGL(level, names) or self._BGL_owned()), (
            "You must own the Big Ganeti Lock before acquiring any other")

    # Check we don't own locks at the same or upper levels.
    assert not self._upper_owned(level), ("Cannot acquire locks at a level"
           " while owning some at a greater one")

    # Acquire the locks in the set.
    return self.__keyring[level].acquire(names, shared=shared,
                                         blocking=blocking)

  def release(self, level, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive
    mode, before releasing them.

    @param level: the level at which the locks shall be released;
        it must be a member of LEVELS
    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level)

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    assert (not self._contains_BGL(level, names) or
            not self._upper_owned(LEVEL_CLUSTER)), (
            "Cannot release the Big Ganeti Lock while holding something"
            " at upper levels")

    # Release will complain if we don't own the locks already
    return self.__keyring[level].release(names)

  def add(self, level, names, acquired=0, shared=0):
    """Add locks at the specified level.

    @param level: the level at which the locks shall be added;
        it must be a member of LEVELS_MOD.
    @param names: names of the locks to acquire
    @param acquired: whether to acquire the newly added locks
    @param shared: whether the acquisition will be shared

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " operations")
    assert not self._upper_owned(level), ("Cannot add locks at a level"
           " while owning some at a greater one")
    return self.__keyring[level].add(names, acquired=acquired, shared=shared)

  def remove(self, level, names, blocking=1):
    """Remove locks from the specified level.

    You must either already own the locks you are trying to remove
    exclusively or not own any lock at an upper level.

    @param level: the level at which the locks shall be removed;
        it must be a member of LEVELS_MOD
    @param names: the names of the locks which shall be removed
        (special lock names, or instance/node names)
    @param blocking: whether to block while trying to operate in
        try-lock mode (this locking mode is not supported yet)

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " operations")
    # Check we either own the level or don't own anything from here
    # up. LockSet.remove() will check the case in which we don't own
    # all the needed resources, or we have a shared ownership.
    assert self._is_owned(level) or not self._upper_owned(level), (
           "Cannot remove locks at a level while not owning it or"
           " owning some at a greater one")
    return self.__keyring[level].remove(names, blocking=blocking)