4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Module implementing the Ganeti locking code."""
23 # pylint: disable-msg=W0613,W0201
26 # Wouldn't it be better to define LockingError in the locking module?
27 # Well, for now that's how the rest of the code does it...
28 from ganeti import errors
29 from ganeti import utils
32 def ssynchronized(lock, shared=0):
33 """Shared Synchronization decorator.
35 Calls the function holding the given lock, either in exclusive or shared
36 mode. It requires the passed lock to be a SharedLock (or support its
41 def sync_function(*args, **kwargs):
42 lock.acquire(shared=shared)
44 return fn(*args, **kwargs)
52 """Implements a shared lock.
54 Multiple threads can acquire the lock in a shared way, calling
55 acquire(shared=1). In order to acquire the lock in an exclusive way threads
56 can call acquire(shared=0), which is the default.
58 The lock prevents starvation but does not guarantee that threads will acquire
59 the shared lock in the order they queued for it, just that they will
64 """Construct a new SharedLock"""
65 # we have two conditions, __turn_shr and __turn_exc, sharing the same lock.
66 self.__lock = threading.Lock()
67 self.__turn_shr = threading.Condition(self.__lock)
68 self.__turn_exc = threading.Condition(self.__lock)
70 # current lock holders
79 # is this lock in the deleted state?
80 self.__deleted = False
82 def __is_sharer(self):
83 """Is the current thread sharing the lock at this time?"""
84 return threading.currentThread() in self.__shr
86 def __is_exclusive(self):
87 """Is the current thread holding the lock exclusively at this time?"""
88 return threading.currentThread() == self.__exc
90 def __is_owned(self, shared=-1):
91 """Is the current thread somehow owning the lock at this time?
93 This is a private version of the function, which presumes you're holding
98 return self.__is_sharer() or self.__is_exclusive()
100 return self.__is_sharer()
102 return self.__is_exclusive()
104 def _is_owned(self, shared=-1):
105 """Is the current thread somehow owning the lock at this time?
109 < 0: check for any type of ownership (default)
110 0: check for exclusive ownership
111 > 0: check for shared ownership
114 self.__lock.acquire()
116 result = self.__is_owned(shared=shared)
118 self.__lock.release()
123 """Wait on the given condition, and raise an exception if the current lock
124 is declared deleted in the meantime.
127 c: condition to wait on
132 raise errors.LockError('deleted lock')
134 def __exclusive_acquire(self):
135 """Acquire the lock exclusively.
137 This is a private function that presumes you are already holding the
138 internal lock. It's defined separately to avoid code duplication between
139 acquire() and delete()
142 self.__nwait_exc += 1
144 # This is to save ourselves from a nasty race condition that could
145 # theoretically make the sharers starve.
146 if self.__nwait_shr > 0 or self.__nwait_exc > 1:
147 self.__wait(self.__turn_exc)
149 while len(self.__shr) > 0 or self.__exc is not None:
150 self.__wait(self.__turn_exc)
152 self.__exc = threading.currentThread()
154 self.__nwait_exc -= 1
156 assert self.__npass_shr == 0, "SharedLock: internal fairness violation"
158 def acquire(self, blocking=1, shared=0):
159 """Acquire the lock, in shared or exclusive mode.
162 shared: whether to acquire in shared mode. By default an exclusive lock
164 blocking: whether to block while trying to acquire or to operate in
165 try-lock mode. this locking mode is not supported yet.
169 # We don't have non-blocking mode for now
170 raise NotImplementedError
172 self.__lock.acquire()
175 raise errors.LockError('deleted lock')
177 # We cannot acquire the lock if we already have it
178 assert not self.__is_owned(), "double acquire() on a non-recursive lock"
179 assert self.__npass_shr >= 0, "Internal fairness condition weirdness"
182 self.__nwait_shr += 1
185 # If there is an exclusive holder waiting we have to wait. We'll
186 # only do this once, though, when we start waiting for the lock. Then
187 # we'll just wait while there is an exclusive holder.
188 if self.__nwait_exc > 0:
189 # TODO: if !blocking...
191 self.__wait(self.__turn_shr)
193 while self.__exc is not None:
195 # TODO: if !blocking...
196 self.__wait(self.__turn_shr)
198 self.__shr.add(threading.currentThread())
200 # If we were waiting note that we passed
202 self.__npass_shr -= 1
205 self.__nwait_shr -= 1
207 assert self.__npass_shr >= 0, "Internal fairness condition weirdness"
209 # TODO: if !blocking...
210 # (or modify __exclusive_acquire for non-blocking mode)
211 self.__exclusive_acquire()
214 self.__lock.release()
219 """Release a Shared Lock.
221 You must have acquired the lock, either in shared or in exclusive mode,
222 before calling this function.
225 self.__lock.acquire()
227 assert self.__npass_shr >= 0, "Internal fairness condition weirdness"
228 # Autodetect release type
229 if self.__is_exclusive():
232 # An exclusive holder has just had the lock, time to put it in shared
233 # mode if there are shared holders waiting. Otherwise wake up the next
235 if self.__nwait_shr > 0:
236 # Make sure at least the ones which were blocked pass.
237 self.__npass_shr = self.__nwait_shr
238 self.__turn_shr.notifyAll()
239 elif self.__nwait_exc > 0:
240 self.__turn_exc.notify()
242 elif self.__is_sharer():
243 self.__shr.remove(threading.currentThread())
245 # If there are shared holders waiting (and not just scheduled to pass)
246 # there *must* be an exclusive holder waiting as well; otherwise what
247 # were they waiting for?
248 assert (self.__nwait_exc > 0 or self.__npass_shr == self.__nwait_shr), \
249 "Lock sharers waiting while no exclusive is queueing"
251 # If there are no more shared holders either in or scheduled to pass,
252 # and some exclusive holders are waiting let's wake one up.
253 if (len(self.__shr) == 0 and
254 self.__nwait_exc > 0 and
255 not self.__npass_shr > 0):
256 self.__turn_exc.notify()
259 assert False, "Cannot release non-owned lock"
262 self.__lock.release()
264 def delete(self, blocking=1):
265 """Delete a Shared Lock.
267 This operation will declare the lock for removal. First the lock will be
268 acquired in exclusive mode if you don't already own it, then the lock
269 will be put in a state where any future and pending acquire() fail.
272 blocking: whether to block while trying to acquire or to operate in
273 try-lock mode. this locking mode is not supported yet unless
274 you are already holding exclusively the lock.
277 self.__lock.acquire()
279 assert not self.__is_sharer(), "cannot delete() a lock while sharing it"
282 raise errors.LockError('deleted lock')
284 if not self.__is_exclusive():
286 # We don't have non-blocking mode for now
287 raise NotImplementedError
288 self.__exclusive_acquire()
290 self.__deleted = True
292 # Wake up everybody, they will fail acquiring the lock and
293 # raise an exception instead.
294 self.__turn_exc.notifyAll()
295 self.__turn_shr.notifyAll()
298 self.__lock.release()
302 """Implements a set of locks.
304 This abstraction implements a set of shared locks for the same resource type,
305 distinguished by name. The user can lock a subset of the resources and the
306 LockSet will take care of acquiring the locks always in the same order, thus
309 All the locks needed in the same set must be acquired together, though.
312 def __init__(self, members=None):
313 """Constructs a new LockSet.
316 members: initial members of the set
319 # Used internally to guarantee coherency.
320 self.__lock = SharedLock()
322 # The lockdict indexes the relationship name -> lock
323 # The order-of-locking is implied by the alphabetical order of names
326 if members is not None:
328 self.__lockdict[name] = SharedLock()
330 # The owner dict contains the set of locks each thread owns. For
331 # performance each thread can access its own key without a global lock on
332 # this structure. It is paramount though that *no* other type of access is
333 # done to this structure (eg. no looping over its keys). *_owned helper
334 # functions are defined to guarantee access is correct, but in general never
335 # do anything different than __owners[threading.currentThread()], or there
340 """Is the current thread a current level owner?"""
341 return threading.currentThread() in self.__owners
343 def _add_owned(self, name=None):
344 """Note the current thread owns the given lock"""
346 if not self._is_owned():
347 self.__owners[threading.currentThread()] = set()
350 self.__owners[threading.currentThread()].add(name)
352 self.__owners[threading.currentThread()] = set([name])
355 def _del_owned(self, name=None):
356 """Note the current thread no longer owns the given lock"""
359 self.__owners[threading.currentThread()].remove(name)
361 # Only remove the key if we don't hold the set-lock as well
362 if (not self.__lock._is_owned() and
363 not self.__owners[threading.currentThread()]):
364 del self.__owners[threading.currentThread()]
366 def _list_owned(self):
367 """Get the set of resource names owned by the current thread"""
369 return self.__owners[threading.currentThread()].copy()
374 """Return the current set of names.
376 Only call this function while holding __lock and don't iterate on the
377 result after releasing the lock.
380 return self.__lockdict.keys()
383 """Return a copy of the current set of elements.
385 Used only for debugging purposes.
388 self.__lock.acquire(shared=1)
390 result = self.__names()
392 self.__lock.release()
395 def acquire(self, names, blocking=1, shared=0):
396 """Acquire a set of resource locks.
399 names: the names of the locks which shall be acquired.
400 (special lock names, or instance/node names)
401 shared: whether to acquire in shared mode. By default an exclusive lock
403 blocking: whether to block while trying to acquire or to operate in
404 try-lock mode. this locking mode is not supported yet.
407 True: when all the locks are successfully acquired
410 errors.LockError: when any lock we try to acquire has been deleted
411 before we succeed. In this case none of the locks requested will be
416 # We don't have non-blocking mode for now
417 raise NotImplementedError
419 # Check we don't already own locks at this level
420 assert not self._is_owned(), "Cannot acquire locks in the same set twice"
423 # If no names are given acquire the whole set by not letting new names
424 # be added before we release, and getting the current list of names.
425 # Some of them may then be deleted later, but we'll cope with this.
427 # We'd like to acquire this lock in a shared way, as it's nice if
428 # everybody else can use the instances at the same time. If we are acquiring
429 # them exclusively, though, they won't be able to do this anyway,
430 # so we'll get the list lock exclusively as well in order to be able to
431 # do add() on the set while owning it.
432 self.__lock.acquire(shared=shared)
434 # note we own the set-lock
436 names = self.__names()
438 # We shouldn't have problems adding the lock to the owners list, but
439 # if we did we'll try to release this lock and re-raise exception.
440 # Of course something is going to be really wrong, after this.
441 self.__lock.release()
445 # Support passing in a single resource to acquire rather than many
446 if isinstance(names, basestring):
452 # First we look the locks up on __lockdict. We have no way of being sure
453 # they will still be there after, but this makes it a lot faster should
454 # even one of them already be missing
457 lock = self.__lockdict[lname] # raises KeyError if lock is not there
458 acquire_list.append((lname, lock))
460 if self.__lock._is_owned():
461 # We are acquiring all the set, it doesn't matter if this particular
462 # element is not there anymore.
465 raise errors.LockError('non-existing lock in set (%s)' % lname)
467 # This will hold the locknames we effectively acquired.
469 # Now acquire_list contains a sorted list of resources and locks we want.
470 # In order to get them we loop on this (private) list and acquire() them.
471 # We have no real guarantee they will still exist till this is done but
472 # .acquire() itself is safe and will alert us if the lock gets deleted.
473 for (lname, lock) in acquire_list:
475 lock.acquire(shared=shared) # raises LockError if the lock is deleted
476 # now the lock cannot be deleted, we have it!
477 self._add_owned(name=lname)
479 except (errors.LockError):
480 if self.__lock._is_owned():
481 # We are acquiring all the set, it doesn't matter if this particular
482 # element is not there anymore.
486 for lname in self._list_owned():
487 self.__lockdict[lname].release()
488 self._del_owned(name=lname)
489 raise errors.LockError('non-existing lock in set (%s)' % name_fail)
491 # We shouldn't have problems adding the lock to the owners list, but
492 # if we did we'll try to release this lock and re-raise exception.
493 # Of course something is going to be really wrong, after this.
499 # If something went wrong and we had the set-lock let's release it...
500 if self.__lock._is_owned():
501 self.__lock.release()
506 def release(self, names=None):
507 """Release a set of resource locks, at the same level.
509 You must have acquired the locks, either in shared or in exclusive mode,
510 before releasing them.
513 names: the names of the locks which shall be released.
514 (defaults to all the locks acquired at that level).
517 assert self._is_owned(), "release() on lock set while not owner"
519 # Support passing in a single resource to release rather than many
520 if isinstance(names, basestring):
524 names = self._list_owned()
527 assert self._list_owned().issuperset(names), (
528 "release() on unheld resources %s" %
529 names.difference(self._list_owned()))
531 # First of all let's release the "all elements" lock, if set.
532 # After this 'add' can work again
533 if self.__lock._is_owned():
534 self.__lock.release()
537 for lockname in names:
538 # If we are sure the lock doesn't leave __lockdict without being
539 # exclusively held we can do this...
540 self.__lockdict[lockname].release()
541 self._del_owned(name=lockname)
543 def add(self, names, acquired=0, shared=0):
544 """Add a new set of elements to the set
547 names: names of the new elements to add
548 acquired: pre-acquire the new resource?
549 shared: is the pre-acquisition shared?
553 assert not self.__lock._is_owned(shared=1), (
554 "Cannot add new elements while sharing the set-lock")
556 # Support passing in a single resource to add rather than many
557 if isinstance(names, basestring):
560 # If we don't already own the set-level lock, acquire it in an exclusive way
561 # and note that we need to release it later.
563 if not self.__lock._is_owned():
565 self.__lock.acquire()
568 invalid_names = set(self.__names()).intersection(names)
570 # This must be an explicit raise, not an assert, because assert is
571 # turned off when using optimization, and this can happen because of
572 # concurrency even if the user doesn't want it.
573 raise errors.LockError("duplicate add() (%s)" % invalid_names)
575 for lockname in names:
579 lock.acquire(shared=shared)
580 # now the lock cannot be deleted, we have it!
582 self._add_owned(name=lockname)
584 # We shouldn't have problems adding the lock to the owners list,
585 # but if we did we'll try to release this lock and re-raise
586 # exception. Of course something is going to be really wrong,
587 # after this. On the other hand the lock hasn't been added to the
588 # __lockdict yet so no other threads should be pending on it. This
589 # release is just a safety measure.
593 self.__lockdict[lockname] = lock
596 # Only release __lock if we were not holding it previously.
598 self.__lock.release()
602 def remove(self, names, blocking=1):
603 """Remove elements from the lock set.
605 You can either not hold anything in the lockset or already hold a superset
606 of the elements you want to delete, exclusively.
609 names: names of the resource to remove.
610 blocking: whether to block while trying to acquire or to operate in
611 try-lock mode. this locking mode is not supported yet unless
612 you are already holding exclusively the locks.
615 A list of locks which we removed. The list is always equal to the names
616 list if we were holding all the locks exclusively.
619 if not blocking and not self._is_owned():
620 # We don't have non-blocking mode for now
621 raise NotImplementedError
623 # Support passing in a single resource to remove rather than many
624 if isinstance(names, basestring):
627 # If we own any subset of this lock it must be a superset of what we want
628 # to delete. The ownership must also be exclusive, but that will be checked
629 # by the lock itself.
630 assert not self._is_owned() or self._list_owned().issuperset(names), (
631 "remove() on acquired lockset while not owning all elements")
636 # Calling delete() acquires the lock exclusively if we don't already own
637 # it, and causes all pending and subsequent lock acquires to fail. It's
638 # fine to call it out of order because delete() also implies release(),
639 # and the assertion above guarantees that we either already hold
640 # everything we want to delete, or we hold none.
642 self.__lockdict[lname].delete()
643 removed.append(lname)
644 except (KeyError, errors.LockError):
645 # This cannot happen if we were already holding it, verify:
646 assert not self._is_owned(), "remove failed while holding lockset"
648 # If no LockError was raised we are the ones who deleted the lock.
649 # This means we can safely remove it from lockdict, as any further or
650 # pending delete() or acquire() will fail (and nobody can have the lock
651 # since before our call to delete()).
653 # This is done in an else clause because if the exception was thrown
654 # it's the job of the one who actually deleted it.
655 del self.__lockdict[lname]
656 # And let's remove it from our private list if we owned it.
658 self._del_owned(name=lname)
663 # Locking levels, must be acquired in increasing order.
665 # - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be
666 # acquired before performing any operation, either in shared or in exclusive
667 # mode. acquiring the BGL in exclusive mode is discouraged and should be
669 # - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
670 # If you need more than one node, or more than one instance, acquire them at
676 LEVELS = [LEVEL_CLUSTER,
680 # Lock levels which are modifiable
681 LEVELS_MOD = [LEVEL_NODE, LEVEL_INSTANCE]
683 # Constant for the big ganeti lock
687 class GanetiLockManager:
688 """The Ganeti Locking Library
690 The purpose of this small library is to manage locking for ganeti clusters
691 in a central place, while at the same time doing dynamic checks against
692 possible deadlocks. It will also make it easier to transition to a different
693 lock type should we migrate away from python threads.
698 def __init__(self, nodes=None, instances=None):
699 """Constructs a new GanetiLockManager object.
701 There should be only one GanetiLockManager object at any time, so this
702 function raises an error if this is not the case.
705 nodes: list of node names
706 instances: list of instance names
709 assert self.__class__._instance is None, "double GanetiLockManager instance"
710 self.__class__._instance = self
712 # The keyring contains all the locks, at their level and in the correct
715 LEVEL_CLUSTER: LockSet([BGL]),
716 LEVEL_NODE: LockSet(nodes),
717 LEVEL_INSTANCE: LockSet(instances),
720 def _names(self, level):
721 """List the lock names at the given level.
722 Used for debugging/testing purposes.
725 level: the level whose list of locks to get
728 assert level in LEVELS, "Invalid locking level %s" % level
729 return self.__keyring[level]._names()
731 def _is_owned(self, level):
732 """Check whether we are owning locks at the given level
735 return self.__keyring[level]._is_owned()
737 def _list_owned(self, level):
738 """Get the set of owned locks at the given level
741 return self.__keyring[level]._list_owned()
743 def _upper_owned(self, level):
744 """Check whether we own any lock at a level greater than the given one.
747 # This way of checking only works if LEVELS[i] = i, which we check for in
749 return utils.any((self._is_owned(l) for l in LEVELS[level + 1:]))
751 def _BGL_owned(self):
752 """Check if the current thread owns the BGL.
754 Both an exclusive or a shared acquisition work.
757 return BGL in self.__keyring[LEVEL_CLUSTER]._list_owned()
759 def _contains_BGL(self, level, names):
760 """Check if acting on the given level and set of names will change the
761 status of the Big Ganeti Lock.
764 return level == LEVEL_CLUSTER and (names is None or BGL in names)
766 def acquire(self, level, names, blocking=1, shared=0):
767 """Acquire a set of resource locks, at the same level.
770 level: the level at which the locks shall be acquired.
771 It must be a member of LEVELS.
772 names: the names of the locks which shall be acquired.
773 (special lock names, or instance/node names)
774 shared: whether to acquire in shared mode. By default an exclusive lock
776 blocking: whether to block while trying to acquire or to operate in
777 try-lock mode. this locking mode is not supported yet.
780 assert level in LEVELS, "Invalid locking level %s" % level
782 # Check that we are either acquiring the Big Ganeti Lock or we already own
783 # it. Some "legacy" opcodes need to be sure they are run non-concurrently
784 # so even if we've migrated we need to at least share the BGL to be
785 # compatible with them. Of course if we own the BGL exclusively there's no
786 # point in acquiring any other lock, unless perhaps we are half way through
787 # the migration of the current opcode.
788 assert (self._contains_BGL(level, names) or self._BGL_owned()), (
789 "You must own the Big Ganeti Lock before acquiring any other")
791 # Check we don't own locks at upper levels (the LockSet rejects same-level ones).
792 assert not self._upper_owned(level), ("Cannot acquire locks at a level"
793 " while owning some at a greater one")
795 # Acquire the locks in the set.
796 return self.__keyring[level].acquire(names, shared=shared,
799 def release(self, level, names=None):
800 """Release a set of resource locks, at the same level.
802 You must have acquired the locks, either in shared or in exclusive mode,
803 before releasing them.
806 level: the level at which the locks shall be released.
807 It must be a member of LEVELS.
808 names: the names of the locks which shall be released.
809 (defaults to all the locks acquired at that level).
812 assert level in LEVELS, "Invalid locking level %s" % level
813 assert (not self._contains_BGL(level, names) or
814 not self._upper_owned(LEVEL_CLUSTER)), (
815 "Cannot release the Big Ganeti Lock while holding something"
818 # Release will complain if we don't own the locks already
819 return self.__keyring[level].release(names)
821 def add(self, level, names, acquired=0, shared=0):
822 """Add locks at the specified level.
825 level: the level at which the locks shall be added.
826 It must be a member of LEVELS_MOD.
827 names: names of the locks to add
828 acquired: whether to acquire the newly added locks
829 shared: whether the acquisition will be shared
831 assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
832 assert self._BGL_owned(), ("You must own the BGL before performing other"
834 assert not self._upper_owned(level), ("Cannot add locks at a level"
835 " while owning some at a greater one")
836 return self.__keyring[level].add(names, acquired=acquired, shared=shared)
838 def remove(self, level, names, blocking=1):
839 """Remove locks from the specified level.
841 You must either already own the locks you are trying to remove exclusively
842 or not own any lock at an upper level.
845 level: the level at which the locks shall be removed.
846 It must be a member of LEVELS_MOD.
847 names: the names of the locks which shall be removed.
848 (special lock names, or instance/node names)
849 blocking: whether to block while trying to acquire or to operate in
850 try-lock mode. this locking mode is not supported yet.
853 assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
854 assert self._BGL_owned(), ("You must own the BGL before performing other"
856 # Check we either own the level or don't own anything from here up.
857 # LockSet.remove() will check the case in which we don't own all the needed
858 # resources, or we have a shared ownership.
859 assert self._is_owned(level) or not self._upper_owned(level), (
860 "Cannot remove locks at a level while not owning it or"
861 " owning some at a greater one")
862 return self.__keyring[level].remove(names, blocking=blocking)