4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Module implementing the Ganeti locking code."""
23 # pylint: disable-msg=W0613,W0201
26 # Wouldn't it be better to define LockingError in the locking module?
27 # Well, for now that's how the rest of the code does it...
import threading

from ganeti import errors
from ganeti import utils
class SharedLock:
  """Implements a shared lock.

  Multiple threads can acquire the lock in a shared way by calling
  acquire(shared=1). In order to acquire the lock in an exclusive way
  threads can call acquire(shared=0).

  The lock prevents starvation but does not guarantee that threads will
  acquire the shared lock in the order they queued for it, just that they
  will eventually do so.

  """
  def __init__(self):
    """Construct a new SharedLock"""
    # we have two conditions, c_shr and c_exc, sharing the same lock.
    self.__lock = threading.Lock()
    self.__turn_shr = threading.Condition(self.__lock)
    self.__turn_exc = threading.Condition(self.__lock)

    # current lock holders
    self.__shr = set()
    self.__exc = None

    # lock waiters: how many threads are queueing for exclusive/shared
    # access, and how many waiting sharers have been granted the right to
    # pass in front of a queued exclusive acquirer (fairness bookkeeping).
    self.__nwait_exc = 0
    self.__nwait_shr = 0
    self.__npass_shr = 0

    # is this lock in the deleted state?
    self.__deleted = False

  def __is_sharer(self):
    """Is the current thread sharing the lock at this time?"""
    return threading.current_thread() in self.__shr

  def __is_exclusive(self):
    """Is the current thread holding the lock exclusively at this time?"""
    return threading.current_thread() == self.__exc

  def __is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    This is a private version of the function, which presumes you're holding
    the internal lock.

    Args:
      shared:
        < 0: check for any type of ownership (default)
        0: check for exclusive ownership
        > 0: check for shared ownership

    """
    if shared < 0:
      return self.__is_sharer() or self.__is_exclusive()
    elif shared:
      return self.__is_sharer()
    else:
      return self.__is_exclusive()

  def _is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    Args:
      shared:
        < 0: check for any type of ownership (default)
        0: check for exclusive ownership
        > 0: check for shared ownership

    """
    self.__lock.acquire()
    try:
      result = self.__is_owned(shared=shared)
    finally:
      self.__lock.release()

    return result

  def __wait(self, c):
    """Wait on the given condition, and raise an exception if the current lock
    is declared deleted in the meantime.

    Args:
      c: condition to wait on

    """
    c.wait()
    if self.__deleted:
      raise errors.LockError('deleted lock')

  def __exclusive_acquire(self):
    """Acquire the lock exclusively.

    This is a private function that presumes you are already holding the
    internal lock. It's defined separately to avoid code duplication between
    acquire() and delete()

    """
    self.__nwait_exc += 1
    try:
      # This is to save ourselves from a nasty race condition that could
      # theoretically make the sharers starve.
      if self.__nwait_shr > 0 or self.__nwait_exc > 1:
        self.__wait(self.__turn_exc)

      while len(self.__shr) > 0 or self.__exc is not None:
        self.__wait(self.__turn_exc)

      self.__exc = threading.current_thread()
    finally:
      self.__nwait_exc -= 1

    assert self.__npass_shr == 0, "SharedLock: internal fairness violation"

  def acquire(self, blocking=1, shared=0):
    """Acquire a shared lock.

    Args:
      shared: whether to acquire in shared mode. By default an exclusive lock
              will be acquired.
      blocking: whether to block while trying to acquire or to operate in
                try-lock mode. this locking mode is not supported yet.

    Raises:
      errors.LockError: if the lock has been deleted

    """
    if not blocking:
      # We don't have non-blocking mode for now
      raise NotImplementedError

    self.__lock.acquire()
    try:
      if self.__deleted:
        raise errors.LockError('deleted lock')

      # We cannot acquire the lock if we already have it
      assert not self.__is_owned(), "double acquire() on a non-recursive lock"
      assert self.__npass_shr >= 0, "Internal fairness condition weirdness"

      if shared:
        self.__nwait_shr += 1
        try:
          wait = False
          # If there is an exclusive holder waiting we have to wait. We'll
          # only do this once, though, when we start waiting for the lock.
          # Then we'll just wait while there are no exclusive holders.
          if self.__nwait_exc > 0:
            # TODO: if !blocking...
            wait = True
            self.__wait(self.__turn_shr)

          while self.__exc is not None:
            wait = True
            # TODO: if !blocking...
            self.__wait(self.__turn_shr)

          self.__shr.add(threading.current_thread())

          # If we were waiting note that we passed
          if wait:
            self.__npass_shr -= 1
        finally:
          self.__nwait_shr -= 1

        assert self.__npass_shr >= 0, "Internal fairness condition weirdness"
      else:
        # TODO: if !blocking...
        # (or modify __exclusive_acquire for non-blocking mode)
        self.__exclusive_acquire()

    finally:
      self.__lock.release()

    return True

  def release(self):
    """Release a Shared Lock.

    You must have acquired the lock, either in shared or in exclusive mode,
    before calling this function.

    """
    self.__lock.acquire()
    try:
      assert self.__npass_shr >= 0, "Internal fairness condition weirdness"
      # Autodetect release type
      if self.__is_exclusive():
        self.__exc = None

        # An exclusive holder has just had the lock, time to put it in shared
        # mode if there are shared holders waiting. Otherwise wake up the next
        # exclusive holder.
        if self.__nwait_shr > 0:
          # Make sure at least the ones which were blocked pass.
          self.__npass_shr = self.__nwait_shr
          self.__turn_shr.notify_all()
        elif self.__nwait_exc > 0:
          self.__turn_exc.notify()

      elif self.__is_sharer():
        self.__shr.remove(threading.current_thread())

        # If there are shared holders waiting (and not just scheduled to pass)
        # there *must* be an exclusive holder waiting as well; otherwise what
        # were they waiting for?
        assert (self.__nwait_exc > 0 or self.__npass_shr == self.__nwait_shr), \
               "Lock sharers waiting while no exclusive is queueing"

        # If there are no more shared holders either in or scheduled to pass,
        # and some exclusive holders are waiting let's wake one up.
        if (len(self.__shr) == 0 and
            self.__nwait_exc > 0 and
            not self.__npass_shr > 0):
          self.__turn_exc.notify()

      else:
        assert False, "Cannot release non-owned lock"

    finally:
      self.__lock.release()

  def delete(self, blocking=1):
    """Delete a Shared Lock.

    This operation will declare the lock for removal. First the lock will be
    acquired in exclusive mode if you don't already own it, then the lock
    will be put in a state where any future and pending acquire() fail.

    Args:
      blocking: whether to block while trying to acquire or to operate in
                try-lock mode. this locking mode is not supported yet unless
                you are already holding exclusively the lock.

    Raises:
      errors.LockError: if the lock was already deleted

    """
    self.__lock.acquire()
    try:
      assert not self.__is_sharer(), "cannot delete() a lock while sharing it"

      if self.__deleted:
        raise errors.LockError('deleted lock')

      if not self.__is_exclusive():
        if not blocking:
          # We don't have non-blocking mode for now
          raise NotImplementedError
        self.__exclusive_acquire()

      self.__deleted = True
      self.__exc = None
      # Wake up everybody, they will fail acquiring the lock and
      # raise an exception instead.
      self.__turn_exc.notify_all()
      self.__turn_shr.notify_all()

    finally:
      self.__lock.release()
class LockSet:
  """Implements a set of locks.

  This abstraction implements a set of shared locks for the same resource type,
  distinguished by name. The user can lock a subset of the resources and the
  LockSet will take care of acquiring the locks always in the same order, thus
  preventing deadlock.

  All the locks needed in the same set must be acquired together, though.

  """
  def __init__(self, members=None):
    """Constructs a new LockSet.

    Args:
      members: initial members of the set

    """
    # Used internally to guarantee coherency.
    self.__lock = SharedLock()

    # The lockdict indexes the relationship name -> lock
    # The order-of-locking is implied by the alphabetical order of names
    self.__lockdict = {}

    if members is not None:
      for name in members:
        self.__lockdict[name] = SharedLock()

    # The owner dict contains the set of locks each thread owns. For
    # performance each thread can access its own key without a global lock on
    # this structure. It is paramount though that *no* other type of access is
    # done to this structure (eg. no looping over its keys). *_owner helper
    # function are defined to guarantee access is correct, but in general never
    # do anything different than __owners[threading.current_thread()], or
    # there will be trouble.
    self.__owners = {}

  def _is_owned(self):
    """Is the current thread a current level owner?"""
    return threading.current_thread() in self.__owners

  def _add_owned(self, name=None):
    """Note the current thread owns the given lock"""
    if name is None:
      if not self._is_owned():
        self.__owners[threading.current_thread()] = set()
    else:
      if self._is_owned():
        self.__owners[threading.current_thread()].add(name)
      else:
        self.__owners[threading.current_thread()] = set([name])

  def _del_owned(self, name=None):
    """Note the current thread has stopped owning the given lock"""
    if name is not None:
      self.__owners[threading.current_thread()].remove(name)

    # Only remove the key if we don't hold the set-lock as well
    if (not self.__lock._is_owned() and
        not self.__owners[threading.current_thread()]):
      del self.__owners[threading.current_thread()]

  def _list_owned(self):
    """Get the set of resource names owned by the current thread"""
    if self._is_owned():
      return self.__owners[threading.current_thread()].copy()
    else:
      return set()

  def __names(self):
    """Return the current set of names.

    Only call this function while holding __lock and don't iterate on the
    result after releasing the lock.

    """
    return self.__lockdict.keys()

  def _names(self):
    """Return a copy of the current set of elements.

    Used only for debugging purposes.

    """
    self.__lock.acquire(shared=1)
    try:
      result = self.__names()
    finally:
      self.__lock.release()
    return set(result)

  def acquire(self, names, blocking=1, shared=0):
    """Acquire a set of resource locks.

    Args:
      names: the names of the locks which shall be acquired.
             (special lock names, or instance/node names)
             None means "the whole set".
      shared: whether to acquire in shared mode. By default an exclusive lock
              will be acquired.
      blocking: whether to block while trying to acquire or to operate in
                try-lock mode. this locking mode is not supported yet.

    Returns:
      The set of lock names effectively acquired; when all the requested
      locks are successfully acquired this is truthy.

    Raises:
      errors.LockError: when any lock we try to acquire has been deleted
      before we succeed. In this case none of the locks requested will be
      acquired.

    """
    if not blocking:
      # We don't have non-blocking mode for now
      raise NotImplementedError

    # Check we don't already own locks at this level
    assert not self._is_owned(), "Cannot acquire locks in the same set twice"

    if names is None:
      # If no names are given acquire the whole set by not letting new names
      # being added before we release, and getting the current list of names.
      # Some of them may then be deleted later, but we'll cope with this.
      #
      # We'd like to acquire this lock in a shared way, as it's nice if
      # everybody else can use the instances at the same time. If we are
      # acquiring them exclusively though they won't be able to do this
      # anyway, so we'll get the list lock exclusively as well in order to be
      # able to do add() on the set while owning it.
      self.__lock.acquire(shared=shared)
      try:
        # note we own the set-lock
        self._add_owned()
        names = self.__names()
      except:
        # We shouldn't have problems adding the lock to the owners list, but
        # if we did we'll try to release this lock and re-raise exception.
        # Of course something is going to be really wrong, after this.
        self.__lock.release()
        raise

    try:
      # Support passing in a single resource to acquire rather than many
      if isinstance(names, basestring):
        names = [names]

      acquire_list = []
      # First we look the locks up on __lockdict. We have no way of being sure
      # they will still be there after, but this makes it a lot faster should
      # just one of them be already wrong. Sorting enforces the global
      # alphabetical order-of-locking, which prevents deadlock.
      for lname in sorted(names):
        try:
          lock = self.__lockdict[lname] # raises KeyError if lock is not there
          acquire_list.append((lname, lock))
        except KeyError:
          if self.__lock._is_owned():
            # We are acquiring all the set, it doesn't matter if this
            # particular element is not there anymore.
            continue
          else:
            raise errors.LockError('non-existing lock in set (%s)' % lname)

      # This will hold the locknames we effectively acquired.
      acquired = set()
      # Now acquire_list contains a sorted list of resources and locks we
      # want. In order to get them we loop on this (private) list and
      # acquire() them. We gave no real guarantee they will still exist till
      # this is done but .acquire() itself is safe and will alert us if the
      # lock gets deleted.
      for (lname, lock) in acquire_list:
        try:
          lock.acquire(shared=shared) # raises LockError if the lock is deleted
          # now the lock cannot be deleted, we have it!
          self._add_owned(name=lname)
          acquired.add(lname)
        except errors.LockError:
          if self.__lock._is_owned():
            # We are acquiring all the set, it doesn't matter if this
            # particular element is not there anymore.
            continue
          else:
            name_fail = lname
            for lname in self._list_owned():
              self.__lockdict[lname].release()
              self._del_owned(name=lname)
            raise errors.LockError('non-existing lock in set (%s)' % name_fail)
        except:
          # We shouldn't have problems adding the lock to the owners list, but
          # if we did we'll try to release this lock and re-raise exception.
          # Of course something is going to be really wrong, after this.
          lock.release()
          raise

    except:
      # If something went wrong and we had the set-lock let's release it...
      if self.__lock._is_owned():
        self.__lock.release()
      raise

    return acquired

  def release(self, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive mode,
    before releasing them.

    Args:
      names: the names of the locks which shall be released.
             (defaults to all the locks acquired at that level).

    """
    assert self._is_owned(), "release() on lock set while not owner"

    # Support passing in a single resource to release rather than many
    if isinstance(names, basestring):
      names = [names]

    if names is None:
      names = self._list_owned()
    else:
      names = set(names)
      assert self._list_owned().issuperset(names), (
             "release() on unheld resources %s" %
             names.difference(self._list_owned()))

    # First of all let's release the "all elements" lock, if set.
    # After this 'add' can work again
    if self.__lock._is_owned():
      self.__lock.release()
      self._del_owned()

    for lockname in names:
      # If we are sure the lock doesn't leave __lockdict without being
      # exclusively held we can do this...
      self.__lockdict[lockname].release()
      self._del_owned(name=lockname)

  def add(self, names, acquired=0, shared=0):
    """Add a new set of elements to the set

    Args:
      names: names of the new elements to add
      acquired: pre-acquire the new resource?
      shared: is the pre-acquisition shared?

    Raises:
      errors.LockError: if any of the names is already present in the set

    """
    assert not self.__lock._is_owned(shared=1), (
           "Cannot add new elements while sharing the set-lock")

    # Support passing in a single resource to add rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we don't already own the set-level lock acquire it in an exclusive
    # way: we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock._is_owned():
      release_lock = True
      self.__lock.acquire()

    try:
      invalid_names = set(self.__names()).intersection(names)
      if invalid_names:
        # This must be an explicit raise, not an assert, because assert is
        # turned off when using optimization, and this can happen because of
        # concurrency even if the user doesn't want it.
        raise errors.LockError("duplicate add() (%s)" % invalid_names)

      for lockname in names:
        lock = SharedLock()

        if acquired:
          lock.acquire(shared=shared)
          # now the lock cannot be deleted, we have it!
          try:
            self._add_owned(name=lockname)
          except:
            # We shouldn't have problems adding the lock to the owners list,
            # but if we did we'll try to release this lock and re-raise
            # exception. Of course something is going to be really wrong,
            # after this. On the other hand the lock hasn't been added to the
            # __lockdict yet so no other threads should be pending on it. This
            # release is just a safety measure.
            lock.release()
            raise

        self.__lockdict[lockname] = lock

    finally:
      # Only release __lock if we were not holding it previously.
      if release_lock:
        self.__lock.release()

    return True

  def remove(self, names, blocking=1):
    """Remove elements from the lock set.

    You can either not hold anything in the lockset or already hold a superset
    of the elements you want to delete, exclusively.

    Args:
      names: names of the resource to remove.
      blocking: whether to block while trying to acquire or to operate in
                try-lock mode. this locking mode is not supported yet unless
                you are already holding exclusively the locks.

    Returns:
      A list of locks which we removed. The list is always equal to the names
      list if we were holding all the locks exclusively.

    """
    if not blocking and not self._is_owned():
      # We don't have non-blocking mode for now
      raise NotImplementedError

    # Support passing in a single resource to remove rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we own any subset of this lock it must be a superset of what we want
    # to delete. The ownership must also be exclusive, but that will be
    # checked by the lock itself.
    assert not self._is_owned() or self._list_owned().issuperset(names), (
           "remove() on acquired lockset while not owning all elements")

    removed = []

    for lname in names:
      # Calling delete() acquires the lock exclusively if we don't already own
      # it, and causes all pending and subsequent lock acquires to fail. It's
      # fine to call it out of order because delete() also implies release(),
      # and the assertion above guarantees that we either already hold
      # everything we want to delete, or we hold none.
      try:
        self.__lockdict[lname].delete()
        removed.append(lname)
      except (KeyError, errors.LockError):
        # This cannot happen if we were already holding it, verify:
        assert not self._is_owned(), "remove failed while holding lockset"
      else:
        # If no LockError was raised we are the ones who deleted the lock.
        # This means we can safely remove it from lockdict, as any further or
        # pending delete() or acquire() will fail (and nobody can have the
        # lock since before our call to delete()).
        #
        # This is done in an else clause because if the exception was thrown
        # it's the job of the one who actually deleted it.
        del self.__lockdict[lname]
        # And let's remove it from our private list if we owned it.
        if self._is_owned():
          self._del_owned(name=lname)

    return removed
# Locking levels, must be acquired in increasing order.
# Current rules are:
# - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be
#   acquired before performing any operation, either in shared or in exclusive
#   mode. acquiring the BGL in exclusive mode is discouraged and should be
#   avoided.
# - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
#   If you need more than one node, or more than one instance, acquire them at
#   the same time.
# - level LEVEL_CONFIG contains the configuration lock, which you must acquire
#   before reading or changing the config file.
# NOTE: _upper_owned() relies on the invariant LEVELS[i] == i; keep the
# numeric values and the LEVELS list in sync.
LEVEL_CLUSTER = 0
LEVEL_NODE = 1
LEVEL_INSTANCE = 2
LEVEL_CONFIG = 3

LEVELS = [LEVEL_CLUSTER,
          LEVEL_NODE,
          LEVEL_INSTANCE,
          LEVEL_CONFIG]

# Lock levels which are modifiable
LEVELS_MOD = [LEVEL_NODE, LEVEL_INSTANCE]

# Constant for the big ganeti lock and config lock
BGL = 'BGL'
CONFIG = 'config'
class GanetiLockManager:
  """The Ganeti Locking Library

  The purpose of this small library is to manage locking for ganeti clusters
  in a central place, while at the same time doing dynamic checks against
  possible deadlocks. It will also make it easier to transition to a different
  lock type should we migrate away from python threads.

  """
  # Singleton guard: __init__ asserts this is still None, then stores self.
  _instance = None

  def __init__(self, nodes=None, instances=None):
    """Constructs a new GanetiLockManager object.

    There should be only a GanetiLockManager object at any time, so this
    function raises an error if this is not the case.

    Args:
      nodes: list of node names
      instances: list of instance names

    """
    assert self.__class__._instance is None, "double GanetiLockManager instance"
    self.__class__._instance = self

    # The keyring contains all the locks, at their level and in the correct
    # locking order.
    self.__keyring = {
      LEVEL_CLUSTER: LockSet([BGL]),
      LEVEL_NODE: LockSet(nodes),
      LEVEL_INSTANCE: LockSet(instances),
      LEVEL_CONFIG: LockSet([CONFIG]),
    }

  def _names(self, level):
    """List the lock names at the given level.

    Used for debugging/testing purposes.

    Args:
      level: the level whose list of locks to get

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    return self.__keyring[level]._names()

  def _is_owned(self, level):
    """Check whether we are owning locks at the given level

    """
    return self.__keyring[level]._is_owned()

  def _list_owned(self, level):
    """Get the set of owned locks at the given level

    """
    return self.__keyring[level]._list_owned()

  def _upper_owned(self, level):
    """Check that we don't own any lock at a level greater than the given one.

    """
    # This way of checking only works if LEVELS[i] = i, which we check for in
    # the test cases.
    return utils.any((self._is_owned(l) for l in LEVELS[level + 1:]))

  def _BGL_owned(self):
    """Check if the current thread owns the BGL.

    Both an exclusive or a shared acquisition work.

    """
    return BGL in self.__keyring[LEVEL_CLUSTER]._list_owned()

  def _contains_BGL(self, level, names):
    """Check if acting on the given level and set of names will change the
    status of the Big Ganeti Lock.

    """
    return level == LEVEL_CLUSTER and (names is None or BGL in names)

  def acquire(self, level, names, blocking=1, shared=0):
    """Acquire a set of resource locks, at the same level.

    Args:
      level: the level at which the locks shall be acquired.
             It must be a member of LEVELS.
      names: the names of the locks which shall be acquired.
             (special lock names, or instance/node names)
      shared: whether to acquire in shared mode. By default an exclusive lock
              will be acquired.
      blocking: whether to block while trying to acquire or to operate in
                try-lock mode. this locking mode is not supported yet.

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Check that we are either acquiring the Big Ganeti Lock or we already own
    # it. Some "legacy" opcodes need to be sure they are run non-concurrently
    # so even if we've migrated we need to at least share the BGL to be
    # compatible with them. Of course if we own the BGL exclusively there's no
    # point in acquiring any other lock, unless perhaps we are half way through
    # the migration of the current opcode.
    assert (self._contains_BGL(level, names) or self._BGL_owned()), (
           "You must own the Big Ganeti Lock before acquiring any other")

    # Check we don't own locks at the same or upper levels.
    assert not self._upper_owned(level), ("Cannot acquire locks at a level"
           " while owning some at a greater one")

    # Acquire the locks in the set.
    return self.__keyring[level].acquire(names, shared=shared,
                                         blocking=blocking)

  def release(self, level, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive mode,
    before releasing them.

    Args:
      level: the level at which the locks shall be released.
             It must be a member of LEVELS.
      names: the names of the locks which shall be released.
             (defaults to all the locks acquired at that level).

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    assert (not self._contains_BGL(level, names) or
            not self._upper_owned(LEVEL_CLUSTER)), (
           "Cannot release the Big Ganeti Lock while holding something"
           " at upper levels")

    # Release will complain if we don't own the locks already
    return self.__keyring[level].release(names)

  def add(self, level, names, acquired=0, shared=0):
    """Add locks at the specified level.

    Args:
      level: the level at which the locks shall be added.
             It must be a member of LEVELS_MOD.
      names: names of the locks to acquire
      acquired: whether to acquire the newly added locks
      shared: whether the acquisition will be shared

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " blocking operations")
    assert not self._upper_owned(level), ("Cannot add locks at a level"
           " while owning some at a greater one")
    return self.__keyring[level].add(names, acquired=acquired, shared=shared)

  def remove(self, level, names, blocking=1):
    """Remove locks from the specified level.

    You must either already own the locks you are trying to remove exclusively
    or not own any lock at an upper level.

    Args:
      level: the level at which the locks shall be removed.
             It must be a member of LEVELS_MOD.
      names: the names of the locks which shall be removed.
             (special lock names, or instance/node names)
      blocking: whether to block while trying to operate in try-lock mode.
                this locking mode is not supported yet.

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " blocking operations")
    # Check we either own the level or don't own anything from here up.
    # LockSet.remove() will check the case in which we don't own all the
    # needed resources, or we have a shared ownership.
    assert self._is_owned(level) or not self._upper_owned(level), (
           "Cannot remove locks at a level while not owning it or"
           " owning some at a greater one")
    return self.__keyring[level].remove(names, blocking=blocking)