# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Module implementing the Ganeti locking code."""

# pylint: disable-msg=W0613,W0201

import threading

# Wouldn't it be better to define LockingError in the locking module?
# Well, for now that's how the rest of the code does it...
from ganeti import errors
from ganeti import utils


class SharedLock:
    """Implements a shared lock.

    Multiple threads can acquire the lock in a shared way by calling
    acquire(shared=1). In order to acquire the lock in an exclusive way
    threads call acquire() with the default shared=0.

    The lock prevents starvation but does not guarantee that threads will
    acquire the shared lock in the order they queued for it, just that they
    will eventually do so.

    """

    def __init__(self):
        """Construct a new SharedLock"""
        # we have two conditions, one per acquisition mode, sharing the same
        # internal lock.
        self.__lock = threading.Lock()
        self.__turn_shr = threading.Condition(self.__lock)
        self.__turn_exc = threading.Condition(self.__lock)

        # current lock holders
        self.__shr = set()
        self.__exc = None

        # lock waiters
        self.__nwait_exc = 0
        self.__nwait_shr = 0

        # is this lock in the deleted state?
        self.__deleted = False

    def __is_sharer(self):
        """Is the current thread sharing the lock at this time?"""
        return threading.currentThread() in self.__shr

    def __is_exclusive(self):
        """Is the current thread holding the lock exclusively at this time?"""
        return threading.currentThread() == self.__exc

    def __is_owned(self, shared=-1):
        """Is the current thread somehow owning the lock at this time?

        This is a private version of the function, which presumes you're
        holding the internal lock.

        Args:
          shared:
            < 0: check for any type of ownership (default)
            0: check for exclusive ownership
            > 0: check for shared ownership

        """
        if shared < 0:
            return self.__is_sharer() or self.__is_exclusive()
        elif shared:
            return self.__is_sharer()
        else:
            return self.__is_exclusive()

    def _is_owned(self, shared=-1):
        """Is the current thread somehow owning the lock at this time?

        Args:
          shared:
            < 0: check for any type of ownership (default)
            0: check for exclusive ownership
            > 0: check for shared ownership

        """
        self.__lock.acquire()
        try:
            result = self.__is_owned(shared=shared)
        finally:
            self.__lock.release()

        return result

    def __wait(self, c):
        """Wait on the given condition, and raise an exception if the current
        lock is declared deleted in the meantime.

        Args:
          c: condition to wait on

        Raises:
          errors.LockError: if the lock is marked deleted once we wake up

        """
        c.wait()
        if self.__deleted:
            raise errors.LockError('deleted lock')

    def __exclusive_acquire(self):
        """Acquire the lock exclusively.

        This is a private function that presumes you are already holding the
        internal lock. It's defined separately to avoid code duplication
        between acquire() and delete()

        """
        self.__nwait_exc += 1
        try:
            # This is to save ourselves from a nasty race condition that could
            # theoretically make the sharers starve.
            if self.__nwait_shr > 0 or self.__nwait_exc > 1:
                self.__wait(self.__turn_exc)

            while len(self.__shr) > 0 or self.__exc is not None:
                self.__wait(self.__turn_exc)

            self.__exc = threading.currentThread()
        finally:
            # Whether we got the lock or __wait() raised, we are no longer
            # waiting.
            self.__nwait_exc -= 1

    def acquire(self, blocking=1, shared=0):
        """Acquire a shared lock.

        Args:
          shared: whether to acquire in shared mode. By default an exclusive
                  lock will be acquired.
          blocking: whether to block while trying to acquire or to operate in
                    try-lock mode. this locking mode is not supported yet.

        Returns:
          True once the lock has been acquired.

        Raises:
          NotImplementedError: if blocking is false
          errors.LockError: if the lock is (or becomes) deleted

        """
        if not blocking:
            # We don't have non-blocking mode for now
            raise NotImplementedError

        self.__lock.acquire()
        try:
            if self.__deleted:
                raise errors.LockError('deleted lock')

            # We cannot acquire the lock if we already have it
            assert not self.__is_owned(), (
                "double acquire() on a non-recursive lock")

            if shared:
                self.__nwait_shr += 1
                try:
                    # If there is an exclusive holder waiting we have to wait.
                    # We'll only do this once, though, when we start waiting
                    # for the lock. Then we'll just wait while there are no
                    # exclusive holders.
                    if self.__nwait_exc > 0:
                        # TODO: if !blocking...
                        self.__wait(self.__turn_shr)

                    while self.__exc is not None:
                        # TODO: if !blocking...
                        self.__wait(self.__turn_shr)

                    self.__shr.add(threading.currentThread())
                finally:
                    self.__nwait_shr -= 1

            else:
                # TODO: if !blocking...
                # (or modify __exclusive_acquire for non-blocking mode)
                self.__exclusive_acquire()

        finally:
            self.__lock.release()

        return True

    def release(self):
        """Release a Shared Lock.

        You must have acquired the lock, either in shared or in exclusive
        mode, before calling this function.

        """
        self.__lock.acquire()
        try:
            # Autodetect release type
            if self.__is_exclusive():
                self.__exc = None

                # An exclusive holder has just had the lock, time to put it in
                # shared mode if there are shared holders waiting. Otherwise
                # wake up the next exclusive holder.
                if self.__nwait_shr > 0:
                    self.__turn_shr.notifyAll()
                elif self.__nwait_exc > 0:
                    self.__turn_exc.notify()

            elif self.__is_sharer():
                self.__shr.remove(threading.currentThread())

                # If there are no more shared holders and some exclusive
                # holders are waiting let's wake one up.
                if len(self.__shr) == 0 and self.__nwait_exc > 0:
                    self.__turn_exc.notify()

            else:
                assert False, "Cannot release non-owned lock"

        finally:
            self.__lock.release()

    def delete(self, blocking=1):
        """Delete a Shared Lock.

        This operation will declare the lock for removal. First the lock will
        be acquired in exclusive mode if you don't already own it, then the
        lock will be put in a state where any future and pending acquire()
        fail.

        Args:
          blocking: whether to block while trying to acquire or to operate in
                    try-lock mode. this locking mode is not supported yet
                    unless you are already holding exclusively the lock.

        Raises:
          NotImplementedError: if blocking is false and the lock is not
                               already held exclusively by this thread
          errors.LockError: if the lock has already been deleted

        """
        self.__lock.acquire()
        try:
            assert not self.__is_sharer(), (
                "cannot delete() a lock while sharing it")

            if self.__deleted:
                raise errors.LockError('deleted lock')

            if not self.__is_exclusive():
                if not blocking:
                    # We don't have non-blocking mode for now
                    raise NotImplementedError
                self.__exclusive_acquire()

            self.__deleted = True
            # Deleting implies releasing: nobody holds the lock afterwards.
            self.__exc = None
            # Wake up everybody, they will fail acquiring the lock and
            # raise an exception instead.
            self.__turn_exc.notifyAll()
            self.__turn_shr.notifyAll()

        finally:
            self.__lock.release()


class LockSet:
    """Implements a set of locks.

    This abstraction implements a set of shared locks for the same resource
    type, distinguished by name. The user can lock a subset of the resources
    and the LockSet will take care of acquiring the locks always in the same
    order, thus preventing deadlock.

    All the locks needed in the same set must be acquired together, though.

    """

    def __init__(self, members=None):
        """Constructs a new LockSet.

        Args:
          members: initial members of the set

        """
        # Used internally to guarantee coherency.
        self.__lock = SharedLock()

        # The lockdict indexes the relationship name -> lock
        # The order-of-locking is implied by the alphabetical order of names
        self.__lockdict = {}

        if members is not None:
            for name in members:
                self.__lockdict[name] = SharedLock()

        # The owner dict contains the set of locks each thread owns. For
        # performance each thread can access its own key without a global lock
        # on this structure. It is paramount though that *no* other type of
        # access is done to this structure (eg. no looping over its keys).
        # *_owned helper functions are defined to guarantee access is correct,
        # but in general never do anything different than
        # __owners[threading.currentThread()], or there will be trouble.
        self.__owners = {}

    def _is_owned(self):
        """Is the current thread a current level owner?"""
        return threading.currentThread() in self.__owners

    def _add_owned(self, name=None):
        """Note the current thread owns the given lock.

        With name=None only the per-thread entry is created (used to record
        ownership of the set-lock itself).

        """
        if name is None:
            if not self._is_owned():
                self.__owners[threading.currentThread()] = set()
        else:
            if self._is_owned():
                self.__owners[threading.currentThread()].add(name)
            else:
                self.__owners[threading.currentThread()] = set([name])

    def _del_owned(self, name=None):
        """Note the current thread no longer owns the given lock.

        With name=None no name is removed; the per-thread entry is only
        dropped if it is empty and the set-lock is not held.

        """
        if name is not None:
            self.__owners[threading.currentThread()].remove(name)

        # Only remove the key if we don't hold the set-lock as well
        if (not self.__lock._is_owned() and
                not self.__owners[threading.currentThread()]):
            del self.__owners[threading.currentThread()]

    def _list_owned(self):
        """Get the set of resource names owned by the current thread"""
        if self._is_owned():
            return self.__owners[threading.currentThread()].copy()
        else:
            return set()

    def __names(self):
        """Return the current set of names.

        Only call this function while holding __lock and don't iterate on the
        result after releasing the lock.

        """
        return self.__lockdict.keys()

    def _names(self):
        """Return a copy of the current set of elements.

        Used only for debugging purposes.

        """
        self.__lock.acquire(shared=1)
        try:
            # Copy while still holding the set-lock, as __names() requires.
            result = set(self.__names())
        finally:
            self.__lock.release()
        return result

    def acquire(self, names, blocking=1, shared=0):
        """Acquire a set of resource locks.

        Args:
          names: the names of the locks which shall be acquired.
                 (special lock names, or instance/node names). A single name
                 may be passed as a string; None acquires the whole set.
          shared: whether to acquire in shared mode. By default an exclusive
                  lock will be acquired.
          blocking: whether to block while trying to acquire or to operate in
                    try-lock mode. this locking mode is not supported yet.

        Returns:
          The set of lock names which were actually acquired.

        Raises:
          NotImplementedError: if blocking is false
          errors.LockError: when any lock we try to acquire has been deleted
          before we succeed. In this case none of the locks requested will be
          acquired.

        """
        if not blocking:
            # We don't have non-blocking mode for now
            raise NotImplementedError

        # Check we don't already own locks at this level
        assert not self._is_owned(), (
            "Cannot acquire locks in the same set twice")

        if names is None:
            # If no names are given acquire the whole set by not letting new
            # names being added before we release, and getting the current
            # list of names. Some of them may then be deleted later, but we'll
            # cope with this.
            #
            # We'd like to acquire this lock in a shared way, as it's nice if
            # everybody else can use the instances at the same time. If we are
            # acquiring them exclusively though they won't be able to do this
            # anyway, so we'll get the list lock exclusively as well in
            # order to be able to do add() on the set while owning it.
            self.__lock.acquire(shared=shared)
            try:
                # note we own the set-lock
                self._add_owned()
                names = self.__names()
            except:
                # We shouldn't have problems adding the lock to the owners
                # list, but if we did we'll try to release this lock and
                # re-raise exception. Of course something is going to be
                # really wrong, after this.
                self.__lock.release()
                raise

        try:
            # Support passing in a single resource to acquire rather than many
            if isinstance(names, basestring):
                names = [names]
            else:
                # Sort a private copy: the locking order is the sorted order
                # of the names, and the caller's sequence is left untouched.
                names = sorted(names)

            acquire_list = []
            # First we look the locks up on __lockdict. We have no way of
            # being sure they will still be there after, but this makes it a
            # lot faster should just one of them be the already wrong
            for lname in names:
                try:
                    # raises KeyError if the lock is not there
                    lock = self.__lockdict[lname]
                    acquire_list.append((lname, lock))
                except (KeyError):
                    if self.__lock._is_owned():
                        # We are acquiring all the set, it doesn't matter if
                        # this particular element is not there anymore.
                        continue
                    else:
                        raise errors.LockError(
                            'non-existing lock in set (%s)' % lname)

            # This will hold the locknames we effectively acquired.
            acquired = set()
            # Now acquire_list contains a sorted list of resources and locks
            # we want. In order to get them we loop on this (private) list and
            # acquire() them. We gave no real guarantee they will still exist
            # till this is done but .acquire() itself is safe and will alert
            # us if the lock gets deleted.
            for (lname, lock) in acquire_list:
                try:
                    # raises LockError if the lock is deleted
                    lock.acquire(shared=shared)
                    # now the lock cannot be deleted, we have it!
                    self._add_owned(name=lname)
                    acquired.add(lname)
                except (errors.LockError):
                    if self.__lock._is_owned():
                        # We are acquiring all the set, it doesn't matter if
                        # this particular element is not there anymore.
                        continue
                    else:
                        # Roll back: give up everything acquired so far.
                        name_fail = lname
                        for lname in self._list_owned():
                            self.__lockdict[lname].release()
                            self._del_owned(name=lname)
                        raise errors.LockError(
                            'non-existing lock in set (%s)' % name_fail)
                except:
                    # We shouldn't have problems adding the lock to the owners
                    # list, but if we did we'll try to release this lock and
                    # re-raise exception. Of course something is going to be
                    # really wrong, after this.
                    if lock._is_owned():
                        lock.release()
                    raise

        except:
            # If something went wrong and we had the set-lock let's release
            # it...
            if self.__lock._is_owned():
                self.__lock.release()
            raise

        return acquired

    def release(self, names=None):
        """Release a set of resource locks, at the same level.

        You must have acquired the locks, either in shared or in exclusive
        mode, before releasing them.

        Args:
          names: the names of the locks which shall be released.
                 (defaults to all the locks acquired at that level).

        """
        assert self._is_owned(), "release() on lock set while not owner"

        # Support passing in a single resource to release rather than many
        if isinstance(names, basestring):
            names = [names]

        if names is None:
            names = self._list_owned()
        else:
            names = set(names)
            assert self._list_owned().issuperset(names), (
                "release() on unheld resources %s" %
                names.difference(self._list_owned()))

        # First of all let's release the "all elements" lock, if set.
        # After this 'add' can work again
        if self.__lock._is_owned():
            self.__lock.release()
            self._del_owned()

        for lockname in names:
            # If we are sure the lock doesn't leave __lockdict without being
            # exclusively held we can do this...
            self.__lockdict[lockname].release()
            self._del_owned(name=lockname)

    def add(self, names, acquired=0, shared=0):
        """Add a new set of elements to the set

        Args:
          names: names of the new elements to add
          acquired: pre-acquire the new resource?
          shared: is the pre-acquisition shared?

        Returns:
          True once all the names have been added.

        Raises:
          errors.LockError: if any of the names is already in the set

        """
        assert not self.__lock._is_owned(shared=1), (
            "Cannot add new elements while sharing the set-lock")

        # Support passing in a single resource to add rather than many
        if isinstance(names, basestring):
            names = [names]

        # If we don't already own the set-level lock acquire it in an
        # exclusive way we'll get it and note we need to release it later.
        release_lock = False
        if not self.__lock._is_owned():
            release_lock = True
            self.__lock.acquire()

        try:
            invalid_names = set(self.__names()).intersection(names)
            if invalid_names:
                # This must be an explicit raise, not an assert, because
                # assert is turned off when using optimization, and this can
                # happen because of concurrency even if the user doesn't want
                # it.
                raise errors.LockError("duplicate add() (%s)" % invalid_names)

            for lockname in names:
                lock = SharedLock()

                if acquired:
                    lock.acquire(shared=shared)
                    # now the lock cannot be deleted, we have it!
                    try:
                        self._add_owned(name=lockname)
                    except:
                        # We shouldn't have problems adding the lock to the
                        # owners list, but if we did we'll try to release this
                        # lock and re-raise exception. Of course something is
                        # going to be really wrong, after this. On the other
                        # hand the lock hasn't been added to the __lockdict
                        # yet so no other threads should be pending on it.
                        # This release is just a safety measure.
                        lock.release()
                        raise

                self.__lockdict[lockname] = lock

        finally:
            # Only release __lock if we were not holding it previously.
            if release_lock:
                self.__lock.release()

        return True

    def remove(self, names, blocking=1):
        """Remove elements from the lock set.

        You can either not hold anything in the lockset or already hold a
        superset of the elements you want to delete, exclusively.

        Args:
          names: names of the resource to remove.
          blocking: whether to block while trying to acquire or to operate in
                    try-lock mode. this locking mode is not supported yet
                    unless you are already holding exclusively the locks.

        Returns:
          A list of lock which we removed. The list is always equal to the
          names list if we were holding all the locks exclusively.

        """
        if not blocking and not self._is_owned():
            # We don't have non-blocking mode for now
            raise NotImplementedError

        # Support passing in a single resource to remove rather than many
        if isinstance(names, basestring):
            names = [names]

        # If we own any subset of this lock it must be a superset of what we
        # want to delete. The ownership must also be exclusive, but that will
        # be checked by the lock itself.
        assert (not self._is_owned() or
                self._list_owned().issuperset(names)), (
            "remove() on acquired lockset while not owning all elements")

        removed = []

        for lname in names:
            # Calling delete() acquires the lock exclusively if we don't
            # already own it, and causes all pending and subsequent lock
            # acquires to fail. It's fine to call it out of order because
            # delete() also implies release(), and the assertion above
            # guarantees that we either already hold everything we want to
            # delete, or we hold none.
            try:
                self.__lockdict[lname].delete()
                removed.append(lname)
            except (KeyError, errors.LockError):
                # This cannot happen if we were already holding it, verify:
                assert not self._is_owned(), (
                    "remove failed while holding lockset")
            else:
                # If no LockError was raised we are the ones who deleted the
                # lock. This means we can safely remove it from lockdict, as
                # any further or pending delete() or acquire() will fail (and
                # nobody can have the lock since before our call to delete()).
                #
                # This is done in an else clause because if the exception was
                # thrown it's the job of the one who actually deleted it.
                del self.__lockdict[lname]
                # And let's remove it from our private list if we owned it.
                if self._is_owned():
                    self._del_owned(name=lname)

        return removed


# Locking levels, must be acquired in increasing order.
# Current rules are:
#   - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be
#     acquired before performing any operation, either in shared or in
#     exclusive mode. Acquiring the BGL in exclusive mode is discouraged and
#     should be avoided.
#   - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
#     If you need more than one node, or more than one instance, acquire them
#     at the same time.
#   - level LEVEL_CONFIG contains the configuration lock, which you must
#     acquire before reading or changing the config file.
#
# The numeric value of each level must equal its index in LEVELS; the
# level-ordering checks slice LEVELS by level number.
LEVEL_CLUSTER = 0
LEVEL_NODE = 1
LEVEL_INSTANCE = 2
LEVEL_CONFIG = 3

LEVELS = [LEVEL_CLUSTER,
          LEVEL_NODE,
          LEVEL_INSTANCE,
          LEVEL_CONFIG]

# Lock levels which are modifiable
LEVELS_MOD = [LEVEL_NODE, LEVEL_INSTANCE]

# Constant for the big ganeti lock and config lock
# NOTE(review): the two literal names below were lost from this copy of the
# file and have been restored; confirm against the callers that use them.
BGL = 'BGL'
CONFIG = 'config'


class GanetiLockManager:
    """The Ganeti Locking Library

    The purpose of this small library is to manage locking for ganeti
    clusters in a central place, while at the same time doing dynamic checks
    against possible deadlocks. It will also make it easier to transition to
    a different lock type should we migrate away from python threads.

    """
    # The single live manager instance, set by __init__.
    _instance = None

    def __init__(self, nodes=None, instances=None):
        """Constructs a new GanetiLockManager object.

        There should be only a GanetiLockManager object at any time, so this
        function fails an assertion if this is not the case.

        Args:
          nodes: list of node names
          instances: list of instance names

        """
        assert self.__class__._instance is None, (
            "double GanetiLockManager instance")
        self.__class__._instance = self

        # The keyring contains all the locks, at their level and in the
        # correct locking order.
        self.__keyring = {
            LEVEL_CLUSTER: LockSet([BGL]),
            LEVEL_NODE: LockSet(nodes),
            LEVEL_INSTANCE: LockSet(instances),
            LEVEL_CONFIG: LockSet([CONFIG]),
        }

    def _names(self, level):
        """List the lock names at the given level.
        Used for debugging/testing purposes.

        Args:
          level: the level whose list of locks to get

        """
        assert level in LEVELS, "Invalid locking level %s" % level
        return self.__keyring[level]._names()

    def _is_owned(self, level):
        """Check whether we are owning locks at the given level

        """
        return self.__keyring[level]._is_owned()

    def _list_owned(self, level):
        """Get the set of owned locks at the given level

        """
        return self.__keyring[level]._list_owned()

    def _upper_owned(self, level):
        """Check whether we own any lock at a level greater than the given one.

        Returns a true value if at least one such lock is owned.

        """
        # This way of checking only works if LEVELS[i] = i, which we check
        # for in the test cases.
        return utils.any((self._is_owned(l) for l in LEVELS[level + 1:]))

    def _BGL_owned(self):
        """Check if the current thread owns the BGL.

        Both an exclusive or a shared acquisition work.

        """
        return BGL in self.__keyring[LEVEL_CLUSTER]._list_owned()

    def _contains_BGL(self, level, names):
        """Check if acting on the given level and set of names will change the
        status of the Big Ganeti Lock.

        names=None means "everything at this level", which includes the BGL
        when the level is LEVEL_CLUSTER.

        """
        return level == LEVEL_CLUSTER and (names is None or BGL in names)

    def acquire(self, level, names, blocking=1, shared=0):
        """Acquire a set of resource locks, at the same level.

        Args:
          level: the level at which the locks shall be acquired.
                 It must be a member of LEVELS.
          names: the names of the locks which shall be acquired.
                 (special lock names, or instance/node names)
          shared: whether to acquire in shared mode. By default an exclusive
                  lock will be acquired.
          blocking: whether to block while trying to acquire or to operate in
                    try-lock mode. this locking mode is not supported yet.

        Returns:
          Whatever the LockSet at that level returns from its acquire().

        """
        assert level in LEVELS, "Invalid locking level %s" % level

        # Check that we are either acquiring the Big Ganeti Lock or we already
        # own it. Some "legacy" opcodes need to be sure they are run
        # non-concurrently so even if we've migrated we need to at least share
        # the BGL to be compatible with them. Of course if we own the BGL
        # exclusively there's no point in acquiring any other lock, unless
        # perhaps we are half way through the migration of the current opcode.
        assert (self._contains_BGL(level, names) or self._BGL_owned()), (
            "You must own the Big Ganeti Lock before acquiring any other")

        # Check we don't own locks at upper levels.
        assert not self._upper_owned(level), (
            "Cannot acquire locks at a level"
            " while owning some at a greater one")

        # Acquire the locks in the set.
        return self.__keyring[level].acquire(names, shared=shared,
                                             blocking=blocking)

    def release(self, level, names=None):
        """Release a set of resource locks, at the same level.

        You must have acquired the locks, either in shared or in exclusive
        mode, before releasing them.

        Args:
          level: the level at which the locks shall be released.
                 It must be a member of LEVELS.
          names: the names of the locks which shall be released.
                 (defaults to all the locks acquired at that level).

        """
        assert level in LEVELS, "Invalid locking level %s" % level
        assert (not self._contains_BGL(level, names) or
                not self._upper_owned(LEVEL_CLUSTER)), (
            "Cannot release the Big Ganeti Lock while holding something"
            " at upper levels")

        # Release will complain if we don't own the locks already
        return self.__keyring[level].release(names)

    def add(self, level, names, acquired=0, shared=0):
        """Add locks at the specified level.

        Args:
          level: the level at which the locks shall be added.
                 It must be a member of LEVELS_MOD.
          names: names of the locks to acquire
          acquired: whether to acquire the newly added locks
          shared: whether the acquisition will be shared

        """
        assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
        assert self._BGL_owned(), (
            "You must own the BGL before performing other"
            " operations")
        assert not self._upper_owned(level), (
            "Cannot add locks at a level"
            " while owning some at a greater one")
        return self.__keyring[level].add(names, acquired=acquired,
                                         shared=shared)

    def remove(self, level, names, blocking=1):
        """Remove locks from the specified level.

        You must either already own the locks you are trying to remove
        exclusively or not own any lock at an upper level.

        Args:
          level: the level at which the locks shall be removed.
                 It must be a member of LEVELS_MOD.
          names: the names of the locks which shall be removed.
                 (special lock names, or instance/node names)
          blocking: whether to block while trying to operate in try-lock mode.
                    this locking mode is not supported yet.

        """
        assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
        assert self._BGL_owned(), (
            "You must own the BGL before performing other"
            " operations")
        # Check we either own the level or don't own anything from here up.
        # LockSet.remove() will check the case in which we don't own all the
        # needed resources, or we have a shared ownership.
        assert self._is_owned(level) or not self._upper_owned(level), (
            "Cannot remove locks at a level while not owning it or"
            " owning some at a greater one")
        return self.__keyring[level].remove(names, blocking=blocking)