Statistics
| Branch: | Tag: | Revision:

root / lib / locking.py @ d76167a5

History | View | Annotate | Download (34.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Module implementing the Ganeti locking code."""
22

    
23
import os
24
import select
25
import threading
26
import time
27
import errno
28

    
29
from ganeti import errors
30
from ganeti import utils
31

    
32

    
33
def ssynchronized(lock, shared=0):
  """Decorator factory which runs a function while holding a lock.

  The returned decorator wraps a function so that C{lock} is acquired right
  before every call and released again afterwards, no matter whether the
  call returned normally or raised an exception.

  @param lock: object providing C{acquire(shared=...)} and C{release()},
      i.e. a SharedLock or something supporting its semantics
  @param shared: value handed to C{lock.acquire} as its C{shared} argument
  @return: decorator to be applied to the function to protect

  """
  def decorate(fn):
    def sync_function(*args, **kwargs):
      lock.acquire(shared=shared)
      try:
        result = fn(*args, **kwargs)
      finally:
        # Reached on normal return as well as on exceptions
        lock.release()
      return result
    return sync_function
  return decorate
50

    
51

    
52
class _SingleActionPipeConditionWaiter(object):
  """Callable helper class for _SingleActionPipeCondition.

  An instance bundles the poller and the file descriptor to watch; calling
  it blocks until the poller reports an event on that descriptor or the
  timeout expires.

  """
  __slots__ = [
    "_cond",
    "_fd",
    "_poller",
    ]

  def __init__(self, cond, poller, fd):
    """Initializes this class.

    @type cond: L{_SingleActionPipeCondition}
    @param cond: Parent condition
    @type poller: select.poll
    @param poller: Poller object
    @type fd: int
    @param fd: File descriptor to wait for

    """
    object.__init__(self)

    self._cond = cond
    self._poller = poller
    self._fd = fd

  def __call__(self, timeout):
    """Wait for something to happen on the pipe.

    Nothing is returned, hence the caller cannot tell from this call alone
    whether it was notified or whether the timeout expired.

    @type timeout: float or None
    @param timeout: Timeout for waiting in seconds (can be None to wait
        without limit)

    """
    start_time = time.time()
    remaining_time = timeout

    while timeout is None or remaining_time > 0:
      if remaining_time is None:
        poll_timeout = None
      else:
        # Our calculation uses seconds, poll() wants milliseconds. Passing
        # seconds would make poll() return far too early and turn this loop
        # into a busy-wait until the deadline.
        poll_timeout = remaining_time * 1000

      try:
        result = self._poller.poll(poll_timeout)
      except EnvironmentError as err:
        # NOTE(review): on Python 2 poll() may raise select.error, which is
        # not an EnvironmentError -- confirm whether EINTR is really caught
        # here on the interpreter versions in use.
        if err.errno != errno.EINTR:
          raise
        # Interrupted by a signal, simply try again
        result = None

      # Check whether we were notified
      if result and result[0][0] == self._fd:
        break

      # Re-calculate timeout if necessary
      if timeout is not None:
        remaining_time = start_time + timeout - time.time()
104

    
105

    
106
class _SingleActionPipeCondition(object):
  """Wrapper around a pipe for usage inside conditions.

  This class contains a POSIX pipe(2) and a poller to poll it. The pipe is
  always allocated when constructing the class. Extra care is taken to always
  close the file descriptors.

  An additional class, L{_SingleActionPipeConditionWaiter}, is used to wait for
  notifications.

  Warning: This class is designed to be used as the underlying component of a
  locking condition, but is not by itself thread safe, and needs to be
  protected by an external lock.

  """
  __slots__ = [
    "_poller",
    "_read_fd",
    "_write_fd",
    "_nwaiters",
    ]

  _waiter_class = _SingleActionPipeConditionWaiter

  def __init__(self):
    """Initializes this class.

    Creates the pipe and registers its read side with a new poller. If
    setting up the poller fails, both pipe descriptors are closed again
    before the exception is re-raised.

    """
    object.__init__(self)

    self._nwaiters = 0

    # Give every slot a value before anything can fail, so that _Cleanup
    # (and therefore __del__) works even on a half-constructed instance.
    self._poller = None
    self._read_fd = None
    self._write_fd = None

    # Just assume the unpacking is successful, otherwise error handling gets
    # very complicated.
    (self._read_fd, self._write_fd) = os.pipe()
    try:
      # The poller looks for closure of the write side
      poller = select.poll()
      poller.register(self._read_fd, select.POLLHUP)

      self._poller = poller
    except:
      # _Cleanup closes the descriptors and resets the attributes to None;
      # closing them here without resetting would make __del__ close the
      # same descriptor numbers a second time.
      self._Cleanup()
      raise

    # There should be no code here anymore, otherwise the pipe file descriptors
    # may not be cleaned up properly in case of errors.

  def StartWaiting(self):
    """Return function to wait for notification.

    Increases the number of waiters; every call must be paired with a call
    to L{DoneWaiting}.

    @rtype: L{_SingleActionPipeConditionWaiter}
    @return: Function to wait for notification
    @raise RuntimeError: if the condition has already been cleaned up

    """
    assert self._nwaiters >= 0

    if self._poller is None:
      raise RuntimeError("Already cleaned up")

    # Create waiter function and increase number of waiters
    wait_fn = self._waiter_class(self, self._poller, self._read_fd)
    self._nwaiters += 1
    return wait_fn

  def DoneWaiting(self):
    """Decrement number of waiters and automatic cleanup.

    Must be called after waiting for a notification. Once the last waiter is
    done, the file descriptors are closed.

    @rtype: bool
    @return: Whether this was the last waiter

    """
    assert self._nwaiters > 0

    self._nwaiters -= 1

    if self._nwaiters == 0:
      self._Cleanup()
      return True

    return False

  def notifyAll(self):
    """Close the writing side of the pipe to notify all waiters.

    @raise RuntimeError: if the write side was closed before, be it by an
        earlier notification or by the cleanup

    """
    if self._write_fd is None:
      raise RuntimeError("Can only notify once")

    os.close(self._write_fd)
    self._write_fd = None

  def _Cleanup(self):
    """Close all file descriptors.

    Descriptors which are already closed (attribute set to None) are skipped,
    hence calling this function more than once is harmless.

    """
    if self._read_fd is not None:
      os.close(self._read_fd)
      self._read_fd = None

    if self._write_fd is not None:
      os.close(self._write_fd)
      self._write_fd = None

    self._poller = None

  def __del__(self):
    """Called on object deletion.

    Ensure no file descriptors are left open.

    """
    self._Cleanup()
224

    
225

    
226
class _CountingCondition(object):
  """Thin layer over threading.Condition which counts its waiters.

  The number of threads currently inside L{wait} is tracked in a separate
  counter, as the internal "__waiters" attribute of threading.Condition can't
  be accessed in a thread-safe way.

  """
  __slots__ = [
    "_cond",
    "_nwaiters",
    ]

  def __init__(self, lock):
    """Creates the underlying condition on top of the given lock.

    @param lock: lock object handed to threading.Condition

    """
    super(_CountingCondition, self).__init__()
    self._nwaiters = 0
    self._cond = threading.Condition(lock=lock)

  def notifyAll(self):
    """Wakes up everybody waiting on the condition.

    """
    return self._cond.notifyAll()

  def wait(self, timeout=None):
    """Blocks until the condition is notified or the timeout expires.

    The waiter counter is incremented for the duration of the wait.

    @type timeout: float or None
    @param timeout: Timeout in seconds
    @return: whatever the underlying condition's wait() returns

    """
    assert self._nwaiters >= 0

    self._nwaiters += 1
    try:
      result = self._cond.wait(timeout=timeout)
    finally:
      # Decrement even if wait() raised
      self._nwaiters -= 1
    return result

  def has_waiting(self):
    """Tells whether at least one thread is currently waiting.

    @rtype: bool

    """
    return self._nwaiters != 0
272

    
273

    
274
class SharedLock(object):
  """Implements a shared lock.

  Multiple threads can acquire the lock in a shared way by calling
  acquire(shared=1). In order to acquire the lock in an exclusive way threads
  can call acquire() (C{shared} defaults to 0).

  The lock prevents starvation but does not guarantee that threads will acquire
  the shared lock in the order they queued for it, just that they will
  eventually do so.

  The lock is not recursive: acquiring it again from a thread which already
  holds it triggers an assertion.

  """
  __slots__ = [
    "__active_shr_c",
    "__inactive_shr_c",
    "__deleted",
    "__exc",
    "__lock",
    "__pending",
    "__shr",
    ]

  __condition_class = _CountingCondition

  def __init__(self):
    """Construct a new SharedLock.

    """
    object.__init__(self)

    # Internal lock
    self.__lock = threading.Lock()

    # Queue containing waiting acquires
    self.__pending = []

    # Active and inactive conditions for shared locks
    self.__active_shr_c = self.__condition_class(self.__lock)
    self.__inactive_shr_c = self.__condition_class(self.__lock)

    # Current lock holders
    self.__shr = set()
    self.__exc = None

    # is this lock in the deleted state?
    self.__deleted = False

  def __check_deleted(self):
    """Raises an exception if the lock has been deleted.

    @raise errors.LockError: if L{delete} succeeded on this lock

    """
    if self.__deleted:
      raise errors.LockError("Deleted lock")

  def __is_sharer(self):
    """Is the current thread sharing the lock at this time?

    """
    return threading.currentThread() in self.__shr

  def __is_exclusive(self):
    """Is the current thread holding the lock exclusively at this time?

    """
    return threading.currentThread() == self.__exc

  def __is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    This is a private version of the function, which presumes you're holding
    the internal lock.

    @param shared: see L{_is_owned}

    """
    if shared < 0:
      return self.__is_sharer() or self.__is_exclusive()
    elif shared:
      return self.__is_sharer()
    else:
      return self.__is_exclusive()

  def _is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership

    """
    self.__lock.acquire()
    try:
      return self.__is_owned(shared=shared)
    finally:
      self.__lock.release()

  def _count_pending(self):
    """Returns the number of pending acquires.

    @rtype: int

    """
    self.__lock.acquire()
    try:
      return len(self.__pending)
    finally:
      self.__lock.release()

  def __do_acquire(self, shared):
    """Actually acquire the lock.

    Only records the current thread as holder; the caller has to make sure
    the lock can be acquired (see L{__can_acquire}).

    """
    if shared:
      self.__shr.add(threading.currentThread())
    else:
      self.__exc = threading.currentThread()

  def __can_acquire(self, shared):
    """Determine whether lock can be acquired.

    A shared acquire only requires that there is no exclusive holder, an
    exclusive acquire requires that there is no holder at all.

    """
    if shared:
      return self.__exc is None
    else:
      return len(self.__shr) == 0 and self.__exc is None

  def __is_on_top(self, cond):
    """Checks whether the passed condition is on top of the queue.

    The caller must make sure the queue isn't empty.

    """
    return self.__pending[0] == cond

  def __acquire_unlocked(self, shared=0, timeout=None):
    """Acquire a shared lock.

    The internal lock must be held by the caller.

    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param timeout: maximum waiting time before giving up
    @rtype: bool
    @return: True if the lock was acquired, False if a timeout was given and
        the lock could not be acquired after waiting once
    @raise errors.LockError: if the lock is or gets deleted

    """
    self.__check_deleted()

    # We cannot acquire the lock if we already have it
    assert not self.__is_owned(), "double acquire() on a non-recursive lock"

    # Check whether someone else holds the lock or there are pending acquires.
    if not self.__pending and self.__can_acquire(shared):
      # Apparently not, can acquire lock directly.
      self.__do_acquire(shared)
      return True

    if shared:
      # All shared acquires queued at the same time wait on one condition
      wait_condition = self.__active_shr_c

      # Check if we're not yet in the queue
      if wait_condition not in self.__pending:
        self.__pending.append(wait_condition)
    else:
      # Every exclusive acquire gets a condition of its own
      wait_condition = self.__condition_class(self.__lock)
      # Always add to queue
      self.__pending.append(wait_condition)

    try:
      # Wait until we become the topmost acquire in the queue or the timeout
      # expires.
      while not (self.__is_on_top(wait_condition) and
                 self.__can_acquire(shared)):
        # Wait for notification
        wait_condition.wait(timeout)
        self.__check_deleted()

        # A lot of code assumes blocking acquires always succeed. Loop
        # internally for that case.
        if timeout is not None:
          break

      if self.__is_on_top(wait_condition) and self.__can_acquire(shared):
        self.__do_acquire(shared)
        return True
    finally:
      # Remove condition from queue if there are no more waiters. After a
      # deletion the queue has already been emptied by delete().
      if not wait_condition.has_waiting() and not self.__deleted:
        self.__pending.remove(wait_condition)

    return False

  def acquire(self, shared=0, timeout=None):
    """Acquire a shared lock.

    @type shared: int
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @rtype: bool
    @return: whether the lock was acquired
    @raise errors.LockError: if the lock is or gets deleted

    """
    self.__lock.acquire()
    try:
      return self.__acquire_unlocked(shared, timeout)
    finally:
      self.__lock.release()

  def release(self):
    """Release a Shared Lock.

    You must have acquired the lock, either in shared or in exclusive mode,
    before calling this function.

    """
    self.__lock.acquire()
    try:
      assert self.__is_exclusive() or self.__is_sharer(), \
        "Cannot release non-owned lock"

      # Autodetect release type
      if self.__is_exclusive():
        self.__exc = None
      else:
        self.__shr.remove(threading.currentThread())

      # Notify topmost condition in queue
      if self.__pending:
        first_condition = self.__pending[0]
        first_condition.notifyAll()

        # Swap the shared conditions once the active one was notified, so
        # shared acquires arriving from now on use the other condition
        # (presumably to keep them from joining the group just woken up).
        if first_condition == self.__active_shr_c:
          self.__active_shr_c = self.__inactive_shr_c
          self.__inactive_shr_c = first_condition

    finally:
      self.__lock.release()

  def delete(self, timeout=None):
    """Delete a Shared Lock.

    This operation will declare the lock for removal. First the lock will be
    acquired in exclusive mode if you don't already own it, then the lock
    will be put in a state where any future and pending acquire() fail.

    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @rtype: bool
    @return: whether the lock was deleted; False if the exclusive acquire
        timed out
    @raise errors.LockError: if the lock was deleted before

    """
    self.__lock.acquire()
    try:
      assert not self.__is_sharer(), "Cannot delete() a lock while sharing it"

      self.__check_deleted()

      # The caller is allowed to hold the lock exclusively already.
      acquired = self.__is_exclusive()

      if not acquired:
        # The timeout has to be passed by keyword: the first positional
        # parameter of __acquire_unlocked is "shared". Passing it
        # positionally would request a shared acquire without timeout.
        acquired = self.__acquire_unlocked(timeout=timeout)

      if acquired:
        self.__deleted = True
        self.__exc = None

        # Notify all acquires. They'll throw an error.
        while self.__pending:
          self.__pending.pop().notifyAll()

      return acquired
    finally:
      self.__lock.release()
541

    
542

    
543
# Whenever we want to acquire a full LockSet we pass None as the value
544
# to acquire.  Hide this behind this nicely named constant.
545
ALL_SET = None
546

    
547

    
548
class LockSet:
  """Implements a set of locks.

  This abstraction implements a set of shared locks for the same resource type,
  distinguished by name. The user can lock a subset of the resources and the
  LockSet will take care of acquiring the locks always in the same order, thus
  preventing deadlock.

  All the locks needed in the same set must be acquired together, though.

  Besides the per-name locks there is one internal set-level lock; it is held
  while the whole set is acquired (names is None) and while members are
  added.

  """
  def __init__(self, members=None):
    """Constructs a new LockSet.

    @param members: initial members of the set; one SharedLock is created
        per name

    """
    # Used internally to guarantee coherency.
    self.__lock = SharedLock()

    # The lockdict indexes the relationship name -> lock
    # The order-of-locking is implied by the alphabetical order of names
    self.__lockdict = {}

    if members is not None:
      for name in members:
        self.__lockdict[name] = SharedLock()

    # The owner dict contains the set of locks each thread owns. For
    # performance each thread can access its own key without a global lock on
    # this structure. It is paramount though that *no* other type of access is
    # done to this structure (eg. no looping over its keys). *_owner helper
    # function are defined to guarantee access is correct, but in general never
    # do anything different than __owners[threading.currentThread()], or there
    # will be trouble.
    self.__owners = {}

  def _is_owned(self):
    """Is the current thread a current level owner?

    True as soon as the current thread has an entry in the owners dict, even
    if the set of names stored there is empty.

    """
    return threading.currentThread() in self.__owners

  def _add_owned(self, name=None):
    """Note the current thread owns the given lock.

    @param name: name of the lock now owned; if None only an (empty) owner
        entry is created for the current thread, unless it has one already

    """
    if name is None:
      if not self._is_owned():
        self.__owners[threading.currentThread()] = set()
    else:
      if self._is_owned():
        self.__owners[threading.currentThread()].add(name)
      else:
        self.__owners[threading.currentThread()] = set([name])

  def _del_owned(self, name=None):
    """Note the current thread no longer owns the given lock.

    The current thread must have an owner entry. The entry is dropped
    altogether once it is empty and the set-lock isn't held either.

    @param name: name of the lock to forget; if None only the removal of an
        empty owner entry is attempted

    """

    if name is not None:
      self.__owners[threading.currentThread()].remove(name)

    # Only remove the key if we don't hold the set-lock as well
    if (not self.__lock._is_owned() and
        not self.__owners[threading.currentThread()]):
      del self.__owners[threading.currentThread()]

  def _list_owned(self):
    """Get the set of resource names owned by the current thread.

    @return: a copy of the owned names, or an empty set if the current
        thread has no owner entry

    """
    if self._is_owned():
      return self.__owners[threading.currentThread()].copy()
    else:
      return set()

  def __names(self):
    """Return the current set of names.

    Only call this function while holding __lock and don't iterate on the
    result after releasing the lock.

    """
    return self.__lockdict.keys()

  def _names(self):
    """Return a copy of the current set of elements.

    Used only for debugging purposes.

    """
    # If we don't already own the set-level lock
    # we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock._is_owned():
      release_lock = True
      self.__lock.acquire(shared=1)
    try:
      result = self.__names()
    finally:
      if release_lock:
        self.__lock.release()
    return set(result)

  def acquire(self, names, blocking=1, shared=0):
    """Acquire a set of resource locks.

    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names); a single name can be
        passed as a string, and None requests the whole set
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported yet)

    @return: the set of names of the locks which were acquired

    @raise errors.LockError: when any lock we try to acquire has
        been deleted before we succeed. In this case none of the
        locks requested will be acquired.
    @raise NotImplementedError: if blocking is false

    """
    if not blocking:
      # We don't have non-blocking mode for now
      raise NotImplementedError

    # Check we don't already own locks at this level
    assert not self._is_owned(), "Cannot acquire locks in the same set twice"

    if names is None:
      # If no names are given acquire the whole set by not letting new names
      # be added before we release, and getting the current list of names.
      # Some of them may then be deleted later, but we'll cope with this.
      #
      # We'd like to acquire this lock in a shared way, as it's nice if
      # everybody else can use the instances at the same time. If we are
      # acquiring them exclusively, though, they won't be able to do this
      # anyway, so we'll get the list lock exclusively as well in order to be
      # able to do add() on the set while owning it.
      self.__lock.acquire(shared=shared)
      try:
        # note we own the set-lock
        self._add_owned()
        names = self.__names()
      except:
        # We shouldn't have problems adding the lock to the owners list, but
        # if we did we'll try to release this lock and re-raise exception.
        # Of course something is going to be really wrong, after this.
        self.__lock.release()
        raise

    try:
      # Support passing in a single resource to acquire rather than many
      if isinstance(names, basestring):
        names = [names]
      else:
        names = sorted(names)

      acquire_list = []
      # First we look the locks up on __lockdict. We have no way of being sure
      # they will still be there after, but this makes it a lot faster should
      # just one of them already be wrong
      for lname in utils.UniqueSequence(names):
        try:
          lock = self.__lockdict[lname] # raises KeyError if lock is not there
          acquire_list.append((lname, lock))
        except (KeyError):
          if self.__lock._is_owned():
            # We are acquiring all the set, it doesn't matter if this
            # particular element is not there anymore.
            continue
          else:
            raise errors.LockError('non-existing lock in set (%s)' % lname)

      # This will hold the locknames we effectively acquired.
      acquired = set()
      # Now acquire_list contains a sorted list of resources and locks we want.
      # In order to get them we loop on this (private) list and acquire() them.
      # We gave no real guarantee they will still exist till this is done but
      # .acquire() itself is safe and will alert us if the lock gets deleted.
      for (lname, lock) in acquire_list:
        try:
          lock.acquire(shared=shared) # raises LockError if the lock is deleted
          # now the lock cannot be deleted, we have it!
          self._add_owned(name=lname)
          acquired.add(lname)
        except (errors.LockError):
          if self.__lock._is_owned():
            # We are acquiring all the set, it doesn't matter if this
            # particular element is not there anymore.
            continue
          else:
            # Remember the failing name (the loop below reuses "lname"), then
            # give back everything acquired so far before reporting the error
            name_fail = lname
            for lname in self._list_owned():
              self.__lockdict[lname].release()
              self._del_owned(name=lname)
            raise errors.LockError('non-existing lock in set (%s)' % name_fail)
        except:
          # We shouldn't have problems adding the lock to the owners list, but
          # if we did we'll try to release this lock and re-raise exception.
          # Of course something is going to be really wrong, after this.
          if lock._is_owned():
            lock.release()
          raise

    except:
      # If something went wrong and we had the set-lock let's release it...
      if self.__lock._is_owned():
        self.__lock.release()
      raise

    return acquired

  def release(self, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive mode,
    before releasing them. If the set-level lock is held it is released as
    well, no matter which names are passed.

    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level); a single name
        can be passed as a string

    """
    assert self._is_owned(), "release() on lock set while not owner"

    # Support passing in a single resource to release rather than many
    if isinstance(names, basestring):
      names = [names]

    if names is None:
      names = self._list_owned()
    else:
      names = set(names)
      assert self._list_owned().issuperset(names), (
               "release() on unheld resources %s" %
               names.difference(self._list_owned()))

    # First of all let's release the "all elements" lock, if set.
    # After this 'add' can work again
    if self.__lock._is_owned():
      self.__lock.release()
      self._del_owned()

    for lockname in names:
      # If we are sure the lock doesn't leave __lockdict without being
      # exclusively held we can do this...
      self.__lockdict[lockname].release()
      self._del_owned(name=lockname)

  def add(self, names, acquired=0, shared=0):
    """Add a new set of elements to the set.

    The calling thread must either own nothing in this set or hold the
    set-level lock exclusively.

    @param names: names of the new elements to add; a single name can be
        passed as a string
    @param acquired: pre-acquire the new resource?
    @param shared: is the pre-acquisition shared?

    @return: True
    @raise errors.LockError: if any of the names is already in the set

    """
    # Check we don't already own locks at this level
    assert not self._is_owned() or self.__lock._is_owned(shared=0), \
      "Cannot add locks if the set is only partially owned, or shared"

    # Support passing in a single resource to add rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we don't already own the set-level lock acquired in an exclusive way
    # we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock._is_owned():
      release_lock = True
      self.__lock.acquire()

    try:
      invalid_names = set(self.__names()).intersection(names)
      if invalid_names:
        # This must be an explicit raise, not an assert, because assert is
        # turned off when using optimization, and this can happen because of
        # concurrency even if the user doesn't want it.
        raise errors.LockError("duplicate add() (%s)" % invalid_names)

      for lockname in names:
        lock = SharedLock()

        if acquired:
          lock.acquire(shared=shared)
          # now the lock cannot be deleted, we have it!
          try:
            self._add_owned(name=lockname)
          except:
            # We shouldn't have problems adding the lock to the owners list,
            # but if we did we'll try to release this lock and re-raise
            # exception.  Of course something is going to be really wrong,
            # after this.  On the other hand the lock hasn't been added to the
            # __lockdict yet so no other threads should be pending on it. This
            # release is just a safety measure.
            lock.release()
            raise

        self.__lockdict[lockname] = lock

    finally:
      # Only release __lock if we were not holding it previously.
      if release_lock:
        self.__lock.release()

    return True

  def remove(self, names, blocking=1):
    """Remove elements from the lock set.

    You can either not hold anything in the lockset or already hold a superset
    of the elements you want to delete, exclusively.

    @param names: names of the resource to remove; a single name can be
        passed as a string
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported
        yet unless you are already holding exclusively the locks)

    @return: a list of locks which we removed; the list is always
        equal to the names list if we were holding all the locks
        exclusively
    @raise NotImplementedError: if blocking is false and the current thread
        owns nothing in this set

    """
    if not blocking and not self._is_owned():
      # We don't have non-blocking mode for now
      raise NotImplementedError

    # Support passing in a single resource to remove rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we own any subset of this lock it must be a superset of what we want
    # to delete. The ownership must also be exclusive, but that will be checked
    # by the lock itself.
    assert not self._is_owned() or self._list_owned().issuperset(names), (
      "remove() on acquired lockset while not owning all elements")

    removed = []

    for lname in names:
      # Calling delete() acquires the lock exclusively if we don't already own
      # it, and causes all pending and subsequent lock acquires to fail. It's
      # fine to call it out of order because delete() also implies release(),
      # and the assertion above guarantees that we either already hold
      # everything we want to delete, or we hold none.
      try:
        self.__lockdict[lname].delete()
        removed.append(lname)
      except (KeyError, errors.LockError):
        # This cannot happen if we were already holding it, verify:
        assert not self._is_owned(), "remove failed while holding lockset"
      else:
        # If no LockError was raised we are the ones who deleted the lock.
        # This means we can safely remove it from lockdict, as any further or
        # pending delete() or acquire() will fail (and nobody can have the lock
        # since before our call to delete()).
        #
        # This is done in an else clause because if the exception was thrown
        # it's the job of the one who actually deleted it.
        del self.__lockdict[lname]
        # And let's remove it from our private list if we owned it.
        if self._is_owned():
          self._del_owned(name=lname)

    return removed
906

    
907

    
908
# Locking levels; locks must be acquired in increasing level order.
# Current rules are:
#   - LEVEL_CLUSTER holds the Big Ganeti Lock (BGL), which must be acquired
#     before performing any operation, either in shared or in exclusive mode.
#     Acquiring the BGL in exclusive mode is discouraged and should be
#     avoided.
#   - LEVEL_NODE and LEVEL_INSTANCE hold the node and instance locks. If you
#     need more than one node, or more than one instance, acquire them at
#     the same time.
LEVEL_CLUSTER = 0
LEVEL_INSTANCE = 1
LEVEL_NODE = 2

# All levels, listed in the order they have to be acquired in
LEVELS = [LEVEL_CLUSTER, LEVEL_INSTANCE, LEVEL_NODE]

# Lock levels which are modifiable
LEVELS_MOD = [LEVEL_NODE, LEVEL_INSTANCE]

# Human-readable name of each level
LEVEL_NAMES = {
  LEVEL_CLUSTER: "cluster",
  LEVEL_INSTANCE: "instance",
  LEVEL_NODE: "node",
  }

# Name of the big ganeti lock
BGL = 'BGL'
936

    
937

    
938
class GanetiLockManager:
939
  """The Ganeti Locking Library
940

941
  The purpose of this small library is to manage locking for Ganeti clusters
  in a central place, while at the same time doing dynamic checks against
  possible deadlocks. It will also make it easier to transition to a different
  lock type should we migrate away from Python threads.
945

946
  """
947
  _instance = None
948

    
949
  def __init__(self, nodes=None, instances=None):
    """Builds the GanetiLockManager object.

    Only one GanetiLockManager is supposed to exist at any time; creating
    a second one trips an assertion.

    @param nodes: list of node names
    @param instances: list of instance names

    """
    cls = self.__class__
    assert cls._instance is None, "double GanetiLockManager instance"
    cls._instance = self

    # One lock set per locking level; the cluster level only holds the BGL.
    cluster_locks = LockSet([BGL])
    node_locks = LockSet(nodes)
    instance_locks = LockSet(instances)

    # The keyring maps every locking level to the set of locks living there.
    self.__keyring = {
      LEVEL_CLUSTER: cluster_locks,
      LEVEL_NODE: node_locks,
      LEVEL_INSTANCE: instance_locks,
    }
971

    
972
  def _names(self, level):
    """Return the names of the locks present at a level.

    Intended for debugging and testing.

    @param level: the level whose lock names are wanted; it must be a
        member of LEVELS

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    lockset = self.__keyring[level]
    return lockset._names()
982

    
983
  def _is_owned(self, level):
984
    """Check whether we are owning locks at the given level
985

986
    """
987
    return self.__keyring[level]._is_owned()
988

    
989
  is_owned = _is_owned
990

    
991
  def _list_owned(self, level):
992
    """Get the set of owned locks at the given level
993

994
    """
995
    return self.__keyring[level]._list_owned()
996

    
997
  def _upper_owned(self, level):
    """Tell whether any lock is owned at a level above the given one.

    The result is true when at least one level greater than C{level}
    reports owned locks, and false when none does.

    @param level: the level above which ownership is checked

    """
    # Slicing LEVELS by level number relies on LEVELS[i] == i, an invariant
    # the test cases are meant to verify.
    upper_levels = LEVELS[level + 1:]
    return utils.any(self._is_owned(upper) for upper in upper_levels)
1004

    
1005
  def _BGL_owned(self):
    """Tell whether the Big Ganeti Lock is among the owned cluster locks.

    It does not matter whether the BGL was acquired in shared or in
    exclusive mode.

    """
    owned_cluster_locks = self.__keyring[LEVEL_CLUSTER]._list_owned()
    return BGL in owned_cluster_locks
1012

    
1013
  def _contains_BGL(self, level, names):
    """Tell whether an operation would touch the Big Ganeti Lock.

    That is the case when the operation targets the cluster level and
    either names the BGL explicitly or passes no names at all (None).

    @param level: the level the operation acts on
    @param names: the lock names the operation acts on, or None

    """
    if level != LEVEL_CLUSTER:
      return False
    return names is None or BGL in names
1021

    
1022
  def acquire(self, level, names, blocking=1, shared=0):
    """Acquire a group of locks that all live at one level.

    The result is whatever the lock set of that level returns.

    @param level: the level at which the locks shall be acquired;
        it must be a member of LEVELS.
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names)
    @param shared: whether to acquire in shared mode; an exclusive
        lock is acquired by default
    @param blocking: whether to block until the locks are available or
        to operate in try-lock mode (try-lock mode is not supported yet)

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Either this call takes the Big Ganeti Lock or the caller holds it
    # already. Some "legacy" opcodes must be sure they run non-concurrently,
    # so even migrated code has to at least share the BGL to stay compatible
    # with them. When the BGL is owned exclusively there is no point in
    # taking any other lock, unless perhaps the current opcode is half way
    # through its migration.
    assert self._contains_BGL(level, names) or self._BGL_owned(), \
      "You must own the Big Ganeti Lock before acquiring any other"

    # Levels are taken in increasing order: nothing may be owned at a
    # greater level when acquiring here.
    assert not self._upper_owned(level), \
      "Cannot acquire locks at a level while owning some at a greater one"

    # Hand the actual acquisition over to the lock set of this level.
    lockset = self.__keyring[level]
    return lockset.acquire(names, shared=shared, blocking=blocking)
1053

    
1054
  def release(self, level, names=None):
    """Release a group of locks that all live at one level.

    The locks must have been acquired beforehand, in shared or in
    exclusive mode.

    @param level: the level at which the locks shall be released;
        it must be a member of LEVELS
    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level)

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # The BGL may only go once nothing above the cluster level is owned.
    assert not (self._contains_BGL(level, names) and
                self._upper_owned(LEVEL_CLUSTER)), \
      ("Cannot release the Big Ganeti Lock while holding something"
       " at upper levels")

    # The lock set complains by itself if the locks are not owned.
    lockset = self.__keyring[level]
    return lockset.release(names)
1074

    
1075
  def add(self, level, names, acquired=0, shared=0):
    """Create new locks at the given level.

    The BGL must be owned, and nothing may be owned at a greater level.

    @param level: the level at which the locks shall be added;
        it must be a member of LEVELS_MOD.
    @param names: names of the locks to add
    @param acquired: whether to acquire the newly added locks
    @param shared: whether the acquisition will be shared

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), \
      "You must own the BGL before performing other operations"
    assert not self._upper_owned(level), \
      "Cannot add locks at a level while owning some at a greater one"

    lockset = self.__keyring[level]
    return lockset.add(names, acquired=acquired, shared=shared)
1091

    
1092
  def remove(self, level, names, blocking=1):
    """Delete locks from the given level.

    Either the locks to be removed are already owned exclusively, or no
    lock is owned at an upper level.

    @param level: the level at which the locks shall be removed;
        it must be a member of LEVELS_MOD
    @param names: the names of the locks which shall be removed
        (special lock names, or instance/node names)
    @param blocking: whether to block while removing or to operate in
        try-lock mode (try-lock mode is not supported yet)

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), \
      "You must own the BGL before performing other operations"

    # Either something is owned at this level, or nothing is owned from
    # here upwards. The lock set's own remove() deals with the cases where
    # not all the needed locks are owned, or where they are only shared.
    assert self._is_owned(level) or not self._upper_owned(level), \
      ("Cannot remove locks at a level while not owning it or"
       " owning some at a greater one")

    lockset = self.__keyring[level]
    return lockset.remove(names, blocking=blocking)