Statistics
| Branch: | Tag: | Revision:

root / lib / locking.py @ da4a52a3

History | View | Annotate | Download (61.1 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Module implementing the Ganeti locking code."""
22

    
23
# pylint: disable=W0212
24

    
25
# W0212 since e.g. LockSet methods use (a lot) the internals of
26
# SharedLock
27

    
28
import os
29
import select
30
import threading
31
import errno
32
import weakref
33
import logging
34
import heapq
35
import itertools
36
import time
37

    
38
from ganeti import errors
39
from ganeti import utils
40
from ganeti import compat
41
from ganeti import query
42

    
43

    
44
# Textual lock states, as reported by L{SharedLock.GetLockInfo}
_EXCLUSIVE_TEXT = "exclusive"
_SHARED_TEXT = "shared"
_DELETED_TEXT = "deleted"

# Priority used when the caller doesn't specify one
_DEFAULT_PRIORITY = 0

#: Minimum timeout required to consider scheduling a pending acquisition
#: (seconds)
_LOCK_ACQUIRE_MIN_TIMEOUT = 1.0 / 1000

# Internal lock acquisition modes for L{LockSet}
_LS_ACQUIRE_EXACT = 1
_LS_ACQUIRE_ALL = 2
_LS_ACQUIRE_OPPORTUNISTIC = 3

#: All valid lock acquisition modes for L{LockSet}
_LS_ACQUIRE_MODES = compat.UniqueFrozenset([
  _LS_ACQUIRE_EXACT,
  _LS_ACQUIRE_ALL,
  _LS_ACQUIRE_OPPORTUNISTIC,
  ])
64

    
65

    
66
def ssynchronized(mylock, shared=0):
  """Decorator running a function while a shared-capable lock is held.

  The decorated callable is executed with the lock acquired, either in
  exclusive mode (the default) or in shared mode. The lock has to be a
  L{SharedLock} or provide the same C{acquire(shared=...)} and C{release()}
  interface. The lock is released again even if the callable raises.

  @type mylock: lockable object or string
  @param mylock: the lock to acquire, or the name of the attribute holding
      the lock on the object passed as first positional argument ("self")
  @param shared: value passed as C{shared} to the lock's C{acquire}

  """
  def wrap(fn):
    def sync_function(*args, **kwargs):
      lock = mylock

      if isinstance(lock, basestring):
        assert args, "cannot ssynchronize on non-class method: self not found"
        # The lock was given by name; look it up on "self", i.e. args[0]
        lock = getattr(args[0], mylock)

      lock.acquire(shared=shared)
      try:
        return fn(*args, **kwargs)
      finally:
        lock.release()

    return sync_function

  return wrap
92

    
93

    
94
class _SingleNotifyPipeConditionWaiter(object):
95
  """Helper class for SingleNotifyPipeCondition
96

97
  """
98
  __slots__ = [
99
    "_fd",
100
    "_poller",
101
    ]
102

    
103
  def __init__(self, poller, fd):
104
    """Constructor for _SingleNotifyPipeConditionWaiter
105

106
    @type poller: select.poll
107
    @param poller: Poller object
108
    @type fd: int
109
    @param fd: File descriptor to wait for
110

111
    """
112
    object.__init__(self)
113
    self._poller = poller
114
    self._fd = fd
115

    
116
  def __call__(self, timeout):
117
    """Wait for something to happen on the pipe.
118

119
    @type timeout: float or None
120
    @param timeout: Timeout for waiting (can be None)
121

122
    """
123
    running_timeout = utils.RunningTimeout(timeout, True)
124

    
125
    while True:
126
      remaining_time = running_timeout.Remaining()
127

    
128
      if remaining_time is not None:
129
        if remaining_time < 0.0:
130
          break
131

    
132
        # Our calculation uses seconds, poll() wants milliseconds
133
        remaining_time *= 1000
134

    
135
      try:
136
        result = self._poller.poll(remaining_time)
137
      except EnvironmentError, err:
138
        if err.errno != errno.EINTR:
139
          raise
140
        result = None
141

    
142
      # Check whether we were notified
143
      if result and result[0][0] == self._fd:
144
        break
145

    
146

    
147
class _BaseCondition(object):
148
  """Base class containing common code for conditions.
149

150
  Some of this code is taken from python's threading module.
151

152
  """
153
  __slots__ = [
154
    "_lock",
155
    "acquire",
156
    "release",
157
    "_is_owned",
158
    "_acquire_restore",
159
    "_release_save",
160
    ]
161

    
162
  def __init__(self, lock):
163
    """Constructor for _BaseCondition.
164

165
    @type lock: threading.Lock
166
    @param lock: condition base lock
167

168
    """
169
    object.__init__(self)
170

    
171
    try:
172
      self._release_save = lock._release_save
173
    except AttributeError:
174
      self._release_save = self._base_release_save
175
    try:
176
      self._acquire_restore = lock._acquire_restore
177
    except AttributeError:
178
      self._acquire_restore = self._base_acquire_restore
179
    try:
180
      self._is_owned = lock.is_owned
181
    except AttributeError:
182
      self._is_owned = self._base_is_owned
183

    
184
    self._lock = lock
185

    
186
    # Export the lock's acquire() and release() methods
187
    self.acquire = lock.acquire
188
    self.release = lock.release
189

    
190
  def _base_is_owned(self):
191
    """Check whether lock is owned by current thread.
192

193
    """
194
    if self._lock.acquire(0):
195
      self._lock.release()
196
      return False
197
    return True
198

    
199
  def _base_release_save(self):
200
    self._lock.release()
201

    
202
  def _base_acquire_restore(self, _):
203
    self._lock.acquire()
204

    
205
  def _check_owned(self):
206
    """Raise an exception if the current thread doesn't own the lock.
207

208
    """
209
    if not self._is_owned():
210
      raise RuntimeError("cannot work with un-aquired lock")
211

    
212

    
213
class SingleNotifyPipeCondition(_BaseCondition):
  """Condition which can only be notified once.

  Waiting is implemented with a pipe and poll, so a timeout can be honoured
  without busy polling. The class is almost compatible with Python's
  threading.Condition, with the following differences:
    - notifyAll can only be called once, and no wait can happen after that
    - notify is not supported, only notifyAll

  """

  __slots__ = [
    "_poller",
    "_read_fd",
    "_write_fd",
    "_nwaiters",
    "_notified",
    ]

  _waiter_class = _SingleNotifyPipeConditionWaiter

  def __init__(self, lock):
    """Initializes the condition; the pipe is only created on first wait.

    """
    _BaseCondition.__init__(self, lock)
    self._poller = None
    self._read_fd = None
    self._write_fd = None
    self._notified = False
    self._nwaiters = 0

  def _check_unnotified(self):
    """Raises an exception if the condition was already notified.

    @raise RuntimeError: if L{notifyAll} has been called before

    """
    if not self._notified:
      return

    raise RuntimeError("cannot use already notified condition")

  def _Cleanup(self):
    """Closes whichever pipe ends are still open and drops the poller.

    """
    for attr in ("_read_fd", "_write_fd"):
      fd = getattr(self, attr)
      if fd is not None:
        os.close(fd)
        setattr(self, attr, None)

    self._poller = None

  def wait(self, timeout):
    """Wait for a notification.

    The lock must be held by the caller; it is released while waiting and
    re-acquired before returning.

    @type timeout: float or None
    @param timeout: Waiting timeout (can be None)

    """
    self._check_owned()
    self._check_unnotified()

    self._nwaiters += 1
    try:
      if self._poller is None:
        # First waiter: set up the pipe and watch its reading end for hangup
        (self._read_fd, self._write_fd) = os.pipe()
        self._poller = select.poll()
        self._poller.register(self._read_fd, select.POLLHUP)

      waiter = self._waiter_class(self._poller, self._read_fd)
      saved_state = self._release_save()
      try:
        # Wait for notification
        waiter(timeout)
      finally:
        # Re-acquire lock
        self._acquire_restore(saved_state)
    finally:
      self._nwaiters -= 1
      if not self._nwaiters:
        # Last waiter is gone, the file descriptors are no longer needed
        self._Cleanup()

  def notifyAll(self): # pylint: disable=C0103
    """Close the writing side of the pipe to notify all waiters.

    """
    self._check_owned()
    self._check_unnotified()

    self._notified = True

    write_fd = self._write_fd
    if write_fd is not None:
      os.close(write_fd)
      self._write_fd = None
305

    
306

    
307
class PipeCondition(_BaseCondition):
  """Group-only non-polling condition with counters.

  This condition class uses pipes and poll, internally, to be able to wait for
  notification with a timeout, without resorting to polling. It is almost
  compatible with Python's threading.Condition, but only supports notifyAll and
  non-recursive locks. As an additional feature it's able to report whether
  there are any waiting threads.

  """
  __slots__ = [
    "_waiters",
    "_single_condition",
    ]

  _single_condition_class = SingleNotifyPipeCondition

  def __init__(self, lock):
    """Initializes this class.

    @param lock: condition base lock, see L{_BaseCondition}

    """
    _BaseCondition.__init__(self, lock)
    self._waiters = set()
    self._single_condition = self._single_condition_class(self._lock)

  def wait(self, timeout):
    """Wait for a notification.

    @type timeout: float or None
    @param timeout: Waiting timeout (can be None)

    """
    self._check_owned()

    # Keep local reference to the pipe. It could be replaced by another thread
    # notifying while we're waiting.
    cond = self._single_condition

    self._waiters.add(threading.currentThread())
    try:
      cond.wait(timeout)
    finally:
      self._check_owned()
      self._waiters.remove(threading.currentThread())

  def notifyAll(self): # pylint: disable=C0103
    """Notify all currently waiting threads.

    The single-use condition is notified and then replaced by a fresh one, so
    that later waiters can be notified again.

    """
    self._check_owned()
    self._single_condition.notifyAll()
    self._single_condition = self._single_condition_class(self._lock)

  def get_waiting(self):
    """Returns the set of all waiting threads.

    A copy is returned rather than the internal set, so the result stays
    unchanged when waiters are added or removed after the lock was released.

    @rtype: set
    @return: snapshot of the threads currently waiting

    """
    self._check_owned()

    return set(self._waiters)

  def has_waiting(self):
    """Returns whether there are active waiters.

    @rtype: bool

    """
    self._check_owned()

    return bool(self._waiters)

  def __repr__(self):
    return ("<%s.%s waiters=%s at %#x>" %
            (self.__class__.__module__, self.__class__.__name__,
             self._waiters, id(self)))
380

    
381

    
382
class _PipeConditionWithMode(PipeCondition):
  """L{PipeCondition} which also records the acquire mode.

  @ivar shared: the value given as C{shared} at construction time

  """
  __slots__ = [
    "shared",
    ]

  def __init__(self, lock, shared):
    """Stores the mode and initializes the underlying condition.

    @param lock: condition base lock
    @param shared: mode to remember for this condition

    """
    self.shared = shared
    PipeCondition.__init__(self, lock)
393

    
394

    
395
class SharedLock(object):
  """Lock which can be held by many sharers or by one exclusive owner.

  Threads call C{acquire(shared=1)} to hold the lock together with other
  sharers and C{acquire(shared=0)} to hold it exclusively. The lock is not
  recursive.

  Notes on data structures: C{__pending} contains a priority queue (heapq) of
  all pending acquires: C{[(priority1: prioqueue1), (priority2: prioqueue2),
  ...]}. Each per-priority queue is a plain in-order list of conditions to be
  notified once the lock can be acquired. Shared acquires of the same priority
  are grouped on one condition, which is kept in C{__pending_shared} while it
  exists. C{__pending_by_prio} maps priorities to their per-priority queue for
  faster access.

  @type name: string
  @ivar name: the name of the lock

  """
  __slots__ = [
    "__weakref__",
    "__deleted",
    "__exc",
    "__lock",
    "__pending",
    "__pending_by_prio",
    "__pending_shared",
    "__shr",
    "__time_fn",
    "name",
    ]

  __condition_class = _PipeConditionWithMode

  def __init__(self, name, monitor=None, _time_fn=time.time):
    """Construct a new SharedLock.

    @param name: the name of the lock
    @type monitor: L{LockMonitor}
    @param monitor: Lock monitor with which to register
    @param _time_fn: Function returning the current time, used for unittesting

    """
    object.__init__(self)

    self.name = name

    # Used for unittesting
    self.__time_fn = _time_fn

    # Internal lock protecting all of the state below
    self.__lock = threading.Lock()

    # Current lock holders
    self.__exc = None
    self.__shr = set()

    # Queue containing waiting acquires
    self.__pending = []
    self.__pending_by_prio = {}
    self.__pending_shared = {}

    # is this lock in the deleted state?
    self.__deleted = False

    # Register with lock monitor
    if monitor:
      logging.debug("Adding lock %s to monitor", name)
      monitor.RegisterLock(self)

  def __repr__(self):
    cls = self.__class__
    return ("<%s.%s name=%s at %#x>" %
            (cls.__module__, cls.__name__, self.name, id(self)))

  def GetLockInfo(self, requested):
    """Retrieves information for querying locks.

    @type requested: set
    @param requested: Requested information, see C{query.LQ_*}
    @rtype: list
    @return: single-element list with a C{(name, mode, owner_names, pending)}
        tuple; fields which were not requested are C{None}

    """
    self.__lock.acquire()
    try:
      # Note: to avoid unintentional race conditions, no references to
      # modifiable objects should be returned unless they were created in this
      # function.
      mode = None
      owner_names = None
      pending = None

      if query.LQ_MODE in requested:
        if self.__deleted:
          mode = _DELETED_TEXT
          assert not (self.__exc or self.__shr)
        elif self.__exc:
          mode = _EXCLUSIVE_TEXT
        elif self.__shr:
          mode = _SHARED_TEXT

      # Current owner(s) are wanted
      if query.LQ_OWNER in requested:
        if self.__exc:
          owner = [self.__exc]
        else:
          owner = self.__shr

        if owner:
          assert not self.__deleted
          owner_names = [thread.getName() for thread in owner]

      # Pending acquires are wanted
      if query.LQ_PENDING in requested:
        pending = []

        # Sorting instead of copying and using heapq functions for simplicity
        for (_, prioqueue) in sorted(self.__pending):
          for cond in prioqueue:
            if cond.shared:
              pendmode = _SHARED_TEXT
            else:
              pendmode = _EXCLUSIVE_TEXT

            # List of names will be sorted in L{query._GetLockPending}
            waiter_names = [thread.getName()
                            for thread in cond.get_waiting()]
            pending.append((pendmode, waiter_names))

      return [(self.name, mode, owner_names, pending)]
    finally:
      self.__lock.release()

  def __check_deleted(self):
    """Raises an exception if the lock has been deleted.

    @raise errors.LockError: if the lock is in the deleted state

    """
    if not self.__deleted:
      return

    raise errors.LockError("Deleted lock %s" % self.name)

  def __is_sharer(self):
    """Is the current thread sharing the lock at this time?

    """
    me = threading.currentThread()
    return me in self.__shr

  def __is_exclusive(self):
    """Is the current thread holding the lock exclusively at this time?

    """
    me = threading.currentThread()
    return me == self.__exc

  def __is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    This is a private version of the function, which presumes you're holding
    the internal lock.

    """
    if shared < 0:
      return self.__is_sharer() or self.__is_exclusive()

    if shared:
      return self.__is_sharer()

    return self.__is_exclusive()

  def is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership

    """
    self.__lock.acquire()
    try:
      return self.__is_owned(shared=shared)
    finally:
      self.__lock.release()

  #: Necessary to remain compatible with threading.Condition, which tries to
  #: retrieve a locks' "_is_owned" attribute
  _is_owned = is_owned

  def _count_pending(self):
    """Returns the number of pending acquires.

    Shared acquires grouped on one condition are counted once.

    @rtype: int

    """
    self.__lock.acquire()
    try:
      total = 0
      for (_, prioqueue) in self.__pending:
        total += len(prioqueue)
      return total
    finally:
      self.__lock.release()

  def _check_empty(self):
    """Checks whether there are any pending acquires.

    @rtype: bool
    @return: True if all pending-acquire bookkeeping is empty

    """
    self.__lock.acquire()
    try:
      # Order is important: __find_first_pending_queue modifies __pending
      (_, prioqueue) = self.__find_first_pending_queue()

      return (not prioqueue and
              not self.__pending and
              not self.__pending_by_prio and
              not self.__pending_shared)
    finally:
      self.__lock.release()

  def __do_acquire(self, shared):
    """Actually acquire the lock.

    Only records the current thread as owner; callers must have checked that
    the lock can be acquired.

    """
    me = threading.currentThread()

    if shared:
      self.__shr.add(me)
    else:
      self.__exc = me

  def __can_acquire(self, shared):
    """Determine whether lock can be acquired.

    An exclusive owner blocks everyone; sharers only block exclusive acquires.

    """
    if self.__exc is not None:
      return False

    if shared:
      return True

    return len(self.__shr) == 0

  def __find_first_pending_queue(self):
    """Tries to find the topmost queued entry with pending acquires.

    Removes empty entries while going through the list.

    @return: C{(priority, prioqueue)} of the first non-empty queue, or
        C{(None, None)} if nothing is pending

    """
    while self.__pending:
      (priority, prioqueue) = self.__pending[0]

      if prioqueue:
        return (priority, prioqueue)

      # Remove empty queue
      heapq.heappop(self.__pending)
      del self.__pending_by_prio[priority]
      assert priority not in self.__pending_shared

    return (None, None)

  def __is_on_top(self, cond):
    """Checks whether the passed condition is on top of the queue.

    The caller must make sure the queue isn't empty.

    """
    (_, prioqueue) = self.__find_first_pending_queue()

    return cond == prioqueue[0]

  def __acquire_unlocked(self, shared, timeout, priority):
    """Acquire a shared lock.

    The internal lock must be held by the caller.

    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param timeout: maximum waiting time before giving up
    @type priority: integer
    @param priority: Priority for acquiring lock
    @rtype: bool
    @return: whether the lock was acquired
    @raise errors.LockError: if the lock is or becomes deleted

    """
    self.__check_deleted()

    # We cannot acquire the lock if we already have it
    assert not self.__is_owned(), ("double acquire() on a non-recursive lock"
                                   " %s" % self.name)

    # Remove empty entries from queue
    self.__find_first_pending_queue()

    # Check whether someone else holds the lock or there are pending acquires.
    if not self.__pending and self.__can_acquire(shared):
      # Apparently not, can acquire lock directly.
      self.__do_acquire(shared)
      return True

    # The lock couldn't be acquired right away, so if a timeout is given and is
    # considered too short, return right away as scheduling a pending
    # acquisition is quite expensive
    if timeout is not None and timeout < _LOCK_ACQUIRE_MIN_TIMEOUT:
      return False

    prioqueue = self.__pending_by_prio.get(priority)

    wait_condition = None
    if shared:
      # Try to re-use condition for shared acquire
      wait_condition = self.__pending_shared.get(priority)
      assert (wait_condition is None or
              (wait_condition.shared and wait_condition in prioqueue))

    if wait_condition is None:
      if prioqueue is None:
        assert priority not in self.__pending_by_prio

        # First pending acquire on this priority, set up its queue
        prioqueue = []
        heapq.heappush(self.__pending, (priority, prioqueue))
        self.__pending_by_prio[priority] = prioqueue

      wait_condition = self.__condition_class(self.__lock, shared)
      prioqueue.append(wait_condition)

      if shared:
        # Keep reference for further shared acquires on same priority. This is
        # better than trying to find it in the list of pending acquires.
        assert priority not in self.__pending_shared
        self.__pending_shared[priority] = wait_condition

    acquired = False
    wait_start = self.__time_fn()

    try:
      # Wait until we become the topmost acquire in the queue or the timeout
      # expires.
      while True:
        if self.__is_on_top(wait_condition) and self.__can_acquire(shared):
          self.__do_acquire(shared)
          acquired = True
          break

        # A lot of code assumes blocking acquires always succeed, therefore we
        # can never return False for a blocking acquire
        if (timeout is not None and
            utils.TimeoutExpired(wait_start, timeout, _time_fn=self.__time_fn)):
          break

        # Wait for notification
        # NOTE(review): every iteration waits for the full timeout, not for
        # the time remaining since wait_start -- confirm this is intended
        wait_condition.wait(timeout)
        self.__check_deleted()
    finally:
      # Remove condition from queue if there are no more waiters
      if not wait_condition.has_waiting():
        prioqueue.remove(wait_condition)
        if wait_condition.shared:
          # Remove from list of shared acquires if it wasn't while releasing
          # (e.g. on lock deletion)
          self.__pending_shared.pop(priority, None)

    return acquired

  def acquire(self, shared=0, timeout=None, priority=None,
              test_notify=None):
    """Acquire a shared lock.

    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @type priority: integer
    @param priority: Priority for acquiring lock
    @type test_notify: callable or None
    @param test_notify: Special callback function for unittesting
    @rtype: bool
    @return: whether the lock was acquired

    """
    if priority is None:
      priority = _DEFAULT_PRIORITY

    self.__lock.acquire()
    try:
      # We already got the lock, notify now
      if __debug__ and callable(test_notify):
        test_notify()

      return self.__acquire_unlocked(shared, timeout, priority)
    finally:
      self.__lock.release()

  def downgrade(self):
    """Changes the lock mode from exclusive to shared.

    Pending acquires in shared mode on the same priority will go ahead.
    Nothing is changed if the lock is already held in shared mode.

    """
    self.__lock.acquire()
    try:
      assert self.__is_owned(), "Lock must be owned"

      if self.__is_exclusive():
        # Switch the current thread from exclusive owner to sharer
        self.__exc = None
        self.__do_acquire(1)

        # Important: pending shared acquires should only jump ahead if there
        # was a transition from exclusive to shared, otherwise an owner of a
        # shared lock can keep calling this function to push incoming shared
        # acquires
        (priority, prioqueue) = self.__find_first_pending_queue()
        if prioqueue:
          # Is there a pending shared acquire on this priority?
          shared_cond = self.__pending_shared.pop(priority, None)
          if shared_cond:
            assert shared_cond.shared
            assert shared_cond in prioqueue

            # Ensure shared acquire is on top of queue
            if len(prioqueue) > 1:
              prioqueue.remove(shared_cond)
              prioqueue.insert(0, shared_cond)

            # Notify
            shared_cond.notifyAll()

      assert not self.__is_exclusive()
      assert self.__is_sharer()

      return True
    finally:
      self.__lock.release()

  def release(self):
    """Release a Shared Lock.

    You must have acquired the lock, either in shared or in exclusive mode,
    before calling this function.

    """
    self.__lock.acquire()
    try:
      assert self.__is_exclusive() or self.__is_sharer(), \
        "Cannot release non-owned lock"

      # Autodetect release type
      if self.__is_exclusive():
        self.__exc = None
        last_owner_gone = True
      else:
        self.__shr.remove(threading.currentThread())
        last_owner_gone = not self.__shr

      # Notify topmost condition in queue if there are no owners left (for
      # shared locks)
      if last_owner_gone:
        self.__notify_topmost()
    finally:
      self.__lock.release()

  def __notify_topmost(self):
    """Notifies topmost condition in queue of pending acquires.

    """
    (priority, prioqueue) = self.__find_first_pending_queue()
    if not prioqueue:
      return

    topmost = prioqueue[0]
    topmost.notifyAll()
    if topmost.shared:
      # Prevent further shared acquires from sneaking in while waiters are
      # notified
      self.__pending_shared.pop(priority, None)

  def _notify_topmost(self):
    """Exported version of L{__notify_topmost}.

    """
    self.__lock.acquire()
    try:
      return self.__notify_topmost()
    finally:
      self.__lock.release()

  def delete(self, timeout=None, priority=None):
    """Delete a Shared Lock.

    This operation will declare the lock for removal. First the lock will be
    acquired in exclusive mode if you don't already own it, then the lock
    will be put in a state where any future and pending acquire() fail.

    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @type priority: integer
    @param priority: Priority for acquiring lock
    @rtype: bool
    @return: whether the lock was acquired and marked as deleted

    """
    if priority is None:
      priority = _DEFAULT_PRIORITY

    self.__lock.acquire()
    try:
      assert not self.__is_sharer(), "Cannot delete() a lock while sharing it"

      self.__check_deleted()

      # The caller is allowed to hold the lock exclusively already.
      acquired = self.__is_exclusive()

      if not acquired:
        acquired = self.__acquire_unlocked(0, timeout, priority)

      if acquired:
        assert self.__is_exclusive() and not self.__is_sharer(), \
          "Lock wasn't acquired in exclusive mode"

        self.__deleted = True
        self.__exc = None

        assert not (self.__exc or self.__shr), "Found owner during deletion"

        # Notify all acquires. They'll throw an error.
        for (_, prioqueue) in self.__pending:
          for cond in prioqueue:
            cond.notifyAll()

        assert self.__deleted

      return acquired
    finally:
      self.__lock.release()

  def _release_save(self):
    """Releases the lock and returns whether it was held in shared mode.

    """
    was_shared = self.__is_sharer()
    self.release()
    return was_shared

  def _acquire_restore(self, shared):
    """Re-acquires the lock in the mode returned by L{_release_save}.

    """
    self.acquire(shared=shared)
920

    
921

    
922
# Passing None as the value to acquire means "acquire the full LockSet". This
# constant gives that convention a readable name.
ALL_SET = None
925

    
926

    
927
def _TimeoutZero():
928
  """Returns the number zero.
929

930
  """
931
  return 0
932

    
933

    
934
def _GetLsAcquireModeAndTimeouts(want_all, timeout, opportunistic):
  """Determines modes and timeouts for L{LockSet.acquire}.

  @type want_all: boolean
  @param want_all: Whether all locks in set should be acquired
  @param timeout: Timeout in seconds or C{None}
  @param opportunistic: Whether locks should be acquired opportunistically
  @rtype: tuple
  @return: Tuple containing mode to be passed to L{LockSet.__acquire_inner}
    (one of L{_LS_ACQUIRE_MODES}), a function to calculate timeout for
    acquiring the lockset-internal lock (might be C{None}) and a function to
    calculate the timeout for acquiring individual locks

  """
  # Short circuit when no running timeout is needed
  if opportunistic and not want_all:
    assert timeout is None, "Got timeout for an opportunistic acquisition"
    return (_LS_ACQUIRE_OPPORTUNISTIC, None, _TimeoutZero)

  # We need to keep track of how long we spent waiting for a lock. The
  # timeout passed to this function is over all lock acquisitions.
  running_timeout = utils.RunningTimeout(timeout, False)

  if not want_all:
    # Opportunistic acquisitions of a subset were handled above, so exactly
    # the requested locks are wanted and no lockset-internal timeout is needed
    return (_LS_ACQUIRE_EXACT, None, running_timeout.Remaining)

  if opportunistic:
    # Whole set, opportunistically: individual locks are tried without waiting
    return (_LS_ACQUIRE_OPPORTUNISTIC, running_timeout.Remaining,
            _TimeoutZero)

  return (_LS_ACQUIRE_ALL, running_timeout.Remaining,
          running_timeout.Remaining)
971

    
972

    
973
class _AcquireTimeout(Exception):
974
  """Internal exception to abort an acquire on a timeout.
975

976
  """
977

    
978

    
979
class LockSet:
980
  """Implements a set of locks.
981

982
  This abstraction implements a set of shared locks for the same resource type,
983
  distinguished by name. The user can lock a subset of the resources and the
984
  LockSet will take care of acquiring the locks always in the same order, thus
985
  preventing deadlock.
986

987
  All the locks needed in the same set must be acquired together, though.
988

989
  @type name: string
990
  @ivar name: the name of the lockset
991

992
  """
993
  def __init__(self, members, name, monitor=None):
    """Constructs a new LockSet.

    @type members: list of strings
    @param members: initial members of the set
    @type name: string
    @param name: name of the lockset; also the prefix of all member lock names
    @type monitor: L{LockMonitor}
    @param monitor: Lock monitor with which to register member locks

    """
    assert members is not None, "members parameter is not a list"
    self.name = name

    # Remembered so that locks created later by add() get the same monitor
    self.__monitor = monitor

    # Set-level lock, used internally to guarantee coherency
    self.__lock = SharedLock(self._GetLockName("[lockset]"), monitor=monitor)

    # Maps lock name -> lock object. The order-of-locking is implied by the
    # alphabetical order of names.
    self.__lockdict = dict((mname, SharedLock(self._GetLockName(mname),
                                              monitor=monitor))
                           for mname in members)

    # Maps each thread to the set of lock names it owns. For performance
    # every thread accesses its own key without taking a global lock on this
    # structure, so it is paramount that *no* other kind of access happens
    # (e.g. no looping over the keys). The *_owned helpers exist to keep
    # access correct; never do anything other than
    # __owners[threading.currentThread()], or there will be trouble.
    self.__owners = {}
1027

    
1028
  def _GetLockName(self, mname):
1029
    """Returns the name for a member lock.
1030

1031
    """
1032
    return "%s/%s" % (self.name, mname)
1033

    
1034
  def _get_lock(self):
1035
    """Returns the lockset-internal lock.
1036

1037
    """
1038
    return self.__lock
1039

    
1040
  def _get_lockdict(self):
1041
    """Returns the lockset-internal lock dictionary.
1042

1043
    Accessing this structure is only safe in single-thread usage or when the
1044
    lockset-internal lock is held.
1045

1046
    """
1047
    return self.__lockdict
1048

    
1049
  def is_owned(self):
1050
    """Is the current thread a current level owner?
1051

1052
    @note: Use L{check_owned} to check if a specific lock is held
1053

1054
    """
1055
    return threading.currentThread() in self.__owners
1056

    
1057
  def check_owned(self, names, shared=-1):
    """Check if locks are owned in a specific mode.

    @type names: sequence or string
    @param names: Lock names (or a single lock name)
    @param shared: See L{SharedLock.is_owned}
    @rtype: bool
    @return: C{True} only if every named lock is owned in the given mode;
      C{False} if no names are given or the current thread owns nothing in
      this set
    @raise errors.LockError: if the current thread owns something in this set
      and one of the names is not (or no longer) part of the set
    @note: Use L{is_owned} to check if the current thread holds I{any} lock and
      L{list_owned} to get the names of all owned locks

    """
    if isinstance(names, basestring):
      names = [names]

    # Nothing to verify if no names were given or no locks are owned anyway
    if not (names and self.is_owned()):
      return False

    # Collect references to all locks first (in case they're deleted in the
    # meantime)
    wanted_locks = []
    for lname in names:
      try:
        wanted_locks.append(self.__lockdict[lname])
      except KeyError:
        raise errors.LockError("Non-existing lock '%s' in set '%s' (it may"
                               " have been removed)" % (lname, self.name))

    return compat.all(lock.is_owned(shared=shared) for lock in wanted_locks)
1088

    
1089
  def owning_all(self):
1090
    """Checks whether current thread owns internal lock.
1091

1092
    Holding the internal lock is equivalent with holding all locks in the set
1093
    (the opposite does not necessarily hold as it can not be easily
1094
    determined). L{add} and L{remove} require the internal lock.
1095

1096
    @rtype: boolean
1097

1098
    """
1099
    return self.__lock.is_owned()
1100

    
1101
  def _add_owned(self, name=None):
1102
    """Note the current thread owns the given lock"""
1103
    if name is None:
1104
      if not self.is_owned():
1105
        self.__owners[threading.currentThread()] = set()
1106
    else:
1107
      if self.is_owned():
1108
        self.__owners[threading.currentThread()].add(name)
1109
      else:
1110
        self.__owners[threading.currentThread()] = set([name])
1111

    
1112
  def _del_owned(self, name=None):
1113
    """Note the current thread owns the given lock"""
1114

    
1115
    assert not (name is None and self.__lock.is_owned()), \
1116
           "Cannot hold internal lock when deleting owner status"
1117

    
1118
    if name is not None:
1119
      self.__owners[threading.currentThread()].remove(name)
1120

    
1121
    # Only remove the key if we don't hold the set-lock as well
1122
    if not (self.__lock.is_owned() or
1123
            self.__owners[threading.currentThread()]):
1124
      del self.__owners[threading.currentThread()]
1125

    
1126
  def list_owned(self):
1127
    """Get the set of resource names owned by the current thread"""
1128
    if self.is_owned():
1129
      return self.__owners[threading.currentThread()].copy()
1130
    else:
1131
      return set()
1132

    
1133
  def _release_and_delete_owned(self):
1134
    """Release and delete all resources owned by the current thread"""
1135
    for lname in self.list_owned():
1136
      lock = self.__lockdict[lname]
1137
      if lock.is_owned():
1138
        lock.release()
1139
      self._del_owned(name=lname)
1140

    
1141
  def __names(self):
1142
    """Return the current set of names.
1143

1144
    Only call this function while holding __lock and don't iterate on the
1145
    result after releasing the lock.
1146

1147
    """
1148
    return self.__lockdict.keys()
1149

    
1150
  def _names(self):
1151
    """Return a copy of the current set of elements.
1152

1153
    Used only for debugging purposes.
1154

1155
    """
1156
    # If we don't already own the set-level lock acquired
1157
    # we'll get it and note we need to release it later.
1158
    release_lock = False
1159
    if not self.__lock.is_owned():
1160
      release_lock = True
1161
      self.__lock.acquire(shared=1)
1162
    try:
1163
      result = self.__names()
1164
    finally:
1165
      if release_lock:
1166
        self.__lock.release()
1167
    return set(result)
1168

    
1169
  def acquire(self, names, timeout=None, shared=0, priority=None,
              opportunistic=False, test_notify=None):
    """Acquire a set of resource locks.

    @note: When acquiring locks opportunistically, any number of locks might
      actually be acquired, even zero.

    @type names: list of strings (or string)
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names); C{None} requests the
        whole set
    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @type timeout: float or None
    @param timeout: Maximum time to acquire all locks; for opportunistic
      acquisitions, a timeout can only be given when C{names} is C{None}, in
      which case it is exclusively used for acquiring the L{LockSet}-internal
      lock; opportunistic acquisitions don't use a timeout for acquiring
      individual locks
    @type priority: integer
    @param priority: Priority for acquiring locks
    @type opportunistic: boolean
    @param opportunistic: Acquire locks opportunistically; use the return value
      to determine which locks were actually acquired
    @type test_notify: callable or None
    @param test_notify: Special callback function for unittesting

    @return: Set of all locks successfully acquired or None in case of timeout

    @raise errors.LockError: when any lock we try to acquire has
        been deleted before we succeed. In this case none of the
        locks requested will be acquired.

    """
    assert timeout is None or timeout >= 0.0

    # A thread must not acquire locks in the same set twice
    assert not self.is_owned(), ("Cannot acquire locks in the same set twice"
                                 " (lockset %s)" % self.name)

    if priority is None:
      priority = _DEFAULT_PRIORITY

    try:
      if names is None:
        (mode, ls_timeout_fn, timeout_fn) = \
          _GetLsAcquireModeAndTimeouts(True, timeout, opportunistic)

        # Without names the whole set is acquired: holding the set-level lock
        # keeps new names from being added before we release, and the list
        # of names is read while holding it. Some of the locks may still be
        # deleted later, but that is coped with.
        #
        # It would be nice to always take the set-level lock in shared mode,
        # so that everybody else can use the instances at the same time. If
        # the member locks are acquired exclusively nobody else can do that
        # anyway, so in that case the set-level lock is taken exclusively too,
        # which allows add() on the set while owning it.
        if not self.__lock.acquire(shared=shared, priority=priority,
                                   timeout=ls_timeout_fn()):
          raise _AcquireTimeout()

        try:
          # Record that we own the set-level lock
          self._add_owned()

          return self.__acquire_inner(self.__names(), mode, shared,
                                      priority, timeout_fn, test_notify)
        except:
          # Whatever went wrong (including a timeout while acquiring member
          # locks), give the set-level lock back, drop our ownership entry
          # and let the exception continue.
          self.__lock.release()
          self._del_owned()
          raise

      else:
        assert not (opportunistic and timeout is not None), \
          ("Opportunistic acquisitions can only use a timeout if no"
           " names are given; see docstring for details")

        # A single resource may be passed instead of a list
        if isinstance(names, basestring):
          names = [names]

        (mode, _, timeout_fn) = \
          _GetLsAcquireModeAndTimeouts(False, timeout, opportunistic)

        return self.__acquire_inner(names, mode, shared, priority,
                                    timeout_fn, test_notify)

    except _AcquireTimeout:
      return None
1261

    
1262
  def __acquire_inner(self, names, mode, shared, priority,
                      timeout_fn, test_notify):
    """Inner logic for acquiring a number of locks.

    Acquisition modes:

      - C{_LS_ACQUIRE_ALL}: C{names} contains names of all locks in set, but
        deleted locks can be ignored as the whole set is being acquired with
        its internal lock held
      - C{_LS_ACQUIRE_EXACT}: The names listed in C{names} must be acquired;
        timeouts and deleted locks are fatal
      - C{_LS_ACQUIRE_OPPORTUNISTIC}: C{names} lists names of locks (potentially
        all within the set) which should be acquired opportunistically, that is
        failures are ignored

    @param names: Names of the locks to be acquired
    @param mode: Lock acquisition mode (one of L{_LS_ACQUIRE_MODES})
    @param shared: Whether to acquire in shared mode
    @param priority: Priority for acquiring locks
    @param timeout_fn: Function returning the timeout to use for the next
      lock; it is called once per lock and must therefore always be a
      callable (L{_GetLsAcquireModeAndTimeouts} supplies a function returning
      zero for opportunistic acquisitions)
    @param test_notify: Special callback function for unittesting
    @rtype: set
    @return: Names of the locks which were actually acquired
    @raise errors.LockError: In mode C{_LS_ACQUIRE_EXACT} if a lock doesn't
      exist (anymore); in modes other than C{_LS_ACQUIRE_OPPORTUNISTIC} if a
      blocking acquisition reports failure
    @raise _AcquireTimeout: If a lock couldn't be acquired within its timeout
      (never in mode C{_LS_ACQUIRE_OPPORTUNISTIC})

    """
    assert mode in _LS_ACQUIRE_MODES

    acquire_list = []

    # First we look the locks up on __lockdict. We have no way of being sure
    # they will still be there after, but this makes it a lot faster should
    # just one of them already be wrong. Using a sorted sequence to prevent
    # deadlocks.
    for lname in sorted(frozenset(names)):
      try:
        lock = self.__lockdict[lname] # raises KeyError if lock is not there
      except KeyError:
        # When acquiring the whole set or acquiring opportunistically it
        # doesn't matter if this particular element is not there anymore. If,
        # however, exactly these names should be acquired, not finding a lock
        # is an error.
        if mode == _LS_ACQUIRE_EXACT:
          raise errors.LockError("Lock '%s' not found in set '%s' (it may have"
                                 " been removed)" % (lname, self.name))
      else:
        acquire_list.append((lname, lock))

    # This will hold the locknames we effectively acquired.
    acquired = set()

    try:
      # Now acquire_list contains a sorted list of resources and locks we
      # want.  In order to get them we loop on this (private) list and
      # acquire() them.  We gave no real guarantee they will still exist till
      # this is done but .acquire() itself is safe and will alert us if the
      # lock gets deleted.
      for (lname, lock) in acquire_list:
        if __debug__ and callable(test_notify):
          test_notify_fn = lambda: test_notify(lname)
        else:
          test_notify_fn = None

        timeout = timeout_fn()

        try:
          # raises LockError if the lock was deleted
          acq_success = lock.acquire(shared=shared, timeout=timeout,
                                     priority=priority,
                                     test_notify=test_notify_fn)
        except errors.LockError:
          if mode in (_LS_ACQUIRE_ALL, _LS_ACQUIRE_OPPORTUNISTIC):
            # We are acquiring the whole set or acquiring opportunistically,
            # it doesn't matter if this particular element is not there
            # anymore.
            continue

          raise errors.LockError("Lock '%s' not found in set '%s' (it may have"
                                 " been removed)" % (lname, self.name))

        if not acq_success:
          # Couldn't get lock or timeout occurred
          if mode == _LS_ACQUIRE_OPPORTUNISTIC:
            # Ignore timeouts on opportunistic acquisitions
            continue

          if timeout is None:
            # This shouldn't happen as SharedLock.acquire(timeout=None) is
            # blocking.
            raise errors.LockError("Failed to get lock %s (set %s)" %
                                   (lname, self.name))

          raise _AcquireTimeout()

        try:
          # now the lock cannot be deleted, we have it!
          self._add_owned(name=lname)
          acquired.add(lname)

        except:
          # We shouldn't have problems adding the lock to the owners list, but
          # if we did we'll try to release this lock and re-raise exception.
          # Of course something is going to be really wrong after this.
          if lock.is_owned():
            lock.release()
          raise

    except:
      # Any failure is fatal for the whole acquisition: release all locks
      # owned so far before passing the exception on
      self._release_and_delete_owned()
      raise

    return acquired
1371

    
1372
  def downgrade(self, names=None):
    """Downgrade a set of resource locks from exclusive to shared mode.

    The locks must have been acquired in exclusive mode.

    @type names: list of strings (or string), or None
    @param names: names of the locks to downgrade (defaults to all locks
        owned by the current thread in this set)
    @return: always C{True}

    """
    assert self.is_owned(), ("downgrade on lockset %s while not owning any"
                             " lock" % self.name)

    # A single resource may be passed instead of a list
    if isinstance(names, basestring):
      names = [names]

    owned = self.list_owned()

    if names is not None:
      names = set(names)
      assert owned.issuperset(names), \
        ("downgrade() on unheld resources %s (set %s)" %
         (names.difference(owned), self.name))
    else:
      names = owned

    for lockname in names:
      self.__lockdict[lockname].downgrade()

    # If the lockset itself is owned in exclusive mode it is downgraded as
    # well, but only once no member lock is held exclusively anymore
    if (self.__lock.is_owned(shared=0) and
        not compat.any(lock.is_owned(shared=0)
                       for lock in self.__lockdict.values())):
      self.__lock.downgrade()
      assert self.__lock.is_owned(shared=1)

    return True
1407

    
1408
  def release(self, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive mode,
    before releasing them. If the lockset-internal lock is held it is
    released too, whichever names are given.

    @type names: list of strings, or None
    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level).

    """
    assert self.is_owned(), ("release() on lock set %s while not owner" %
                             self.name)

    # A single resource may be passed instead of a list
    if isinstance(names, basestring):
      names = [names]

    owned = self.list_owned()

    if names is not None:
      names = set(names)
      assert owned.issuperset(names), (
               "release() on unheld resources %s (set %s)" %
               (names.difference(owned), self.name))
    else:
      names = owned

    # The "all elements" lock goes first, if held. After this 'add' can work
    # again.
    if self.__lock.is_owned():
      self.__lock.release()
      self._del_owned()

    for lockname in names:
      # If we are sure the lock doesn't leave __lockdict without being
      # exclusively held we can do this...
      self.__lockdict[lockname].release()
      self._del_owned(name=lockname)
1445

    
1446
  def add(self, names, acquired=0, shared=0):
    """Add a new set of elements to the set

    @type names: list of strings
    @param names: names of the new elements to add
    @type acquired: integer (0/1) used as a boolean
    @param acquired: pre-acquire the new resource?
    @type shared: integer (0/1) used as a boolean
    @param shared: is the pre-acquisition shared?
    @return: always C{True}
    @raise errors.LockError: if any of the names is already part of the set

    """
    # Either nothing is owned at this level or the whole set is owned
    # exclusively
    assert not self.is_owned() or self.__lock.is_owned(shared=0), \
      ("Cannot add locks if the set %s is only partially owned, or shared" %
       self.name)

    # A single resource may be passed instead of a list
    if isinstance(names, basestring):
      names = [names]

    # If we don't already own the set-level lock we get it exclusively and
    # note that we need to release it again afterwards.
    release_lock = not self.__lock.is_owned()
    if release_lock:
      self.__lock.acquire()

    try:
      invalid_names = set(self.__names()).intersection(names)
      if invalid_names:
        # This must be an explicit raise, not an assert, because assert is
        # turned off when using optimization, and this can happen because of
        # concurrency even if the user doesn't want it.
        raise errors.LockError("duplicate add(%s) on lockset %s" %
                               (invalid_names, self.name))

      for lockname in names:
        new_lock = SharedLock(self._GetLockName(lockname),
                              monitor=self.__monitor)

        if acquired:
          # No need for priority or timeout here as this lock has just been
          # created
          new_lock.acquire(shared=shared)
          # now the lock cannot be deleted, we have it!
          try:
            self._add_owned(name=lockname)
          except:
            # Recording the ownership shouldn't fail, but if it does the lock
            # is released and the exception re-raised. The lock hasn't been
            # added to __lockdict yet, so no other thread should be pending
            # on it; the release is just a safety measure.
            new_lock.release()
            raise

        self.__lockdict[lockname] = new_lock

    finally:
      # Only release __lock if we were not holding it previously.
      if release_lock:
        self.__lock.release()

    return True
1510

    
1511
  def remove(self, names):
    """Remove elements from the lock set.

    You can either not hold anything in the lockset or already hold a superset
    of the elements you want to delete, exclusively.

    @type names: list of strings
    @param names: names of the resource to remove.

    @return: a list of locks which we removed; the list is always
        equal to the names list if we were holding all the locks
        exclusively

    """
    # A single resource may be passed instead of a list
    if isinstance(names, basestring):
      names = [names]

    # If we own any subset of this lock it must be a superset of what we want
    # to delete. The ownership must also be exclusive, but that will be checked
    # by the lock itself.
    assert not self.is_owned() or self.list_owned().issuperset(names), (
      "remove() on acquired lockset %s while not owning all elements" %
      self.name)

    removed = []

    for lname in names:
      # Calling delete() acquires the lock exclusively if we don't already own
      # it, and causes all pending and subsequent lock acquires to fail. It's
      # fine to call it out of order because delete() also implies release(),
      # and the assertion above guarantees that we either already hold
      # everything we want to delete, or we hold nothing.
      try:
        self.__lockdict[lname].delete()
      except (KeyError, errors.LockError):
        # This cannot happen if we were already holding it, verify:
        assert not self.is_owned(), ("remove failed while holding lockset %s" %
                                     self.name)
        # Whoever actually deleted the lock is in charge of cleaning it up
        continue

      # No exception means we are the ones who deleted the lock. It can
      # therefore safely be removed from lockdict, as any further or pending
      # delete() or acquire() will fail (and nobody can have the lock since
      # before our call to delete()).
      removed.append(lname)
      del self.__lockdict[lname]

      # And let's remove it from our private list if we owned it.
      if self.is_owned():
        self._del_owned(name=lname)

    return removed
1565

    
1566

    
1567
# Locking levels, must be acquired in increasing order. Current rules are:
# - At level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be
#   acquired before performing any operation, either in shared or exclusive
#   mode. Acquiring the BGL in exclusive mode is discouraged and should be
#   avoided.
# - At levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks. If
#   you need more than one node, or more than one instance, acquire them at the
#   same time.
# - LEVEL_NODE_RES is for node resources and should be used by operations with
#   possibly high impact on the node's disks.
# - LEVEL_NODE_ALLOC blocks instance allocations for the whole cluster
#   ("NAL" is the only lock at this level). It should be acquired in shared
#   mode when an opcode blocks all or a significant amount of a cluster's
#   locks. Opcodes doing instance allocations should acquire in exclusive mode.
#   Once the set of acquired locks for an opcode has been reduced to the working
#   set, the NAL should be released as well to allow allocations to proceed.
(LEVEL_CLUSTER,
 LEVEL_INSTANCE,
 LEVEL_NODE_ALLOC,
 LEVEL_NODEGROUP,
 LEVEL_NODE,
 LEVEL_NODE_RES,
 LEVEL_NETWORK) = range(7)

# All levels, listed in the order in which they must be acquired
LEVELS = [
  LEVEL_CLUSTER,
  LEVEL_INSTANCE,
  LEVEL_NODE_ALLOC,
  LEVEL_NODEGROUP,
  LEVEL_NODE,
  LEVEL_NODE_RES,
  LEVEL_NETWORK,
  ]

# Lock levels which are modifiable
LEVELS_MOD = compat.UniqueFrozenset([
  LEVEL_NODE_RES,
  LEVEL_NODE,
  LEVEL_NODEGROUP,
  LEVEL_INSTANCE,
  LEVEL_NETWORK,
  ])

#: Lock level names (make sure to use singular form)
LEVEL_NAMES = {
  LEVEL_CLUSTER: "cluster",
  LEVEL_INSTANCE: "instance",
  LEVEL_NODE_ALLOC: "node-alloc",
  LEVEL_NODEGROUP: "nodegroup",
  LEVEL_NODE: "node",
  LEVEL_NODE_RES: "node-res",
  LEVEL_NETWORK: "network",
  }

#: Name of the Big Ganeti Lock
BGL = "BGL"

#: Name of the node allocation lock
NAL = "NAL"
1626

    
1627

    
1628
class GanetiLockManager:
1629
  """The Ganeti Locking Library
1630

1631
  The purpose of this small library is to manage locking for ganeti clusters
1632
  in a central place, while at the same time doing dynamic checks against
1633
  possible deadlocks. It will also make it easier to transition to a different
1634
  lock type should we migrate away from python threads.
1635

1636
  """
1637
  _instance = None
1638

    
1639
  def __init__(self, node_uuids, nodegroups, instance_names, networks):
    """Constructs a new GanetiLockManager object.

    There should be only a GanetiLockManager object at any time, so this
    function raises an error if this is not the case.

    @param node_uuids: list of node UUIDs
    @param nodegroups: list of nodegroup uuids
    @param instance_names: list of instance names
    @param networks: initial members of the lockset at the network level

    """
    cls = self.__class__
    assert cls._instance is None, \
           "double GanetiLockManager instance"

    cls._instance = self

    monitor = LockMonitor()
    self._monitor = monitor

    # One entry per level: (level, initial members, lockset name). The
    # locksets are created in exactly this order.
    layout = [
      (LEVEL_CLUSTER, [BGL], "cluster"),
      (LEVEL_NODE, node_uuids, "node"),
      (LEVEL_NODE_RES, node_uuids, "node-res"),
      (LEVEL_NODEGROUP, nodegroups, "nodegroup"),
      (LEVEL_INSTANCE, instance_names, "instance"),
      (LEVEL_NETWORK, networks, "network"),
      (LEVEL_NODE_ALLOC, [NAL], "node-alloc"),
      ]

    # The keyring holds the lockset of every level
    keyring = {}
    for (level, members, lsname) in layout:
      keyring[level] = LockSet(members, lsname, monitor=monitor)

    self.__keyring = keyring

    assert compat.all(ls.name == LEVEL_NAMES[level]
                      for (level, ls) in self.__keyring.items()), \
      "Keyring name mismatch"
1673

    
1674
  def AddToLockMonitor(self, provider):
1675
    """Registers a new lock with the monitor.
1676

1677
    See L{LockMonitor.RegisterLock}.
1678

1679
    """
1680
    return self._monitor.RegisterLock(provider)
1681

    
1682
  def QueryLocks(self, fields):
1683
    """Queries information from all locks.
1684

1685
    See L{LockMonitor.QueryLocks}.
1686

1687
    """
1688
    return self._monitor.QueryLocks(fields)
1689

    
1690
  def _names(self, level):
    """List the lock names at the given level.

    This can be used for debugging/testing purposes.

    @param level: the level whose list of locks to get
    @return: result of L{LockSet._names} for the lockset at that level

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    lockset = self.__keyring[level]
    return lockset._names()
1700

    
1701
  def is_owned(self, level):
1702
    """Check whether we are owning locks at the given level
1703

1704
    """
1705
    return self.__keyring[level].is_owned()
1706

    
1707
  def list_owned(self, level):
1708
    """Get the set of owned locks at the given level
1709

1710
    """
1711
    return self.__keyring[level].list_owned()
1712

    
1713
  def check_owned(self, level, names, shared=-1):
1714
    """Check if locks at a certain level are owned in a specific mode.
1715

1716
    @see: L{LockSet.check_owned}
1717

1718
    """
1719
    return self.__keyring[level].check_owned(names, shared=shared)
1720

    
1721
  def owning_all(self, level):
1722
    """Checks whether current thread owns all locks at a certain level.
1723

1724
    @see: L{LockSet.owning_all}
1725

1726
    """
1727
    return self.__keyring[level].owning_all()
1728

    
1729
  def _upper_owned(self, level):
    """Check whether we own any lock at a level greater than the given one.

    @param level: the level to compare against
    @return: true if something is owned at any level listed after C{level} in
      L{LEVELS}, false otherwise

    """
    # This way of checking only works if LEVELS[i] = i, which we check for in
    # the test cases.
    upper_levels = LEVELS[level + 1:]
    return compat.any((self.is_owned(upper) for upper in upper_levels))
1736

    
1737
  def _BGL_owned(self): # pylint: disable=C0103
    """Check if the current thread owns the BGL.

    Both an exclusive or a shared acquisition work.

    @rtype: bool

    """
    cluster_owned = self.__keyring[LEVEL_CLUSTER].list_owned()
    return BGL in cluster_owned
1744

    
1745
  @staticmethod
  def _contains_BGL(level, names): # pylint: disable=C0103
    """Check if the level contains the BGL.

    Check if acting on the given level and set of names will change
    the status of the Big Ganeti Lock.

    @param level: the locking level acted upon
    @param names: the lock names acted upon; C{None} stands for all locks at
      the level

    """
    # The BGL lives at the cluster level only
    if level != LEVEL_CLUSTER:
      return False

    return names is None or BGL in names
1754

    
1755
  def acquire(self, level, names, timeout=None, shared=0, priority=None,
              opportunistic=False):
    """Acquire a set of resource locks, at the same level.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be acquired
    @type names: list of strings (or string)
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names)
    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default
        an exclusive lock will be acquired
    @type timeout: float
    @param timeout: Maximum time to acquire all locks
    @type priority: integer
    @param priority: Priority for acquiring lock
    @type opportunistic: boolean
    @param opportunistic: Acquire locks opportunistically; use the return value
      to determine which locks were actually acquired
    @return: the result of L{LockSet.acquire} for the lockset at C{level}

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Either the Big Ganeti Lock is being acquired right now or it must be
    # owned already. Some "legacy" opcodes need to be sure they are run
    # non-concurrently, so even migrated opcodes have to at least share the
    # BGL to be compatible with them. Of course if we own the BGL exclusively
    # there's no point in acquiring any other lock, unless perhaps we are half
    # way through the migration of the current opcode.
    assert (self._contains_BGL(level, names) or self._BGL_owned()), (
      "You must own the Big Ganeti Lock before acquiring any other")

    # Nothing may be owned at an upper level
    assert not self._upper_owned(level), ("Cannot acquire locks at a level"
                                          " while owning some at a greater one")

    # Hand over to the lockset of this level
    lockset = self.__keyring[level]
    return lockset.acquire(names, shared=shared, timeout=timeout,
                           priority=priority, opportunistic=opportunistic)
1795

    
1796
  def downgrade(self, level, names=None):
    """Turn exclusively held locks of one level into shared ones.

    The locks must have been acquired in exclusive mode beforehand.

    @type level: member of locking.LEVELS
    @param level: level whose locks are downgraded
    @type names: list of strings, or None
    @param names: names of the locks to downgrade (by default all locks
        acquired at the level)
    @return: the result of the C{downgrade} call on the level's lock set

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    lockset = self.__keyring[level]
    return lockset.downgrade(names=names)
1811

    
1812
  def release(self, level, names=None):
    """Release a set of resource locks belonging to one level.

    The locks must be owned by the caller, no matter whether in shared or in
    exclusive mode.

    @type level: member of locking.LEVELS
    @param level: level whose locks are released
    @type names: list of strings, or None
    @param names: names of the locks to release (by default all locks
        acquired at that level)
    @return: the result of the C{release} call on the level's lock set

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Spelled-out form of an assert statement (skipped when asserts are
    # disabled): the BGL must not be dropped while locks above the cluster
    # level are still held.
    if __debug__:
      if (self._contains_BGL(level, names) and
          self._upper_owned(LEVEL_CLUSTER)):
        owned_by_level = ["%s=%r" % (LEVEL_NAMES[i], self.list_owned(i))
                          for i in self.__keyring.keys()]
        raise AssertionError(
          "Cannot release the Big Ganeti Lock while holding something"
          " at upper levels (%r)" % (utils.CommaJoin(owned_by_level), ))

    # The lock set itself complains about locks which are not owned
    lockset = self.__keyring[level]
    return lockset.release(names)
1835

    
1836
  def add(self, level, names, acquired=0, shared=0):
    """Create new locks at a modifiable level.

    The caller has to own the BGL and must not hold any lock at a greater
    level; both conditions are enforced through assertions only.

    @type level: member of locking.LEVELS_MOD
    @param level: level receiving the new locks
    @type names: list of strings
    @param names: names of the locks to add
    @type acquired: integer (0/1) used as a boolean
    @param acquired: whether the new locks should be acquired right away
    @type shared: integer (0/1) used as a boolean
    @param shared: whether that acquisition is a shared one
    @return: the result of the C{add} call on the level's lock set

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), (
      "You must own the BGL before performing other operations")
    assert not self._upper_owned(level), (
      "Cannot add locks at a level while owning some at a greater one")

    lockset = self.__keyring[level]
    return lockset.add(names, acquired=acquired, shared=shared)
1855

    
1856
  def remove(self, level, names):
    """Delete locks from a modifiable level.

    The caller has to own the BGL. In addition, either locks at this level
    are owned already, or nothing is owned at any greater level. These
    conditions are enforced through assertions only.

    @type level: member of locking.LEVELS_MOD
    @param level: level losing the locks
    @type names: list of strings
    @param names: names of the locks to remove (special lock names, or
        instance/node names)
    @return: the result of the C{remove} call on the level's lock set

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), (
      "You must own the BGL before performing other operations")

    # Owning something at this level is fine, and so is owning nothing from
    # here upwards. Missing or merely shared ownership of the named locks is
    # left for LockSet.remove() to detect.
    assert self.is_owned(level) or not self._upper_owned(level), (
      "Cannot remove locks at a level while not owning it or"
      " owning some at a greater one")

    lockset = self.__keyring[level]
    return lockset.remove(names)
1879

    
1880

    
1881
def _MonitorSortKey(entry):
  """Sorting key function.

  Sort by name, registration order and then order of information. This provides
  a stable sort order over different providers, even if they return the same
  name.

  @type entry: tuple
  @param entry: C{(item, idx, num)} triple; C{item} is a four-element
    sequence starting with the lock name, C{idx} the position of the item
    within its provider's information and C{num} the provider's registration
    number
  @return: tuple usable as sort key

  """
  # Unpack inside the body: tuple parameters in the signature are a syntax
  # error on Python 3, while this form parses on every Python version and
  # behaves the same for the single-argument key call.
  (item, idx, num) = entry
  (name, _, _, _) = item

  return (utils.NiceSortKey(name), num, idx)
1892

    
1893

    
1894
class LockMonitor(object):
1895
  _LOCK_ATTR = "_lock"
1896

    
1897
  def __init__(self):
    """Sets up a monitor which does not track any lock yet.

    """
    # Providers are tracked through weak keys, so that the monitor neither
    # creates circular references nor gets in the way of their deletion.
    self._locks = weakref.WeakKeyDictionary()

    # Ever increasing registration number, needed for stable sorting
    self._counter = itertools.count()

    # Taken while reading the dictionary of tracked locks
    self._lock = SharedLock("LockMonitor")
1909

    
1910
  @ssynchronized(_LOCK_ATTR)
  def RegisterLock(self, provider):
    """Starts tracking a new lock.

    @param provider: Object with a callable method named C{GetLockInfo}, taking
      a single C{set} containing the requested information items
    @note: Receiving only the function which generates the requested
      information would be nicer. However, weak references to bound methods
      (e.g. C{self.GetLockInfo}) are tricky, and none of the known
      workarounds works properly in combination with a standard
      C{WeakKeyDictionary}

    """
    assert provider not in self._locks, "Duplicate registration"

    # Duplicate names are deliberately not rejected: a lock re-created under
    # the same name within a very short timeframe can coexist with its
    # predecessor, which might still be present in the weakref dictionary.
    # Remembering the order of registrations keeps the sort order stable
    # nevertheless.
    registration_order = self._counter.next()
    self._locks[provider] = registration_order
1932

    
1933
  def _GetLockInfo(self, requested):
1934
    """Get information from all locks.
1935

1936
    """
1937
    # Must hold lock while getting consistent list of tracked items
1938
    self._lock.acquire(shared=1)
1939
    try:
1940
      items = self._locks.items()
1941
    finally:
1942
      self._lock.release()
1943

    
1944
    return [(info, idx, num)
1945
            for (provider, num) in items
1946
            for (idx, info) in enumerate(provider.GetLockInfo(requested))]
1947

    
1948
  def _Query(self, fields):
    """Builds the query object and the data needed to answer a lock query.

    @type fields: list of strings
    @param fields: List of fields to return
    @rtype: tuple
    @return: the query object and the matching L{query.LockQueryData}

    """
    qobj = query.Query(query.LOCK_FIELDS, fields)

    # Collect the requested data from all providers, then order it by name
    # and incoming order
    collected = self._GetLockInfo(qobj.RequestedData())
    ordered = sorted(collected, key=_MonitorSortKey)

    # Only the lock information itself (first element of every entry) goes
    # into the query data
    lockdata = query.LockQueryData(map(compat.fst, ordered))

    return (qobj, lockdata)
1964

    
1965
  def QueryLocks(self, fields):
    """Answers a query about all tracked locks.

    @type fields: list of strings
    @param fields: List of fields to return
    @return: the response built by L{query.GetQueryResponse}

    """
    (qobj, lockdata) = self._Query(fields)

    # Turn query object and collected data into the response
    return query.GetQueryResponse(qobj, lockdata)