Statistics
| Branch: | Tag: | Revision:

root / lib / locking.py @ adb6d685

History | View | Annotate | Download (39.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Module implementing the Ganeti locking code."""
22

    
23
# pylint: disable-msg=W0212
24

    
25
# W0212 since e.g. LockSet methods use (a lot) the internals of
26
# SharedLock
27

    
28
import os
29
import select
30
import threading
31
import time
32
import errno
33

    
34
from ganeti import errors
35
from ganeti import utils
36
from ganeti import compat
37

    
38

    
39
def ssynchronized(lock, shared=0):
  """Shared Synchronization decorator.

  Calls the function holding the given lock, either in exclusive or shared
  mode. It requires the passed lock to be a SharedLock (or support its
  semantics), i.e. to provide C{acquire(shared=...)} and C{release()}.

  @param lock: lock held for the duration of every call
  @type shared: integer (0/1) used as a boolean
  @param shared: whether the lock is taken in shared mode
  @return: a decorator; the decorated function returns whatever the wrapped
      function returns and releases the lock even if it raises

  """
  def wrap(fn):
    def sync_function(*args, **kwargs):
      lock.acquire(shared=shared)
      try:
        return fn(*args, **kwargs)
      finally:
        lock.release()

    # Keep the identity of the wrapped function, otherwise every decorated
    # function shows up as "sync_function" without a docstring in tracebacks
    # and generated documentation.
    for attr in ("__name__", "__doc__", "__module__"):
      try:
        setattr(sync_function, attr, getattr(fn, attr))
      except (AttributeError, TypeError):
        # Arbitrary callables don't necessarily carry these attributes; the
        # wrapper still works without them.
        pass

    return sync_function
  return wrap
56

    
57

    
58
class RunningTimeout(object):
  """Tracks how much of a timeout is left over several operations.

  The clock is started lazily, by the first call to L{Remaining} that has
  a timeout to track, not when the object is created.

  """
  __slots__ = [
    "_allow_negative",
    "_start_time",
    "_time_fn",
    "_timeout",
    ]

  def __init__(self, timeout, allow_negative, _time_fn=time.time):
    """Initializes this class.

    @type timeout: float or None
    @param timeout: Timeout duration; None means "no timeout"
    @type allow_negative: bool
    @param allow_negative: Whether L{Remaining} may return values below zero
    @param _time_fn: Time function for unittests
    @raise ValueError: if a negative timeout is passed

    """
    object.__init__(self)

    if timeout is not None and timeout < 0.0:
      raise ValueError("Timeout must not be negative")

    # Not started yet; see Remaining
    self._start_time = None

    self._time_fn = _time_fn
    self._allow_negative = allow_negative
    self._timeout = timeout

  def Remaining(self):
    """Returns the remaining timeout.

    @rtype: float or None
    @return: None if no timeout was given, otherwise the time left; the
        value is clamped to zero unless negative values were allowed

    """
    total = self._timeout
    if total is None:
      return None

    # The first calculation defines the starting point
    if self._start_time is None:
      self._start_time = self._time_fn()

    time_left = self._start_time + total - self._time_fn()

    if self._allow_negative:
      return time_left

    # Callers asked for a value which is always >= 0
    return max(0.0, time_left)
109

    
110

    
111
class _SingleNotifyPipeConditionWaiter(object):
  """Helper class for SingleNotifyPipeCondition

  Instances are callables which block until the poller reports an event on
  the given file descriptor or the timeout has expired.

  """
  __slots__ = [
    "_fd",
    "_poller",
    ]

  def __init__(self, poller, fd):
    """Constructor for _SingleNotifyPipeConditionWaiter

    @type poller: select.poll
    @param poller: Poller object
    @type fd: int
    @param fd: File descriptor to wait for

    """
    object.__init__(self)
    self._poller = poller
    self._fd = fd

  def __call__(self, timeout):
    """Wait for something to happen on the pipe.

    Returns (with no value) once the first reported event belongs to the
    watched file descriptor or once the timeout has run out. Interrupted
    polls (EINTR) are retried with the remaining time.

    @type timeout: float or None
    @param timeout: Timeout for waiting (can be None)

    """
    # Local import: only needed to get at the exception being handled
    import sys

    running_timeout = RunningTimeout(timeout, True)

    while True:
      remaining_time = running_timeout.Remaining()

      if remaining_time is not None:
        if remaining_time < 0.0:
          break

        # Our calculation uses seconds, poll() wants milliseconds
        remaining_time *= 1000

      try:
        result = self._poller.poll(remaining_time)
      except (EnvironmentError, select.error):
        # On older interpreters select.error is not an EnvironmentError and
        # carries the error code only in its arguments, hence both classes
        # are caught and the code is looked up in either place.
        err = sys.exc_info()[1]
        err_code = getattr(err, "errno", None)
        if err_code is None and err.args:
          err_code = err.args[0]

        if err_code != errno.EINTR:
          raise

        # Interrupted by a signal, go around and poll again
        result = None

      # Check whether we were notified
      if result and result[0][0] == self._fd:
        break
162

    
163

    
164
class _BaseCondition(object):
  """Base class containing common code for conditions.

  Some of this code is taken from python's threading module.

  The lock's own C{_release_save}, C{_acquire_restore} and C{_is_owned}
  are used when it provides them; otherwise generic versions built on
  C{acquire} and C{release} are used.

  """
  __slots__ = [
    "_lock",
    "acquire",
    "release",
    "_is_owned",
    "_acquire_restore",
    "_release_save",
    ]

  def __init__(self, lock):
    """Constructor for _BaseCondition.

    @type lock: threading.Lock
    @param lock: condition base lock

    """
    object.__init__(self)

    # Prefer the hooks of the lock itself, fall back to our own
    self._release_save = getattr(lock, "_release_save",
                                 self._base_release_save)
    self._acquire_restore = getattr(lock, "_acquire_restore",
                                    self._base_acquire_restore)
    self._is_owned = getattr(lock, "_is_owned", self._base_is_owned)

    self._lock = lock

    # Export the lock's acquire() and release() methods
    self.acquire = lock.acquire
    self.release = lock.release

  def _base_is_owned(self):
    """Check whether lock is owned by current thread.

    Done by trying a non-blocking acquire: if that succeeds nobody held the
    lock, so it is released again right away.

    """
    got_it = self._lock.acquire(0)
    if not got_it:
      return True

    self._lock.release()
    return False

  def _base_release_save(self):
    """Release the lock; there is no state to save."""
    self._lock.release()

  def _base_acquire_restore(self, _):
    """Acquire the lock again; the saved state is ignored."""
    self._lock.acquire()

  def _check_owned(self):
    """Raise an exception if the current thread doesn't own the lock.

    @raise RuntimeError: if the lock is not reported as owned

    """
    if self._is_owned():
      return

    raise RuntimeError("cannot work with un-aquired lock")
228

    
229

    
230
class SingleNotifyPipeCondition(_BaseCondition):
  """Condition which can only be notified once.

  This condition class uses pipes and poll, internally, to be able to wait for
  notification with a timeout, without resorting to polling. It is almost
  compatible with Python's threading.Condition, with the following differences:
    - notifyAll can only be called once, and no wait can happen after that
    - notify is not supported, only notifyAll

  The pipe is created by the first waiter and closed again by the last one
  to leave.

  """

  __slots__ = [
    "_poller",
    "_read_fd",
    "_write_fd",
    "_nwaiters",
    "_notified",
    ]

  _waiter_class = _SingleNotifyPipeConditionWaiter

  def __init__(self, lock):
    """Constructor for SingleNotifyPipeCondition

    @param lock: condition base lock, see L{_BaseCondition}

    """
    _BaseCondition.__init__(self, lock)
    # Pipe and poller only exist while somebody is waiting
    self._poller = None
    self._read_fd = None
    self._write_fd = None
    self._notified = False
    self._nwaiters = 0

  def _check_unnotified(self):
    """Throws an exception if already notified.

    @raise RuntimeError: if L{notifyAll} was called before

    """
    if not self._notified:
      return

    raise RuntimeError("cannot use already notified condition")

  def _Cleanup(self):
    """Cleanup open file descriptors, if any.

    """
    for slot in ("_read_fd", "_write_fd"):
      fd = getattr(self, slot)
      if fd is not None:
        os.close(fd)
        setattr(self, slot, None)

    self._poller = None

  def wait(self, timeout=None):
    """Wait for a notification.

    The lock must be held; it is released while waiting and taken again
    before returning.

    @type timeout: float or None
    @param timeout: Waiting timeout (can be None)

    """
    self._check_owned()
    self._check_unnotified()

    self._nwaiters += 1
    try:
      # First waiter sets up the pipe; only a hang-up on the reading side
      # is of interest
      if self._poller is None:
        (self._read_fd, self._write_fd) = os.pipe()
        self._poller = select.poll()
        self._poller.register(self._read_fd, select.POLLHUP)

      waiter = self._waiter_class(self._poller, self._read_fd)
      saved_state = self._release_save()
      try:
        # Block until notified or timed out
        waiter(timeout)
      finally:
        # Get the lock back, whatever happened
        self._acquire_restore(saved_state)
    finally:
      self._nwaiters -= 1
      # Last waiter out closes the pipe
      if not self._nwaiters:
        self._Cleanup()

  def notifyAll(self): # pylint: disable-msg=C0103
    """Close the writing side of the pipe to notify all waiters.

    """
    self._check_owned()
    self._check_unnotified()
    self._notified = True

    write_fd = self._write_fd
    if write_fd is not None:
      os.close(write_fd)
      self._write_fd = None
322

    
323

    
324
class PipeCondition(_BaseCondition):
  """Group-only non-polling condition with counters.

  This condition class uses pipes and poll, internally, to be able to wait for
  notification with a timeout, without resorting to polling. It is almost
  compatible with Python's threading.Condition, but only supports notifyAll and
  non-recursive locks. As an additional feature it's able to report whether
  there are any waiting threads.

  Every notification uses up one single-notify condition, which is then
  replaced by a fresh one.

  """
  __slots__ = [
    "_nwaiters",
    "_single_condition",
    ]

  _single_condition_class = SingleNotifyPipeCondition

  def __init__(self, lock):
    """Initializes this class.

    @param lock: condition base lock, see L{_BaseCondition}

    """
    _BaseCondition.__init__(self, lock)
    self._nwaiters = 0
    self._single_condition = self._single_condition_class(self._lock)

  def wait(self, timeout=None):
    """Wait for a notification.

    @type timeout: float or None
    @param timeout: Waiting timeout (can be None)

    """
    self._check_owned()

    # The attribute may be swapped for a new condition by a notifying
    # thread while we wait, so stick to the one seen now.
    current = self._single_condition

    assert self._nwaiters >= 0
    self._nwaiters += 1
    try:
      current.wait(timeout)
    finally:
      assert self._nwaiters > 0
      self._nwaiters -= 1

  def notifyAll(self): # pylint: disable-msg=C0103
    """Notify all currently waiting threads.

    """
    self._check_owned()

    used_up = self._single_condition
    used_up.notifyAll()

    # The old condition can't be notified again, later waiters get a new one
    self._single_condition = self._single_condition_class(self._lock)

  def has_waiting(self):
    """Returns whether there are active waiters.

    @rtype: bool

    """
    self._check_owned()

    return self._nwaiters != 0
385

    
386

    
387
class SharedLock(object):
  """Implements a shared lock.

  Multiple threads can acquire the lock in a shared way, calling
  C{acquire(shared=1)}. In order to acquire the lock in an exclusive way
  threads can call C{acquire()} (C{shared=0}, the default).

  The lock prevents starvation but does not guarantee that threads will acquire
  the shared lock in the order they queued for it, just that they will
  eventually do so.

  """
  __slots__ = [
    "__active_shr_c",
    "__inactive_shr_c",
    "__deleted",
    "__exc",
    "__lock",
    "__pending",
    "__shr",
    ]

  __condition_class = PipeCondition

  def __init__(self):
    """Construct a new SharedLock.

    """
    object.__init__(self)

    # Internal lock
    self.__lock = threading.Lock()

    # Queue containing waiting acquires
    self.__pending = []

    # Active and inactive conditions for shared locks
    self.__active_shr_c = self.__condition_class(self.__lock)
    self.__inactive_shr_c = self.__condition_class(self.__lock)

    # Current lock holders
    self.__shr = set()
    self.__exc = None

    # is this lock in the deleted state?
    self.__deleted = False

  def __check_deleted(self):
    """Raises an exception if the lock has been deleted.

    @raise errors.LockError: if the lock is in the deleted state

    """
    if self.__deleted:
      raise errors.LockError("Deleted lock")

  def __is_sharer(self):
    """Is the current thread sharing the lock at this time?

    """
    return threading.currentThread() in self.__shr

  def __is_exclusive(self):
    """Is the current thread holding the lock exclusively at this time?

    """
    return threading.currentThread() == self.__exc

  def __is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    This is a private version of the function, which presumes you're holding
    the internal lock.

    @param shared: see L{_is_owned}

    """
    if shared < 0:
      return self.__is_sharer() or self.__is_exclusive()
    elif shared:
      return self.__is_sharer()
    else:
      return self.__is_exclusive()

  def _is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership

    """
    self.__lock.acquire()
    try:
      return self.__is_owned(shared=shared)
    finally:
      self.__lock.release()

  def _count_pending(self):
    """Returns the number of pending acquires.

    @rtype: int

    """
    self.__lock.acquire()
    try:
      return len(self.__pending)
    finally:
      self.__lock.release()

  def __do_acquire(self, shared):
    """Actually acquire the lock.

    Only records the current thread as holder; the caller must have checked
    that the lock can be acquired.

    """
    if shared:
      self.__shr.add(threading.currentThread())
    else:
      self.__exc = threading.currentThread()

  def __can_acquire(self, shared):
    """Determine whether lock can be acquired.

    Shared mode only needs the lock not to be held exclusively, exclusive
    mode needs it not to be held at all.

    """
    if shared:
      return self.__exc is None
    else:
      return len(self.__shr) == 0 and self.__exc is None

  def __is_on_top(self, cond):
    """Checks whether the passed condition is on top of the queue.

    The caller must make sure the queue isn't empty.

    """
    return self.__pending[0] == cond

  def __acquire_unlocked(self, shared, timeout):
    """Acquire a shared lock.

    The internal lock must be held by the caller.

    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param timeout: maximum waiting time before giving up
    @rtype: bool
    @return: whether the lock was acquired
    @raise errors.LockError: if the lock is, or gets, deleted

    """
    self.__check_deleted()

    # We cannot acquire the lock if we already have it
    assert not self.__is_owned(), "double acquire() on a non-recursive lock"

    # Check whether someone else holds the lock or there are pending acquires.
    if not self.__pending and self.__can_acquire(shared):
      # Apparently not, can acquire lock directly.
      self.__do_acquire(shared)
      return True

    if shared:
      # All shared acquires queued at the same time wait on one condition
      wait_condition = self.__active_shr_c

      # Check if we're not yet in the queue
      if wait_condition not in self.__pending:
        self.__pending.append(wait_condition)
    else:
      # Exclusive acquires each get a condition of their own
      wait_condition = self.__condition_class(self.__lock)
      # Always add to queue
      self.__pending.append(wait_condition)

    try:
      # Wait until we become the topmost acquire in the queue or the timeout
      # expires.
      while not (self.__is_on_top(wait_condition) and
                 self.__can_acquire(shared)):
        # Wait for notification
        wait_condition.wait(timeout)
        self.__check_deleted()

        # A lot of code assumes blocking acquires always succeed. Loop
        # internally for that case.
        if timeout is not None:
          break

      if self.__is_on_top(wait_condition) and self.__can_acquire(shared):
        self.__do_acquire(shared)
        return True
    finally:
      # Remove condition from queue if there are no more waiters
      if not wait_condition.has_waiting() and not self.__deleted:
        self.__pending.remove(wait_condition)

    return False

  def acquire(self, shared=0, timeout=None, test_notify=None):
    """Acquire a shared lock.

    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @type test_notify: callable or None
    @param test_notify: Special callback function for unittesting
    @rtype: bool
    @return: whether the lock was acquired
    @raise errors.LockError: if the lock is, or gets, deleted

    """
    self.__lock.acquire()
    try:
      # We already got the lock, notify now
      if __debug__ and callable(test_notify):
        test_notify()

      return self.__acquire_unlocked(shared, timeout)
    finally:
      self.__lock.release()

  def release(self):
    """Release a Shared Lock.

    You must have acquired the lock, either in shared or in exclusive mode,
    before calling this function.

    """
    self.__lock.acquire()
    try:
      assert self.__is_exclusive() or self.__is_sharer(), \
        "Cannot release non-owned lock"

      # Autodetect release type
      if self.__is_exclusive():
        self.__exc = None
      else:
        self.__shr.remove(threading.currentThread())

      # Notify topmost condition in queue
      if self.__pending:
        first_condition = self.__pending[0]
        first_condition.notifyAll()

        # Shared acquires arriving from now on must queue on the other
        # condition, behind whatever is pending already.
        if first_condition == self.__active_shr_c:
          self.__active_shr_c = self.__inactive_shr_c
          self.__inactive_shr_c = first_condition

    finally:
      self.__lock.release()

  def delete(self, timeout=None):
    """Delete a Shared Lock.

    This operation will declare the lock for removal. First the lock will be
    acquired in exclusive mode if you don't already own it, then the lock
    will be put in a state where any future and pending acquire() fail.

    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @rtype: bool
    @return: whether the lock was deleted; False if it couldn't be acquired
        within the timeout
    @raise errors.LockError: if the lock is already deleted

    """
    self.__lock.acquire()
    try:
      assert not self.__is_sharer(), "Cannot delete() a lock while sharing it"

      self.__check_deleted()

      # The caller is allowed to hold the lock exclusively already.
      acquired = self.__is_exclusive()

      if not acquired:
        acquired = self.__acquire_unlocked(0, timeout)

      if acquired:
        # Ownership can only be verified after a successful acquire; when
        # the acquire timed out the lock is not ours and False is returned.
        assert self.__is_exclusive() and not self.__is_sharer(), \
          "Lock wasn't acquired in exclusive mode"

        self.__deleted = True
        self.__exc = None

        # Notify all acquires. They'll throw an error.
        while self.__pending:
          self.__pending.pop().notifyAll()

      return acquired
    finally:
      self.__lock.release()

  def _release_save(self):
    """Release the lock, returning whether it was held in shared mode.

    The return value is meant to be passed to L{_acquire_restore}.

    """
    shared = self.__is_sharer()
    self.release()
    return shared

  def _acquire_restore(self, shared):
    """Acquire the lock again in the mode saved by L{_release_save}.

    """
    self.acquire(shared=shared)
671

    
672

    
673
# Whenever we want to acquire a full LockSet we pass None as the value
674
# to acquire.  Hide this behind this nicely named constant.
675
ALL_SET = None
676

    
677

    
678
class _AcquireTimeout(Exception):
  """Raised internally when acquiring locks of a set ran out of time.

  It is caught again by L{LockSet.acquire}, which then returns None.

  """
682

    
683

    
684
class LockSet:
685
  """Implements a set of locks.
686

687
  This abstraction implements a set of shared locks for the same resource type,
688
  distinguished by name. The user can lock a subset of the resources and the
689
  LockSet will take care of acquiring the locks always in the same order, thus
690
  preventing deadlock.
691

692
  All the locks needed in the same set must be acquired together, though.
693

694
  """
695
  def __init__(self, members=None):
    """Constructs a new LockSet.

    @type members: list of strings
    @param members: initial members of the set; one lock is created for
        each of them

    """
    # Set-level lock, used internally to guarantee coherency.
    self.__lock = SharedLock()

    if members is None:
      members = []

    # The lockdict indexes the relationship name -> lock
    # The order-of-locking is implied by the alphabetical order of names
    self.__lockdict = dict((name, SharedLock()) for name in members)

    # Maps each thread to the set of lock names it owns. Every thread may
    # only ever touch its own key, which is what makes access without a
    # global lock possible; *no* other kind of access is allowed (e.g. no
    # looping over the keys). Use the *_owned helper functions, and never do
    # anything else than __owners[threading.currentThread()], or there will
    # be trouble.
    self.__owners = {}
721

    
722
  def _is_owned(self):
723
    """Is the current thread a current level owner?"""
724
    return threading.currentThread() in self.__owners
725

    
726
  def _add_owned(self, name=None):
727
    """Note the current thread owns the given lock"""
728
    if name is None:
729
      if not self._is_owned():
730
        self.__owners[threading.currentThread()] = set()
731
    else:
732
      if self._is_owned():
733
        self.__owners[threading.currentThread()].add(name)
734
      else:
735
        self.__owners[threading.currentThread()] = set([name])
736

    
737
  def _del_owned(self, name=None):
738
    """Note the current thread owns the given lock"""
739

    
740
    assert not (name is None and self.__lock._is_owned()), \
741
           "Cannot hold internal lock when deleting owner status"
742

    
743
    if name is not None:
744
      self.__owners[threading.currentThread()].remove(name)
745

    
746
    # Only remove the key if we don't hold the set-lock as well
747
    if (not self.__lock._is_owned() and
748
        not self.__owners[threading.currentThread()]):
749
      del self.__owners[threading.currentThread()]
750

    
751
  def _list_owned(self):
752
    """Get the set of resource names owned by the current thread"""
753
    if self._is_owned():
754
      return self.__owners[threading.currentThread()].copy()
755
    else:
756
      return set()
757

    
758
  def _release_and_delete_owned(self):
759
    """Release and delete all resources owned by the current thread"""
760
    for lname in self._list_owned():
761
      lock = self.__lockdict[lname]
762
      if lock._is_owned():
763
        lock.release()
764
      self._del_owned(name=lname)
765

    
766
  def __names(self):
767
    """Return the current set of names.
768

769
    Only call this function while holding __lock and don't iterate on the
770
    result after releasing the lock.
771

772
    """
773
    return self.__lockdict.keys()
774

    
775
  def _names(self):
776
    """Return a copy of the current set of elements.
777

778
    Used only for debugging purposes.
779

780
    """
781
    # If we don't already own the set-level lock acquired
782
    # we'll get it and note we need to release it later.
783
    release_lock = False
784
    if not self.__lock._is_owned():
785
      release_lock = True
786
      self.__lock.acquire(shared=1)
787
    try:
788
      result = self.__names()
789
    finally:
790
      if release_lock:
791
        self.__lock.release()
792
    return set(result)
793

    
794
  def acquire(self, names, timeout=None, shared=0, test_notify=None):
    """Acquire a set of resource locks.

    @type names: list of strings (or string)
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names); None acquires the
        whole set
    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @type timeout: float or None
    @param timeout: Maximum time to acquire all locks
    @type test_notify: callable or None
    @param test_notify: Special callback function for unittesting

    @return: Set of all locks successfully acquired or None in case of timeout

    @raise errors.LockError: when any lock we try to acquire has
        been deleted before we succeed. In this case none of the
        locks requested will be acquired.

    """
    assert timeout is None or timeout >= 0.0

    # Check we don't already own locks at this level
    assert not self._is_owned(), "Cannot acquire locks in the same set twice"

    # The timeout covers all lock acquires together, so the time spent
    # waiting has to be tracked across them.
    running_timeout = RunningTimeout(timeout, False)
    remaining = running_timeout.Remaining

    try:
      if names is None:
        # Acquire the whole set: hold the set-lock so that no new names can
        # be added before we release, then take the names known right now.
        # Some of them may be deleted in the meantime, but we cope with
        # that.
        #
        # The set-lock is taken in the same mode as the locks themselves.
        # Shared would be nicer to everybody else, but when acquiring
        # exclusively they couldn't use the resources anyway, and holding
        # the set-lock exclusively is what allows add() while owning it.
        if not self.__lock.acquire(shared=shared, timeout=remaining()):
          raise _AcquireTimeout()

        try:
          # note we own the set-lock
          self._add_owned()

          return self.__acquire_inner(self.__names(), True, shared,
                                      remaining, test_notify)
        except:
          # Nothing should go wrong recording the ownership, but if it (or
          # anything else) did, give the set-lock back and let the error
          # through. Something is really wrong at this point anyway.
          self.__lock.release()
          self._del_owned()
          raise

      # A single name is accepted in place of a list of names
      if isinstance(names, basestring):
        names = [names]

      return self.__acquire_inner(names, False, shared, remaining,
                                  test_notify)

    except _AcquireTimeout:
      return None
862

    
863
  def __acquire_inner(self, names, want_all, shared, timeout_fn, test_notify):
    """Inner logic for acquiring a number of locks.

    On any failure all locks owned by the current thread in this set are
    released again before the exception is passed on.

    @param names: Names of the locks to be acquired
    @param want_all: Whether all locks in the set should be acquired
    @param shared: Whether to acquire in shared mode
    @param timeout_fn: Function returning remaining timeout
    @param test_notify: Special callback function for unittesting
    @return: set with the names of the locks acquired
    @raise errors.LockError: if a requested lock doesn't exist (anymore) or
        a blocking acquire failed
    @raise _AcquireTimeout: if a lock couldn't be acquired in time

    """
    # Look everything up in __lockdict first. There is no guarantee the
    # locks will still be there later, but a wrong name is found a lot
    # faster this way. Going through the names in sorted order is what
    # prevents deadlocks.
    wanted = []

    for lname in sorted(utils.UniqueSequence(names)):
      try:
        lock = self.__lockdict[lname] # raises KeyError if lock is not there
      except KeyError:
        if not want_all:
          raise errors.LockError("Non-existing lock in set (%s)" % lname)

        # Acquiring the whole set, an element gone missing doesn't matter
        continue

      wanted.append((lname, lock))

    # Names of the locks we effectively got
    acquired = set()

    try:
      # Walk the (private) sorted list and acquire() each lock. They may
      # not exist anymore by the time we get to them, but .acquire() is
      # safe and tells us when a lock was deleted.
      for (lname, lock) in wanted:
        test_notify_fn = None
        if __debug__ and callable(test_notify):
          test_notify_fn = lambda: test_notify(lname)

        timeout = timeout_fn()

        try:
          # raises LockError if the lock was deleted
          acq_success = lock.acquire(shared=shared, timeout=timeout,
                                     test_notify=test_notify_fn)
        except errors.LockError:
          if not want_all:
            raise errors.LockError("Non-existing lock in set (%s)" % lname)

          # Acquiring the whole set, an element gone missing doesn't matter
          continue

        if not acq_success:
          # Couldn't get lock or timeout occurred
          if timeout is None:
            # This shouldn't happen as SharedLock.acquire(timeout=None) is
            # blocking.
            raise errors.LockError("Failed to get lock %s" % lname)

          raise _AcquireTimeout()

        try:
          # now the lock cannot be deleted, we have it!
          self._add_owned(name=lname)
          acquired.add(lname)

        except:
          # Recording the ownership shouldn't fail, but if it did release
          # this lock and pass the error on. Something is really wrong
          # after this.
          if lock._is_owned():
            lock.release()
          raise

    except:
      # Release all owned locks
      self._release_and_delete_owned()
      raise

    return acquired
949

    
950
  def release(self, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive mode,
    before releasing them.

    @type names: list of strings, or None
    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level).

    """
    assert self._is_owned(), "release() on lock set while not owner"

    # A single name is accepted in place of a list of names
    if isinstance(names, basestring):
      names = [names]

    if names is not None:
      names = set(names)
      assert self._list_owned().issuperset(names), (
               "release() on unheld resources %s" %
               names.difference(self._list_owned()))
    else:
      names = self._list_owned()

    # The "all elements" lock goes first, if we hold it, so that 'add' can
    # work again.
    if self.__lock._is_owned():
      self.__lock.release()
      self._del_owned()

    for lockname in names:
      # This relies on locks not leaving __lockdict without being
      # exclusively held.
      held_lock = self.__lockdict[lockname]
      held_lock.release()
      self._del_owned(name=lockname)
986

    
987
  def add(self, names, acquired=0, shared=0):
    """Add a new set of elements to the set

    The set-level lock is held while the new elements are inserted; if it
    was not already owned it is acquired here and released again before
    returning.

    @type names: list of strings (or a single string)
    @param names: names of the new elements to add
    @type acquired: integer (0/1) used as a boolean
    @param acquired: pre-acquire the new resource?
    @type shared: integer (0/1) used as a boolean
    @param shared: is the pre-acquisition shared?
    @return: True
    @raise errors.LockError: if any of the given names is already present
        in the set

    """
    # Check we don't already own locks at this level
    assert not self._is_owned() or self.__lock._is_owned(shared=0), \
      "Cannot add locks if the set is only partially owned, or shared"

    # Support passing in a single resource to add rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we don't already own the set-level lock acquired in an exclusive way
    # we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock._is_owned():
      release_lock = True
      self.__lock.acquire()

    try:
      invalid_names = set(self.__names()).intersection(names)
      if invalid_names:
        # This must be an explicit raise, not an assert, because assert is
        # turned off when using optimization, and this can happen because of
        # concurrency even if the user doesn't want it.
        raise errors.LockError("duplicate add() (%s)" % invalid_names)

      for lockname in names:
        lock = SharedLock()

        if acquired:
          lock.acquire(shared=shared)
          # now the lock cannot be deleted, we have it!
          try:
            self._add_owned(name=lockname)
          except:
            # We shouldn't have problems adding the lock to the owners list,
            # but if we did we'll try to release this lock and re-raise
            # exception.  Of course something is going to be really wrong,
            # after this.  On the other hand the lock hasn't been added to the
            # __lockdict yet so no other threads should be pending on it. This
            # release is just a safety measure.
            lock.release()
            raise

        # Publish the lock only after it has (optionally) been acquired, so
        # it only becomes reachable through __lockdict once we own it.
        self.__lockdict[lockname] = lock

    finally:
      # Only release __lock if we were not holding it previously.
      if release_lock:
        self.__lock.release()

    return True
1047

    
1048
  def remove(self, names):
    """Remove elements from the lock set.

    You can either not hold anything in the lockset or already hold a superset
    of the elements you want to delete, exclusively.

    @type names: list of strings (or a single string)
    @param names: names of the resource to remove.

    @return: a list of locks which we removed; the list is always
        equal to the names list if we were holding all the locks
        exclusively

    """
    # Support passing in a single resource to remove rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we own any subset of this lock it must be a superset of what we want
    # to delete. The ownership must also be exclusive, but that will be checked
    # by the lock itself.
    assert not self._is_owned() or self._list_owned().issuperset(names), (
      "remove() on acquired lockset while not owning all elements")

    removed = []

    for lname in names:
      # Calling delete() acquires the lock exclusively if we don't already own
      # it, and causes all pending and subsequent lock acquires to fail. It's
      # fine to call it out of order because delete() also implies release(),
      # and the assertion above guarantees that we either already hold
      # everything we want to delete, or we hold none.
      try:
        self.__lockdict[lname].delete()
        removed.append(lname)
      except (KeyError, errors.LockError):
        # Unknown name, or the lock is already gone: skip this element.
        # This cannot happen if we were already holding it, verify:
        assert not self._is_owned(), "remove failed while holding lockset"
      else:
        # If no LockError was raised we are the ones who deleted the lock.
        # This means we can safely remove it from lockdict, as any further or
        # pending delete() or acquire() will fail (and nobody can have the lock
        # since before our call to delete()).
        #
        # This is done in an else clause because if the exception was thrown
        # it's the job of the one who actually deleted it.
        del self.__lockdict[lname]
        # And let's remove it from our private list if we owned it.
        if self._is_owned():
          self._del_owned(name=lname)

    return removed
1100

    
1101

    
1102
# Locking levels, must be acquired in increasing order (i.e. cluster first,
# then instances, then nodes).
# Current rules are:
#   - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be
#   acquired before performing any operation, either in shared or in exclusive
#   mode. Acquiring the BGL in exclusive mode is discouraged and should be
#   avoided.
#   - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
#   If you need more than one node, or more than one instance, acquire them at
#   the same time.
LEVEL_CLUSTER = 0
LEVEL_INSTANCE = 1
LEVEL_NODE = 2

# All levels, in locking order; each value equals its index in this list
LEVELS = [LEVEL_CLUSTER,
          LEVEL_INSTANCE,
          LEVEL_NODE]

# Lock levels which are modifiable
LEVELS_MOD = [LEVEL_NODE, LEVEL_INSTANCE]

# Human-readable level names
LEVEL_NAMES = {
  LEVEL_CLUSTER: "cluster",
  LEVEL_INSTANCE: "instance",
  LEVEL_NODE: "node",
  }

# Constant for the big ganeti lock
BGL = 'BGL'
1130

    
1131

    
1132
class GanetiLockManager(object):
  """The Ganeti Locking Library

  The purpose of this small library is to manage locking for ganeti clusters
  in a central place, while at the same time doing dynamic checks against
  possible deadlocks. It will also make it easier to transition to a different
  lock type should we migrate away from python threads.

  """
  # The manager instance created so far, if any (set by __init__)
  _instance = None

  def __init__(self, nodes=None, instances=None):
    """Constructs a new GanetiLockManager object.

    There should be only a GanetiLockManager object at any time, so this
    function raises an error if this is not the case. The check is an
    assertion, hence it is skipped when Python runs with optimization.

    @param nodes: list of node names
    @param instances: list of instance names

    """
    assert self.__class__._instance is None, \
           "double GanetiLockManager instance"

    self.__class__._instance = self

    # The keyring contains all the locks, at their level and in the correct
    # locking order.
    self.__keyring = {
      LEVEL_CLUSTER: LockSet([BGL]),
      LEVEL_NODE: LockSet(nodes),
      LEVEL_INSTANCE: LockSet(instances),
    }

  def _names(self, level):
    """List the lock names at the given level.

    This can be used for debugging/testing purposes.

    @param level: the level whose list of locks to get

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    return self.__keyring[level]._names()

  def _is_owned(self, level):
    """Check whether we are owning locks at the given level

    @param level: the level to check

    """
    return self.__keyring[level]._is_owned()

  # Public alias of _is_owned
  is_owned = _is_owned

  def _list_owned(self, level):
    """Get the set of owned locks at the given level

    @param level: the level whose owned locks to list

    """
    return self.__keyring[level]._list_owned()

  def _upper_owned(self, level):
    """Check whether we own any lock at a level greater than the given one.

    @param level: the reference level
    @return: a true value if at least one lock is owned at a level above
        C{level}, a false value otherwise

    """
    # This way of checking only works if LEVELS[i] = i, which we check for in
    # the test cases.
    return compat.any((self._is_owned(l) for l in LEVELS[level + 1:]))

  def _BGL_owned(self): # pylint: disable-msg=C0103
    """Check if the current thread owns the BGL.

    Both an exclusive or a shared acquisition work.

    """
    return BGL in self.__keyring[LEVEL_CLUSTER]._list_owned()

  @staticmethod
  def _contains_BGL(level, names): # pylint: disable-msg=C0103
    """Check if the level contains the BGL.

    Check if acting on the given level and set of names will change
    the status of the Big Ganeti Lock.

    @param level: the level being acted upon
    @param names: the lock names being acted upon; None stands for all the
        locks at that level

    """
    return level == LEVEL_CLUSTER and (names is None or BGL in names)

  def acquire(self, level, names, timeout=None, shared=0):
    """Acquire a set of resource locks, at the same level.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be acquired
    @type names: list of strings (or string)
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names)
    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default
        an exclusive lock will be acquired
    @type timeout: float
    @param timeout: Maximum time to acquire all locks
    @return: whatever the acquire() call on the lock set at that level
        returns

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Check that we are either acquiring the Big Ganeti Lock or we already own
    # it. Some "legacy" opcodes need to be sure they are run non-concurrently
    # so even if we've migrated we need to at least share the BGL to be
    # compatible with them. Of course if we own the BGL exclusively there's no
    # point in acquiring any other lock, unless perhaps we are half way through
    # the migration of the current opcode.
    assert (self._contains_BGL(level, names) or self._BGL_owned()), (
            "You must own the Big Ganeti Lock before acquiring any other")

    # Check we don't own locks at upper levels.
    assert not self._upper_owned(level), ("Cannot acquire locks at a level"
           " while owning some at a greater one")

    # Acquire the locks in the set.
    return self.__keyring[level].acquire(names, shared=shared, timeout=timeout)

  def release(self, level, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive
    mode, before releasing them.

    @type level: member of locking.LEVELS
    @param level: the level at which the locks shall be released
    @type names: list of strings, or None
    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level)

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    # The BGL may only be given up once nothing above the cluster level is
    # held anymore; the message lists what is still owned at each level.
    assert (not self._contains_BGL(level, names) or
            not self._upper_owned(LEVEL_CLUSTER)), (
            "Cannot release the Big Ganeti Lock while holding something"
            " at upper levels (%r)" %
            (utils.CommaJoin(["%s=%r" % (LEVEL_NAMES[i], self._list_owned(i))
                              for i in self.__keyring.keys()]), ))

    # Release will complain if we don't own the locks already
    return self.__keyring[level].release(names)

  def add(self, level, names, acquired=0, shared=0):
    """Add locks at the specified level.

    @type level: member of locking.LEVELS_MOD
    @param level: the level at which the locks shall be added
    @type names: list of strings
    @param names: names of the locks to add
    @type acquired: integer (0/1) used as a boolean
    @param acquired: whether to acquire the newly added locks
    @type shared: integer (0/1) used as a boolean
    @param shared: whether the acquisition will be shared

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " operations")
    assert not self._upper_owned(level), ("Cannot add locks at a level"
           " while owning some at a greater one")
    return self.__keyring[level].add(names, acquired=acquired, shared=shared)

  def remove(self, level, names):
    """Remove locks from the specified level.

    You must either already own the locks you are trying to remove
    exclusively or not own any lock at an upper level.

    @type level: member of locking.LEVELS_MOD
    @param level: the level at which the locks shall be removed
    @type names: list of strings
    @param names: the names of the locks which shall be removed
        (special lock names, or instance/node names)

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " operations")
    # Check we either own the level or don't own anything from here
    # up. LockSet.remove() will check the case in which we don't own
    # all the needed resources, or we have a shared ownership.
    assert self._is_owned(level) or not self._upper_owned(level), (
           "Cannot remove locks at a level while not owning it or"
           " owning some at a greater one")
    return self.__keyring[level].remove(names)