Statistics
| Branch: | Tag: | Revision:

root / lib / mcpu.py @ a52978c7

History | View | Annotate | Download (18.6 kB)

1 2f31098c Iustin Pop
#
2 a8083063 Iustin Pop
#
3 a8083063 Iustin Pop
4 a1a7bc78 Iustin Pop
# Copyright (C) 2006, 2007, 2011 Google Inc.
5 a8083063 Iustin Pop
#
6 a8083063 Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 a8083063 Iustin Pop
# it under the terms of the GNU General Public License as published by
8 a8083063 Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 a8083063 Iustin Pop
# (at your option) any later version.
10 a8083063 Iustin Pop
#
11 a8083063 Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 a8083063 Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a8083063 Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a8083063 Iustin Pop
# General Public License for more details.
15 a8083063 Iustin Pop
#
16 a8083063 Iustin Pop
# You should have received a copy of the GNU General Public License
17 a8083063 Iustin Pop
# along with this program; if not, write to the Free Software
18 a8083063 Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a8083063 Iustin Pop
# 02110-1301, USA.
20 a8083063 Iustin Pop
21 a8083063 Iustin Pop
22 a8083063 Iustin Pop
"""Module implementing the logic behind the cluster operations
23 a8083063 Iustin Pop

24 a8083063 Iustin Pop
This module implements the logic for doing operations in the cluster. There
25 a8083063 Iustin Pop
are two kinds of classes defined:
26 a8083063 Iustin Pop
  - logical units, which know how to deal with their specific opcode only
27 a8083063 Iustin Pop
  - the processor, which dispatches the opcodes to their logical units
28 a8083063 Iustin Pop

29 a8083063 Iustin Pop
"""
30 a8083063 Iustin Pop
31 a5eb7789 Iustin Pop
import logging
32 407339d0 Michael Hanselmann
import random
33 407339d0 Michael Hanselmann
import time
34 a8083063 Iustin Pop
35 a8083063 Iustin Pop
from ganeti import opcodes
36 a8083063 Iustin Pop
from ganeti import constants
37 a8083063 Iustin Pop
from ganeti import errors
38 a8083063 Iustin Pop
from ganeti import rpc
39 a8083063 Iustin Pop
from ganeti import cmdlib
40 04864530 Guido Trotter
from ganeti import locking
41 557838c1 René Nussbaumer
from ganeti import utils
42 ebc75510 Michael Hanselmann
from ganeti import compat
43 a8083063 Iustin Pop
44 7c0d6283 Michael Hanselmann
45 a1a7bc78 Iustin Pop
_OP_PREFIX = "Op"
46 a1a7bc78 Iustin Pop
_LU_PREFIX = "LU"
47 a1a7bc78 Iustin Pop
48 a1a7bc78 Iustin Pop
49 831bbbc1 Michael Hanselmann
class LockAcquireTimeout(Exception):
  """Signals that locks could not be acquired within the allowed time.

  Raised by the processor when the lock manager returns without having
  acquired the requested locks.

  """
53 407339d0 Michael Hanselmann
54 407339d0 Michael Hanselmann
55 e3200b18 Michael Hanselmann
def _CalculateLockAttemptTimeouts():
  """Build the list of per-attempt timeouts for lock acquisition.

  The list starts with C{constants.LOCK_ATTEMPTS_MINWAIT}; every following
  entry is derived from the previous one and clamped to the range
  [C{LOCK_ATTEMPTS_MINWAIT}, C{LOCK_ATTEMPTS_MAXWAIT}]. Entries are added
  until their sum reaches C{constants.LOCK_ATTEMPTS_TIMEOUT}.

  @rtype: list
  @return: timeouts, one per non-blocking acquire attempt

  """
  minwait = constants.LOCK_ATTEMPTS_MINWAIT
  maxwait = constants.LOCK_ATTEMPTS_MAXWAIT

  schedule = [minwait]
  elapsed = schedule[0]

  # Keep adding attempts until they cover at least LOCK_ATTEMPTS_TIMEOUT in
  # total; after that the caller switches to a blocking acquire
  while elapsed < constants.LOCK_ATTEMPTS_TIMEOUT:
    grown = (schedule[-1] * 1.05) ** 1.25

    # The upper cap gives other jobs a chance to run while we are still
    # trying to get our locks; the lower cap is only there for safety
    step = max(min(grown, maxwait), minwait)

    schedule.append(step)
    elapsed += step

  return schedule
78 e3200b18 Michael Hanselmann
79 e3200b18 Michael Hanselmann
80 a7770f03 Michael Hanselmann
class LockAttemptTimeoutStrategy(object):
  """Hands out the timeout to use for each successive lock acquire attempt.

  The timeouts are taken, in order, from the precomputed
  L{_TIMEOUT_PER_ATTEMPT} list; once that list is used up, C{None} is
  returned so the caller does a blocking acquire.

  """
  __slots__ = [
    "_timeouts",
    "_random_fn",
    "_time_fn",
    ]

  # Computed once, at class creation time, and shared by all instances
  _TIMEOUT_PER_ATTEMPT = _CalculateLockAttemptTimeouts()

  def __init__(self, _time_fn=time.time, _random_fn=random.random):
    """Initializes this class.

    @param _time_fn: Time function for unittests
    @param _random_fn: Random number generator for unittests

    """
    object.__init__(self)

    self._time_fn = _time_fn
    self._random_fn = _random_fn
    # Each instance walks the shared list with its own iterator
    self._timeouts = iter(self._TIMEOUT_PER_ATTEMPT)

  def NextAttempt(self):
    """Returns the timeout for the next attempt.

    @rtype: float or None
    @return: the next timeout with a random variation applied, or C{None}
        once all precomputed timeouts have been handed out

    """
    try:
      base = self._timeouts.next()
    except StopIteration:
      # No more timeouts, do blocking acquire
      return None

    # Add a small variation (-/+ 5%) to the timeout. This helps in situations
    # where two or more jobs are fighting for the same lock(s).
    spread = base * 0.1
    return base + ((self._random_fn() * spread) - (spread * 0.5))
123 407339d0 Michael Hanselmann
124 407339d0 Michael Hanselmann
125 b459a848 Andrea Spadaccini
class OpExecCbBase: # pylint: disable=W0232
  """Base class for the callbacks used while executing an OpCode.

  All methods except L{SubmitManyJobs} are no-ops here.

  """
  def NotifyStart(self):
    """Hook invoked right before the LU is executed.

    The processor calls this just before starting the LU's Exec() method,
    i.e. once all locks have been acquired.

    """

  def Feedback(self, *args):
    """Passes feedback from the LU code on to the end-user.

    """

  def CheckCancel(self):
    """Checks whether the job has been cancelled.

    """

  def SubmitManyJobs(self, jobs):
    """Submits jobs for processing.

    See L{jqueue.JobQueue.SubmitManyJobs}.

    @raise NotImplementedError: always, in this base class

    """
    raise NotImplementedError
154 6a373640 Michael Hanselmann
155 031a3e57 Michael Hanselmann
156 a1a7bc78 Iustin Pop
def _LUNameForOpName(opname):
  """Derives the logical unit name from an OpCode name.

  The leading L{_OP_PREFIX} of the opcode name is replaced by L{_LU_PREFIX}.

  @type opname: string
  @param opname: OpCode class name, must start with L{_OP_PREFIX}
  @rtype: string
  @return: name of the corresponding LU class

  """
  has_prefix = opname.startswith(_OP_PREFIX)
  assert has_prefix, \
      "Invalid OpCode name, doesn't start with %s: %s" % (_OP_PREFIX, opname)

  suffix = opname[len(_OP_PREFIX):]
  return _LU_PREFIX + suffix
164 a1a7bc78 Iustin Pop
165 a1a7bc78 Iustin Pop
166 a1a7bc78 Iustin Pop
def _ComputeDispatchTable():
  """Builds the mapping from opcode classes to logical unit classes.

  Only opcodes whose C{WITH_LU} attribute is true are included; the LU class
  is looked up in L{cmdlib} under the name given by L{_LUNameForOpName}.

  @rtype: dict
  @return: opcode class as key, LU class as value

  """
  table = {}

  for op in opcodes.OP_MAPPING.values():
    if op.WITH_LU:
      table[op] = getattr(cmdlib, _LUNameForOpName(op.__name__))

  return table
173 a1a7bc78 Iustin Pop
174 a1a7bc78 Iustin Pop
175 a8083063 Iustin Pop
class Processor(object):
  """Object which runs OpCodes.

  For each opcode the matching logical unit (LU) class is looked up in
  L{DISPATCH_TABLE}. The LU is then run with the locks it declares and with
  its hooks, and its result is checked against the opcode's C{OP_RESULT}.

  """
  DISPATCH_TABLE = _ComputeDispatchTable()

  def __init__(self, context, ec_id):
    """Constructor for Processor

    @type context: GanetiContext
    @param context: global Ganeti context
    @type ec_id: string
    @param ec_id: execution context identifier

    """
    self.context = context
    self._ec_id = ec_id
    # Callbacks are only set for the duration of ExecOpCode
    self._cbs = None
    self.rpc = rpc.RpcRunner(context.cfg)
    # Class used to build the hooks master; kept as an attribute so that it
    # can be replaced
    self.hmclass = HooksMaster

  def _AcquireLocks(self, level, names, shared, timeout, priority):
    """Acquires locks via the Ganeti lock manager.

    If callbacks are set, the job cancellation check is run before trying
    to acquire the locks.

    @type level: int
    @param level: Lock level
    @type names: list or string
    @param names: Lock names
    @type shared: bool
    @param shared: Whether the locks should be acquired in shared mode
    @type timeout: None or float
    @param timeout: Timeout for acquiring the locks
    @type priority: number or None
    @param priority: Priority for acquiring lock(s)
    @return: whatever the lock manager's C{acquire} returned
    @raise LockAcquireTimeout: In case locks couldn't be acquired in specified
        amount of time

    """
    if self._cbs:
      self._cbs.CheckCancel()

    acquired = self.context.glm.acquire(level, names, shared=shared,
                                        timeout=timeout, priority=priority)

    # The lock manager signals a timeout by returning None
    if acquired is None:
      raise LockAcquireTimeout()

    return acquired

  def _ProcessResult(self, result):
    """Examines opcode result.

    If necessary, additional processing on the result is done: for a
    L{cmdlib.ResultWithJobs} the contained jobs are submitted through the
    callbacks and the submission result is stored in the returned dictionary
    under C{constants.JOB_IDS_KEY}.

    @param result: value returned by the LU's C{Exec}
    @return: the (possibly converted) opcode result

    """
    if isinstance(result, cmdlib.ResultWithJobs):
      # Submit jobs
      job_submission = self._cbs.SubmitManyJobs(result.jobs)

      # Build dictionary
      result = result.other

      assert constants.JOB_IDS_KEY not in result, \
        "Key '%s' found in additional return values" % constants.JOB_IDS_KEY

      result[constants.JOB_IDS_KEY] = job_submission

    return result

  def _ExecLU(self, lu):
    """Logical Unit execution sequence.

    Runs the prerequisite check, the pre-phase hooks, the LU itself and the
    post-phase hooks. If the configuration's write count changed while
    running the LU, the hooks master's config update is run as well, even
    when the LU or the post-phase hooks raised an exception.

    @param lu: the logical unit instance to execute
    @return: the LU result, or C{lu.dry_run_result} in dry-run mode

    """
    write_count = self.context.cfg.write_count
    lu.CheckPrereq()
    # Use the class stored on the processor instead of naming HooksMaster
    # directly, so that a replaced self.hmclass is honoured here too
    hm = self.hmclass(self.rpc.call_hooks_runner, lu)
    h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE)
    lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results,
                     self.Log, None)

    if getattr(lu.op, "dry_run", False):
      # in this mode, no post-hooks are run, and the config is not
      # written (as it might have been modified by another LU, and we
      # shouldn't do writeout on behalf of other threads)
      self.LogInfo("dry-run mode requested, not actually executing"
                   " the operation")
      return lu.dry_run_result

    try:
      result = self._ProcessResult(lu.Exec(self.Log))
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST)
      result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results,
                                self.Log, result)
    finally:
      # FIXME: This needs locks if not lu_class.REQ_BGL
      if write_count != self.context.cfg.write_count:
        hm.RunConfigUpdate()

    return result

  def _LockAndExecLU(self, lu, level, calc_timeout, priority):
    """Execute a Logical Unit, with the needed locks.

    This is a recursive function that starts locking the given level, and
    proceeds up, till there are no more locks to acquire. Then it executes the
    given LU and its opcodes.

    @param lu: the logical unit instance to execute
    @type level: int
    @param level: lock level to handle in this call
    @type calc_timeout: callable
    @param calc_timeout: called without arguments before acquiring locks;
        its return value is used as the acquire timeout
    @type priority: number or None
    @param priority: Priority for acquiring lock(s)
    @return: the result of executing the LU
    @raise LockAcquireTimeout: In case locks couldn't be acquired in specified
        amount of time

    """
    adding_locks = level in lu.add_locks
    acquiring_locks = level in lu.needed_locks
    if level not in locking.LEVELS:
      # All levels have been handled, the LU can be run now
      if self._cbs:
        self._cbs.NotifyStart()

      result = self._ExecLU(lu)

    elif adding_locks and acquiring_locks:
      # We could both acquire and add locks at the same level, but for now we
      # don't need this, so we'll avoid the complicated code needed.
      raise NotImplementedError("Can't declare locks to acquire when adding"
                                " others")

    elif adding_locks or acquiring_locks:
      lu.DeclareLocks(level)
      share = lu.share_locks[level]

      try:
        assert adding_locks ^ acquiring_locks, \
          "Locks must be either added or acquired"

        if acquiring_locks:
          # Acquiring locks
          needed_locks = lu.needed_locks[level]

          self._AcquireLocks(level, needed_locks, share,
                             calc_timeout(), priority)
        else:
          # Adding locks
          add_locks = lu.add_locks[level]
          # Locks added here are removed again after the LU has run, unless
          # the LU changes remove_locks in the meantime
          lu.remove_locks[level] = add_locks

          try:
            self.context.glm.add(level, add_locks, acquired=1, shared=share)
          except errors.LockError:
            raise errors.OpPrereqError(
              "Couldn't add locks (%s), probably because of a race condition"
              " with another job, who added them first" % add_locks,
              errors.ECODE_FAULT)

        try:
          result = self._LockAndExecLU(lu, level + 1, calc_timeout, priority)
        finally:
          if level in lu.remove_locks:
            self.context.glm.remove(level, lu.remove_locks[level])
      finally:
        # Release whatever is still owned at this level, also on errors
        if self.context.glm.is_owned(level):
          self.context.glm.release(level)

    else:
      # Nothing to lock at this level, continue with the next one
      result = self._LockAndExecLU(lu, level + 1, calc_timeout, priority)

    return result

  def ExecOpCode(self, op, cbs, timeout=None, priority=None):
    """Execute an opcode.

    @type op: an OpCode instance
    @param op: the opcode to be executed
    @type cbs: L{OpExecCbBase}
    @param cbs: Runtime callbacks
    @type timeout: float or None
    @param timeout: Maximum time to acquire all locks, None for no timeout
    @type priority: number or None
    @param priority: Priority for acquiring lock(s)
    @return: the opcode result
    @raise LockAcquireTimeout: In case locks couldn't be acquired in specified
        amount of time
    @raise errors.ProgrammerError: if C{op} is not an opcode instance
    @raise errors.OpCodeUnknown: if no logical unit is registered for the
        opcode's class
    @raise errors.OpResultError: if the result doesn't pass the opcode's
        C{OP_RESULT} check

    """
    if not isinstance(op, opcodes.OpCode):
      raise errors.ProgrammerError("Non-opcode instance passed"
                                   " to ExecOpcode (%s)" % type(op))

    lu_class = self.DISPATCH_TABLE.get(op.__class__, None)
    if lu_class is None:
      raise errors.OpCodeUnknown("Unknown opcode")

    if timeout is None:
      calc_timeout = lambda: None
    else:
      # One running timeout is shared by all lock acquisitions of this opcode
      calc_timeout = utils.RunningTimeout(timeout, False).Remaining

    self._cbs = cbs
    try:
      # Acquire the Big Ganeti Lock exclusively if this LU requires it,
      # and in a shared fashion otherwise (to prevent concurrent run with
      # an exclusive LU).
      self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL,
                          not lu_class.REQ_BGL, calc_timeout(),
                          priority)
      try:
        lu = lu_class(self, op, self.context, self.rpc)
        lu.ExpandNames()
        assert lu.needed_locks is not None, "needed_locks not set by LU"

        try:
          result = self._LockAndExecLU(lu, locking.LEVEL_INSTANCE, calc_timeout,
                                       priority)
        finally:
          if self._ec_id:
            self.context.cfg.DropECReservations(self._ec_id)
      finally:
        self.context.glm.release(locking.LEVEL_CLUSTER)
    finally:
      self._cbs = None

    resultcheck_fn = op.OP_RESULT
    if not (resultcheck_fn is None or resultcheck_fn(result)):
      logging.error("Expected opcode result matching %s, got %s",
                    resultcheck_fn, result)
      raise errors.OpResultError("Opcode result does not match %s" %
                                 resultcheck_fn)

    return result

  def Log(self, *args):
    """Forward call to feedback callback function.

    Nothing happens when no callbacks are set.

    """
    if self._cbs:
      self._cbs.Feedback(*args)

  def LogStep(self, current, total, message):
    """Log a change in LU execution progress.

    @param current: number of the current step
    @param total: total number of steps
    @param message: description of the step

    """
    logging.debug("Step %d/%d %s", current, total, message)
    self.Log("STEP %d/%d %s" % (current, total, message))

  def LogWarning(self, message, *args, **kwargs):
    """Log a warning to the logs and the user.

    The optional keyword argument is 'hint' and can be used to show a
    hint to the user (presumably related to the warning). If the
    message is empty, it will not be printed at all, allowing one to
    show only a hint.

    @param message: warning text; formatted with C{args} when any are given

    """
    assert not kwargs or (len(kwargs) == 1 and "hint" in kwargs), \
           "Invalid keyword arguments for LogWarning (%s)" % str(kwargs)
    if args:
      message = message % tuple(args)
    if message:
      logging.warning(message)
      self.Log(" - WARNING: %s" % message)
    if "hint" in kwargs:
      self.Log("      Hint: %s" % kwargs["hint"])

  def LogInfo(self, message, *args):
    """Log an informational message to the logs and the user.

    @param message: text to log; formatted with C{args} when any are given

    """
    if args:
      message = message % tuple(args)
    logging.info(message)
    self.Log(" - INFO: %s" % message)

  def GetECId(self):
    """Returns the current execution context ID.

    @raise errors.ProgrammerError: if no execution context ID is set

    """
    if not self._ec_id:
      raise errors.ProgrammerError("Tried to use execution context id when"
                                   " not set")
    return self._ec_id
445 adfa97e3 Guido Trotter
446 a8083063 Iustin Pop
447 a8083063 Iustin Pop
class HooksMaster(object):
  """Runs the hooks of a logical unit on the relevant nodes.

  For a given LU this class computes the hooks environment and the
  node lists of the pre and post phases, and dispatches the run
  commands to those nodes.

  To avoid a direct dependency on the rpc module, the function which
  performs the remote call is passed to the constructor. This will
  usually be rpc.call_hooks_runner, but any function which behaves the
  same works.

  """
  def __init__(self, callfn, lu):
    """Remember the collaborators and precompute the pre-phase data.

    @param callfn: function doing the remote hooks call; it is invoked
        as C{callfn(node_list, hpath, phase, env)}
    @param lu: the logical unit whose hooks are to be run

    """
    self.callfn = callfn
    self.lu = lu
    self.op = lu.op
    self.pre_env = self._BuildEnv(constants.HOOKS_PHASE_PRE)

    if self.lu.HPATH is not None:
      (pre_nodes, post_nodes) = map(frozenset, self.lu.BuildHooksNodes())
    else:
      # LUs without a hooks path have no nodes to run hooks on
      (pre_nodes, post_nodes) = (None, None)

    self.pre_nodes = pre_nodes
    self.post_nodes = post_nodes

  def _BuildEnv(self, phase):
    """Compute the LU-specific hooks environment for a phase.

    The variables returned by the LU's C{BuildHooksEnv} are prefixed
    with "GANETI_" for the pre phase and with "GANETI_POST_" for the
    post phase. For the post phase the already computed pre-phase
    environment is merged into the result as well.

    @param phase: one of L{constants.HOOKS_PHASE_PRE} or
        L{constants.HOOKS_PHASE_POST}
    @return: dictionary mapping variable names to values
    @raise AssertionError: if the phase is unknown

    """
    in_pre = (phase == constants.HOOKS_PHASE_PRE)
    in_post = (not in_pre) and (phase == constants.HOOKS_PHASE_POST)

    if not (in_pre or in_post):
      raise AssertionError("Unknown phase '%s'" % phase)

    if in_pre:
      prefix = "GANETI_"
    else:
      prefix = "GANETI_POST_"

    result = {}

    if self.lu.HPATH is not None:
      lu_env = self.lu.BuildHooksEnv()
      if lu_env:
        # The LU must hand out bare names; the prefix is added here
        assert not compat.any(name.upper().startswith(prefix)
                              for name in lu_env)
        for (name, value) in lu_env.items():
          result["%s%s" % (prefix, name)] = value

    if in_pre:
      assert compat.all((name.startswith("GANETI_") and
                         not name.startswith("GANETI_POST_"))
                        for name in result)
      return result

    # Post phase from here on
    assert compat.all(name.startswith("GANETI_POST_") for name in result)
    assert isinstance(self.pre_env, dict)

    # Merge with pre-phase environment
    assert not compat.any(name.startswith("GANETI_POST_")
                          for name in self.pre_env)
    result.update(self.pre_env)

    return result

  def _RunWrapper(self, node_list, hpath, phase, phase_env):
    """Complete the environment and invoke self.callfn.

    The generic variables (path, hooks version, opcode, data directory,
    phase, hooks path and, when available, object type, cluster name
    and master node) are combined with the phase-specific ones, and all
    keys and values are converted to strings before the call.

    @param node_list: the nodes on which the hooks should run
    @param hpath: the hooks path passed on to the call function
    @param phase: the hooks phase
    @param phase_env: phase-specific environment, may be empty or None
    @return: whatever C{self.callfn} returns

    """
    cfg = self.lu.cfg

    full_env = {
      "PATH": "/sbin:/bin:/usr/sbin:/usr/bin",
      "GANETI_HOOKS_VERSION": constants.HOOKS_VERSION,
      "GANETI_OP_CODE": self.op.OP_ID,
      "GANETI_DATA_DIR": constants.DATA_DIR,
      "GANETI_HOOKS_PHASE": phase,
      "GANETI_HOOKS_PATH": hpath,
      }

    if self.lu.HTYPE:
      full_env["GANETI_OBJECT_TYPE"] = self.lu.HTYPE

    if cfg is not None:
      full_env["GANETI_CLUSTER"] = cfg.GetClusterName()
      full_env["GANETI_MASTER"] = cfg.GetMasterNode()

    if phase_env:
      # Phase variables must never override the generic ones
      assert not set(full_env).intersection(phase_env), \
             "Environment variables conflict"
      full_env.update(phase_env)

    # Convert everything to strings
    str_env = dict((str(name), str(value))
                   for (name, value) in full_env.iteritems())

    assert compat.all(name == "PATH" or name.startswith("GANETI_")
                      for name in str_env)

    return self.callfn(node_list, hpath, phase, str_env)

  def RunPhase(self, phase, nodes=None):
    """Run all the scripts for a phase.

    This is the main function of the HooksMaster.

    In the pre phase, communication failures and failing scripts abort
    the run through exceptions; in the post phase they are only
    reported via the LU's C{LogWarning}.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param nodes: overrides the predefined list of nodes for the given phase
    @return: the results of the hooks multi-node rpc call, or None if
        there are no nodes to run on
    @raise errors.HooksFailure: on communication failure to the nodes
    @raise errors.HooksAbort: on failure of one of the hooks

    """
    if phase == constants.HOOKS_PHASE_PRE:
      is_pre = True
      if nodes is None:
        nodes = self.pre_nodes
      env = self.pre_env
    elif phase == constants.HOOKS_PHASE_POST:
      is_pre = False
      if nodes is None:
        nodes = self.post_nodes
      env = self._BuildEnv(phase)
    else:
      raise AssertionError("Unknown phase '%s'" % phase)

    if not nodes:
      # empty node list, we should not attempt to run this as either
      # we're in the cluster init phase and the rpc client part can't
      # even attempt to run, or this LU doesn't do hooks at all
      return

    results = self._RunWrapper(nodes, self.lu.HPATH, phase, env)
    if not results:
      msg = "Communication Failure"
      if is_pre:
        raise errors.HooksFailure(msg)
      self.lu.LogWarning(msg)
      return results

    failures = []
    for node_name in results:
      node_result = results[node_name]
      if node_result.offline:
        continue

      fail_msg = node_result.fail_msg
      if fail_msg:
        self.lu.LogWarning("Communication failure to node %s: %s",
                           node_name, fail_msg)
        continue

      for (script, status, output) in node_result.payload:
        if status == constants.HKR_FAIL:
          if is_pre:
            # collected and raised together once all nodes are checked
            failures.append((node_name, script, output))
          else:
            self.lu.LogWarning("On %s script %s failed, output: %s" %
                               (node_name, script,
                                output or "(no output)"))

    if is_pre and failures:
      raise errors.HooksAbort(failures)

    return results

  def RunConfigUpdate(self):
    """Run the special configuration update hook.

    This is a special hook which is run on the master node only, as a
    post-phase hook using the pre-phase environment. It is meant to be
    run after each top-level LU if the configuration has been updated.

    """
    master = self.lu.cfg.GetMasterNode()
    self._RunWrapper([master], constants.HOOKS_NAME_CFGUPDATE,
                     constants.HOOKS_PHASE_POST, self.pre_env)