Statistics
| Branch: | Tag: | Revision:

root / lib / mcpu.py @ 936f3c59

History | View | Annotate | Download (19.6 kB)

1 2f31098c Iustin Pop
#
2 a8083063 Iustin Pop
#
3 a8083063 Iustin Pop
4 a8083063 Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 a8083063 Iustin Pop
#
6 a8083063 Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 a8083063 Iustin Pop
# it under the terms of the GNU General Public License as published by
8 a8083063 Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 a8083063 Iustin Pop
# (at your option) any later version.
10 a8083063 Iustin Pop
#
11 a8083063 Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 a8083063 Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a8083063 Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a8083063 Iustin Pop
# General Public License for more details.
15 a8083063 Iustin Pop
#
16 a8083063 Iustin Pop
# You should have received a copy of the GNU General Public License
17 a8083063 Iustin Pop
# along with this program; if not, write to the Free Software
18 a8083063 Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a8083063 Iustin Pop
# 02110-1301, USA.
20 a8083063 Iustin Pop
21 a8083063 Iustin Pop
22 a8083063 Iustin Pop
"""Module implementing the logic behind the cluster operations
23 a8083063 Iustin Pop

24 a8083063 Iustin Pop
This module implements the logic for doing operations in the cluster. There
25 a8083063 Iustin Pop
are two kinds of classes defined:
26 a8083063 Iustin Pop
  - logical units, which know how to deal with their specific opcode only
27 a8083063 Iustin Pop
  - the processor, which dispatches the opcodes to their logical units
28 a8083063 Iustin Pop

29 a8083063 Iustin Pop
"""
30 a8083063 Iustin Pop
31 a5eb7789 Iustin Pop
import logging
32 407339d0 Michael Hanselmann
import random
33 407339d0 Michael Hanselmann
import time
34 a8083063 Iustin Pop
35 a8083063 Iustin Pop
from ganeti import opcodes
36 a8083063 Iustin Pop
from ganeti import constants
37 a8083063 Iustin Pop
from ganeti import errors
38 a8083063 Iustin Pop
from ganeti import rpc
39 a8083063 Iustin Pop
from ganeti import cmdlib
40 04864530 Guido Trotter
from ganeti import locking
41 a8083063 Iustin Pop
42 7c0d6283 Michael Hanselmann
43 407339d0 Michael Hanselmann
class _LockAcquireTimeout(Exception):
  """Module-private exception signalling that acquiring locks timed out.

  It is raised when a lock acquire returns C{None} and is caught again
  inside this module so the operation can be retried.

  """
47 407339d0 Michael Hanselmann
48 407339d0 Michael Hanselmann
49 e3200b18 Michael Hanselmann
def _CalculateLockAttemptTimeouts():
  """Calculate timeouts for lock attempts.

  The first attempt gets a timeout of one second. Every following timeout
  is derived from the previous one as C{(previous * 1.05) ** 1.25} and is
  then clamped to the range of 0.1 to 10 seconds. Timeouts are appended
  until their total reaches at least 150 seconds.

  @rtype: list of float
  @return: Timeouts in seconds, in the order in which they are to be used

  """
  result = [1.0]

  # Keep a running total instead of re-adding the whole list with sum() on
  # every iteration
  running_sum = result[0]

  # Wait for a total of at least 150s before doing a blocking acquire
  while running_sum < 150.0:
    timeout = (result[-1] * 1.05) ** 1.25

    # Cap timeout at 10 seconds. This gives other jobs a chance to run
    # even if we're still trying to get our locks, before finally moving
    # to a blocking acquire.
    if timeout > 10.0:
      timeout = 10.0

    elif timeout < 0.1:
      # Lower boundary for safety
      timeout = 0.1

    result.append(timeout)
    running_sum += timeout

  return result
73 e3200b18 Michael Hanselmann
74 e3200b18 Michael Hanselmann
75 e3200b18 Michael Hanselmann
class _LockAttemptTimeoutStrategy(object):
  """Timeout strategy for repeated lock acquire attempts.

  One instance describes a single attempt. The timeout for an attempt is
  looked up in L{_TIMEOUT_PER_ATTEMPT}; once the attempt number is past the
  end of that list, no timeout is used anymore.

  """
  __slots__ = [
    "_attempt",
    "_random_fn",
    "_start_time",
    "_time_fn",
    "_running_timeout",
    ]

  # Timeouts indexed by attempt number, computed once at class creation
  _TIMEOUT_PER_ATTEMPT = _CalculateLockAttemptTimeouts()

  def __init__(self, attempt=0, _time_fn=time.time, _random_fn=random.random):
    """Sets up the strategy for one attempt.

    @type attempt: int
    @param attempt: Number of the current attempt, must not be negative
    @param _time_fn: Time function (replaceable for unittests)
    @param _random_fn: Random number generator (replaceable for unittests)
    @raise ValueError: If C{attempt} is negative

    """
    object.__init__(self)

    if attempt < 0:
      raise ValueError("Attempt must be zero or positive")

    self._attempt = attempt
    self._time_fn = _time_fn
    self._random_fn = _random_fn

    try:
      attempt_timeout = self._TIMEOUT_PER_ATTEMPT[attempt]
    except IndexError:
      # The list of timeouts is exhausted, use a blocking acquire from now on
      attempt_timeout = None

    self._running_timeout = locking.RunningTimeout(attempt_timeout, False,
                                                   _time_fn=_time_fn)

  def NextAttempt(self):
    """Creates the strategy object for the attempt following this one.

    The time and random functions of this instance are passed on.

    """
    following = self._attempt + 1
    return _LockAttemptTimeoutStrategy(attempt=following,
                                       _time_fn=self._time_fn,
                                       _random_fn=self._random_fn)

  def CalcRemainingTimeout(self):
    """Computes the timeout still remaining for this attempt.

    @return: C{None} if the running timeout reports C{None}, otherwise the
      remaining time with a random variation applied

    """
    remaining = self._running_timeout.Remaining()

    if remaining is None:
      return remaining

    # Vary the value by -/+ 5% so that two or more jobs fighting for the
    # same lock(s) don't keep retrying in lockstep.
    spread = remaining * 0.1
    remaining += ((self._random_fn() * spread) -
                  (spread * 0.5))

    return remaining
138 407339d0 Michael Hanselmann
139 407339d0 Michael Hanselmann
140 031a3e57 Michael Hanselmann
class OpExecCbBase:
  """Interface for callbacks invoked while an OpCode is executed.

  All methods in this base class do nothing; subclasses override the ones
  they are interested in.

  """
  def NotifyStart(self):
    """Hook invoked right before the LU is executed.

    At this point all locks have been acquired and the lu's Exec() method
    is about to be started.

    """

  def Feedback(self, *args):
    """Passes feedback from the LU code on to the end-user.

    """

  def ReportLocks(self, msg):
    """Receives a message describing a lock operation.

    """
161 ef2df7d3 Michael Hanselmann
162 031a3e57 Michael Hanselmann
163 a8083063 Iustin Pop
class Processor(object):
  """Object which runs OpCodes.

  Opcodes are mapped to their logical unit (LU) class through
  L{DISPATCH_TABLE}; L{ExecOpCode} acquires the needed locks and executes
  the LU.

  """
  DISPATCH_TABLE = {
    # Cluster
    opcodes.OpPostInitCluster: cmdlib.LUPostInitCluster,
    opcodes.OpDestroyCluster: cmdlib.LUDestroyCluster,
    opcodes.OpQueryClusterInfo: cmdlib.LUQueryClusterInfo,
    opcodes.OpVerifyCluster: cmdlib.LUVerifyCluster,
    opcodes.OpQueryConfigValues: cmdlib.LUQueryConfigValues,
    opcodes.OpRenameCluster: cmdlib.LURenameCluster,
    opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks,
    opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams,
    opcodes.OpRedistributeConfig: cmdlib.LURedistributeConfig,
    opcodes.OpRepairDiskSizes: cmdlib.LURepairDiskSizes,
    # node lu
    opcodes.OpAddNode: cmdlib.LUAddNode,
    opcodes.OpQueryNodes: cmdlib.LUQueryNodes,
    opcodes.OpQueryNodeVolumes: cmdlib.LUQueryNodeVolumes,
    opcodes.OpQueryNodeStorage: cmdlib.LUQueryNodeStorage,
    opcodes.OpModifyNodeStorage: cmdlib.LUModifyNodeStorage,
    opcodes.OpRepairNodeStorage: cmdlib.LURepairNodeStorage,
    opcodes.OpRemoveNode: cmdlib.LURemoveNode,
    opcodes.OpSetNodeParams: cmdlib.LUSetNodeParams,
    opcodes.OpPowercycleNode: cmdlib.LUPowercycleNode,
    opcodes.OpEvacuateNode: cmdlib.LUEvacuateNode,
    opcodes.OpMigrateNode: cmdlib.LUMigrateNode,
    # instance lu
    opcodes.OpCreateInstance: cmdlib.LUCreateInstance,
    opcodes.OpReinstallInstance: cmdlib.LUReinstallInstance,
    opcodes.OpRemoveInstance: cmdlib.LURemoveInstance,
    opcodes.OpRenameInstance: cmdlib.LURenameInstance,
    opcodes.OpActivateInstanceDisks: cmdlib.LUActivateInstanceDisks,
    opcodes.OpShutdownInstance: cmdlib.LUShutdownInstance,
    opcodes.OpStartupInstance: cmdlib.LUStartupInstance,
    opcodes.OpRebootInstance: cmdlib.LURebootInstance,
    opcodes.OpDeactivateInstanceDisks: cmdlib.LUDeactivateInstanceDisks,
    opcodes.OpReplaceDisks: cmdlib.LUReplaceDisks,
    opcodes.OpRecreateInstanceDisks: cmdlib.LURecreateInstanceDisks,
    opcodes.OpFailoverInstance: cmdlib.LUFailoverInstance,
    opcodes.OpMigrateInstance: cmdlib.LUMigrateInstance,
    opcodes.OpMoveInstance: cmdlib.LUMoveInstance,
    opcodes.OpConnectConsole: cmdlib.LUConnectConsole,
    opcodes.OpQueryInstances: cmdlib.LUQueryInstances,
    opcodes.OpQueryInstanceData: cmdlib.LUQueryInstanceData,
    opcodes.OpSetInstanceParams: cmdlib.LUSetInstanceParams,
    opcodes.OpGrowDisk: cmdlib.LUGrowDisk,
    # os lu
    opcodes.OpDiagnoseOS: cmdlib.LUDiagnoseOS,
    # exports lu
    opcodes.OpQueryExports: cmdlib.LUQueryExports,
    opcodes.OpExportInstance: cmdlib.LUExportInstance,
    opcodes.OpRemoveExport: cmdlib.LURemoveExport,
    # tags lu
    opcodes.OpGetTags: cmdlib.LUGetTags,
    opcodes.OpSearchTags: cmdlib.LUSearchTags,
    opcodes.OpAddTags: cmdlib.LUAddTags,
    opcodes.OpDelTags: cmdlib.LUDelTags,
    # test lu
    opcodes.OpTestDelay: cmdlib.LUTestDelay,
    opcodes.OpTestAllocator: cmdlib.LUTestAllocator,
    }

  def __init__(self, context):
    """Constructor for Processor

    @param context: Object providing the configuration (C{cfg}) and the
      lock manager (C{glm}) used by this processor

    """
    self.context = context
    # Runtime callbacks, only set while ExecOpCode is running
    self._cbs = None
    self.rpc = rpc.RpcRunner(context.cfg)
    self.hmclass = HooksMaster

  def _ReportLocks(self, level, names, shared, timeout, acquired, result):
    """Reports lock operations.

    The message is written to the debug log and, if callbacks are set,
    passed to their C{ReportLocks} method.

    @type level: int
    @param level: Lock level
    @type names: list or string
    @param names: Lock names
    @type shared: bool
    @param shared: Whether the locks should be acquired in shared mode
    @type timeout: None or float
    @param timeout: Timeout for acquiring the locks
    @type acquired: bool
    @param acquired: Whether the locks have already been acquired
    @type result: None or set
    @param result: Result from L{locking.GanetiLockManager.acquire}

    """
    parts = []

    # Build message
    if acquired:
      if result is None:
        parts.append("timeout")
      else:
        parts.append("acquired")
    else:
      parts.append("waiting")
      if timeout is None:
        parts.append("blocking")
      else:
        parts.append("timeout=%0.6fs" % timeout)

    parts.append(locking.LEVEL_NAMES[level])

    if names == locking.ALL_SET:
      parts.append("ALL")
    elif isinstance(names, basestring):
      parts.append(names)
    else:
      parts.append(",".join(names))

    if shared:
      parts.append("shared")
    else:
      parts.append("exclusive")

    msg = "/".join(parts)

    logging.debug("LU locks %s", msg)

    if self._cbs:
      self._cbs.ReportLocks(msg)

  def _AcquireLocks(self, level, names, shared, timeout):
    """Acquires locks via the Ganeti lock manager.

    The operation is reported through L{_ReportLocks} both before and after
    the acquire call.

    @type level: int
    @param level: Lock level
    @type names: list or string
    @param names: Lock names
    @type shared: bool
    @param shared: Whether the locks should be acquired in shared mode
    @type timeout: None or float
    @param timeout: Timeout for acquiring the locks
    @return: Whatever the lock manager's C{acquire} returned; callers in
      this class treat C{None} as a timeout

    """
    self._ReportLocks(level, names, shared, timeout, False, None)

    acquired = self.context.glm.acquire(level, names, shared=shared,
                                        timeout=timeout)

    self._ReportLocks(level, names, shared, timeout, True, acquired)

    return acquired

  def _ExecLU(self, lu):
    """Logical Unit execution sequence.

    Checks the prerequisites, runs the pre-phase hooks, executes the LU and
    runs the post-phase hooks. If the configuration's write counter changed
    in the meantime, the hooks master's C{RunConfigUpdate} is called.

    @param lu: Logical unit to execute
    @return: C{lu.dry_run_result} if the opcode requests a dry run,
      otherwise the LU result as returned by the post-phase hooks callback

    """
    write_count = self.context.cfg.write_count
    lu.CheckPrereq()
    hm = HooksMaster(self.rpc.call_hooks_runner, lu)
    h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE)
    lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results,
                     self._Feedback, None)

    if getattr(lu.op, "dry_run", False):
      # in this mode, no post-hooks are run, and the config is not
      # written (as it might have been modified by another LU, and we
      # shouldn't do writeout on behalf of other threads)
      self.LogInfo("dry-run mode requested, not actually executing"
                   " the operation")
      return lu.dry_run_result

    try:
      result = lu.Exec(self._Feedback)
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST)
      result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results,
                                self._Feedback, result)
    finally:
      # FIXME: This needs locks if not lu_class.REQ_BGL
      if write_count != self.context.cfg.write_count:
        hm.RunConfigUpdate()

    return result

  def _LockAndExecLU(self, lu, level, calc_timeout):
    """Execute a Logical Unit, with the needed locks.

    This is a recursive function that starts locking the given level, and
    proceeds up, till there are no more locks to acquire. Then it executes the
    given LU and its opcodes.

    @param lu: Logical unit to execute
    @type level: int
    @param level: Lock level to handle in this call
    @type calc_timeout: callable
    @param calc_timeout: Function returning the timeout to use for the next
      lock acquire
    @return: Result of L{_ExecLU}
    @raise _LockAcquireTimeout: If acquiring the locks returned C{None}
    @raise errors.OpPrereqError: If the locks to be added couldn't be added
    @raise NotImplementedError: If the LU wants to both add and acquire
      locks at the same level

    """
    adding_locks = level in lu.add_locks
    acquiring_locks = level in lu.needed_locks
    if level not in locking.LEVELS:
      # All levels have been handled, the LU can be run now
      if self._cbs:
        self._cbs.NotifyStart()

      result = self._ExecLU(lu)

    elif adding_locks and acquiring_locks:
      # We could both acquire and add locks at the same level, but for now we
      # don't need this, so we'll avoid the complicated code needed.
      raise NotImplementedError("Can't declare locks to acquire when adding"
                                " others")

    elif adding_locks or acquiring_locks:
      lu.DeclareLocks(level)
      share = lu.share_locks[level]

      try:
        assert adding_locks ^ acquiring_locks, \
          "Locks must be either added or acquired"

        if acquiring_locks:
          # Acquiring locks
          needed_locks = lu.needed_locks[level]

          acquired = self._AcquireLocks(level, needed_locks, share,
                                        calc_timeout())

          if acquired is None:
            raise _LockAcquireTimeout()

        else:
          # Adding locks
          add_locks = lu.add_locks[level]
          lu.remove_locks[level] = add_locks

          try:
            self.context.glm.add(level, add_locks, acquired=1, shared=share)
          except errors.LockError:
            # The operand is wrapped in a tuple so that formatting also
            # works if add_locks itself is a tuple
            raise errors.OpPrereqError(
              "Couldn't add locks (%s), probably because of a race condition"
              " with another job, who added them first" % (add_locks, ))

          acquired = add_locks

        try:
          lu.acquired_locks[level] = acquired

          result = self._LockAndExecLU(lu, level + 1, calc_timeout)
        finally:
          if level in lu.remove_locks:
            self.context.glm.remove(level, lu.remove_locks[level])
      finally:
        if self.context.glm.is_owned(level):
          self.context.glm.release(level)

    else:
      # Nothing to lock at this level, continue with the next one
      result = self._LockAndExecLU(lu, level + 1, calc_timeout)

    return result

  def ExecOpCode(self, op, cbs):
    """Execute an opcode.

    Lock acquisition is retried with the timeouts given by
    L{_LockAttemptTimeoutStrategy} for as long as acquiring locks times out.

    @type op: an OpCode instance
    @param op: the opcode to be executed
    @type cbs: L{OpExecCbBase}
    @param cbs: Runtime callbacks
    @return: Result of the logical unit
    @raise errors.ProgrammerError: If C{op} is not an opcode instance
    @raise errors.OpCodeUnknown: If there is no logical unit for the opcode

    """
    if not isinstance(op, opcodes.OpCode):
      raise errors.ProgrammerError("Non-opcode instance passed"
                                   " to ExecOpcode")

    self._cbs = cbs
    try:
      lu_class = self.DISPATCH_TABLE.get(op.__class__, None)
      if lu_class is None:
        raise errors.OpCodeUnknown("Unknown opcode")

      timeout_strategy = _LockAttemptTimeoutStrategy()

      while True:
        try:
          acquire_timeout = timeout_strategy.CalcRemainingTimeout()

          # Acquire the Big Ganeti Lock exclusively if this LU requires it,
          # and in a shared fashion otherwise (to prevent concurrent run with
          # an exclusive LU).
          if self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL,
                                not lu_class.REQ_BGL, acquire_timeout) is None:
            raise _LockAcquireTimeout()

          try:
            lu = lu_class(self, op, self.context, self.rpc)
            lu.ExpandNames()
            assert lu.needed_locks is not None, "needed_locks not set by LU"

            return self._LockAndExecLU(lu, locking.LEVEL_INSTANCE,
                                       timeout_strategy.CalcRemainingTimeout)
          finally:
            self.context.glm.release(locking.LEVEL_CLUSTER)

        except _LockAcquireTimeout:
          # Timeout while waiting for lock, try again
          pass

        timeout_strategy = timeout_strategy.NextAttempt()

    finally:
      # Callbacks are only valid for the duration of this call
      self._cbs = None

  def _Feedback(self, *args):
    """Forward call to feedback callback function.

    Nothing happens if no callbacks are set.

    """
    if self._cbs:
      self._cbs.Feedback(*args)

  def LogStep(self, current, total, message):
    """Log a change in LU execution progress.

    @param current: Number of the current step
    @param total: Total number of steps
    @param message: Description of the step

    """
    logging.debug("Step %d/%d %s", current, total, message)
    self._Feedback("STEP %d/%d %s" % (current, total, message))

  def LogWarning(self, message, *args, **kwargs):
    """Log a warning to the logs and the user.

    The optional keyword argument is 'hint' and can be used to show a
    hint to the user (presumably related to the warning). If the
    message is empty, it will not be printed at all, allowing one to
    show only a hint.

    @param message: Warning message; used as a format string if C{args}
      are given

    """
    assert not kwargs or (len(kwargs) == 1 and "hint" in kwargs), \
           "Invalid keyword arguments for LogWarning (%s)" % str(kwargs)
    if args:
      message = message % tuple(args)
    if message:
      logging.warning(message)
      self._Feedback(" - WARNING: %s" % message)
    if "hint" in kwargs:
      self._Feedback("      Hint: %s" % kwargs["hint"])

  def LogInfo(self, message, *args):
    """Log an informational message to the logs and the user.

    @param message: Message; used as a format string if C{args} are given

    """
    if args:
      message = message % tuple(args)
    logging.info(message)
    self._Feedback(" - INFO: %s" % message)
502 0fbbf897 Iustin Pop
503 a8083063 Iustin Pop
504 a8083063 Iustin Pop
class HooksMaster(object):
  """Hooks master.

  This class distributes the run commands to the nodes based on the
  specific LU class.

  In order to remove the direct dependency on the rpc module, the
  constructor needs a function which actually does the remote
  call. This will usually be rpc.call_hooks_runner, but any function
  which behaves the same works.

  @ivar callfn: function doing the remote call; it is invoked as
      C{callfn(node_list, hpath, phase, env)}
  @ivar lu: the logical unit on whose behalf the hooks are run
  @ivar op: the opcode of the logical unit (C{lu.op})
  @ivar env: the base hooks environment, computed at construction time
  @ivar node_list: dict mapping each hooks phase to the frozenset of
      nodes on which that phase runs by default

  """
  def __init__(self, callfn, lu):
    """Store the call function and the LU, then precompute the environment.

    @param callfn: function which does the remote hooks call
    @param lu: the logical unit; its C{op}, C{HPATH}, C{HTYPE} and
        C{cfg} attributes and its C{BuildHooksEnv} method are used by
        this class

    """
    self.callfn = callfn
    self.lu = lu
    self.op = lu.op
    self.env, node_list_pre, node_list_post = self._BuildEnv()
    self.node_list = {constants.HOOKS_PHASE_PRE: node_list_pre,
                      constants.HOOKS_PHASE_POST: node_list_post}

  def _BuildEnv(self):
    """Compute the environment and the target nodes.

    Based on the opcode and the current node list, this builds the
    environment for the hooks and the target node list for the run.

    If the LU defines a hooks path (C{HPATH} is not None), its
    C{BuildHooksEnv} method supplies extra variables, which are added
    with a C{GANETI_} prefix, and the node lists for the two phases.
    Otherwise both node lists are empty.

    @rtype: tuple
    @return: the environment dict, the frozenset of nodes for the pre
        phase and the frozenset of nodes for the post phase

    """
    env = {
      "PATH": "/sbin:/bin:/usr/sbin:/usr/bin",
      "GANETI_HOOKS_VERSION": constants.HOOKS_VERSION,
      "GANETI_OP_CODE": self.op.OP_ID,
      "GANETI_OBJECT_TYPE": self.lu.HTYPE,
      "GANETI_DATA_DIR": constants.DATA_DIR,
      }

    # defaults used when the LU does not run hooks at all
    lu_nodes_pre = lu_nodes_post = []
    if self.lu.HPATH is not None:
      lu_env, lu_nodes_pre, lu_nodes_post = self.lu.BuildHooksEnv()
      if lu_env:
        for key in lu_env:
          env["GANETI_" + key] = lu_env[key]

    return env, frozenset(lu_nodes_pre), frozenset(lu_nodes_post)

  def _RunWrapper(self, node_list, hpath, phase):
    """Simple wrapper over self.callfn.

    This method fixes the environment before doing the rpc call: it
    works on a copy of the base environment, adds the phase, the hooks
    path and (if the LU has a configuration) the cluster name and the
    master node, and converts all keys and values to strings.

    @param node_list: the nodes on which to run the hooks
    @param hpath: the hooks path passed to the call function
    @param phase: the hooks phase passed to the call function
    @return: whatever C{self.callfn} returns

    """
    env = self.env.copy()
    env["GANETI_HOOKS_PHASE"] = phase
    env["GANETI_HOOKS_PATH"] = hpath
    if self.lu.cfg is not None:
      env["GANETI_CLUSTER"] = self.lu.cfg.GetClusterName()
      env["GANETI_MASTER"] = self.lu.cfg.GetMasterNode()

    # items() plus a generator expression gives the same result as the
    # former iteritems()-based list, without being tied to Python 2
    env = dict((str(key), str(val)) for key, val in env.items())

    return self.callfn(node_list, hpath, phase, env)

  def RunPhase(self, phase, nodes=None):
    """Run all the scripts for a phase.

    This is the main function of the HooksMaster.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param nodes: overrides the predefined list of nodes for the given phase
    @return: the processed results of the hooks multi-node rpc call, or
        None if there are no nodes to run the hooks on
    @raise errors.HooksFailure: on communication failure to the nodes
    @raise errors.HooksAbort: on failure of one of the hooks

    """
    # the predefined list is always looked up, so that an unknown phase
    # is reported even when an override is given
    target_nodes = self.node_list[phase]
    if nodes is not None:
      target_nodes = nodes
    if not target_nodes:
      # empty node list, we should not attempt to run this as either
      # we're in the cluster init phase and the rpc client part can't
      # even attempt to run, or this LU doesn't do hooks at all; this
      # also covers an explicitly empty override, which must not be
      # reported as a communication failure
      return None

    results = self._RunWrapper(target_nodes, self.lu.HPATH, phase)
    if not results:
      msg = "Communication Failure"
      if phase == constants.HOOKS_PHASE_PRE:
        raise errors.HooksFailure(msg)
      # failures in the post phase are only reported, never fatal
      self.lu.LogWarning(msg)
      return results

    errs = []
    for node_name in results:
      res = results[node_name]
      if res.offline:
        continue
      msg = res.fail_msg
      if msg:
        self.lu.LogWarning("Communication failure to node %s: %s",
                           node_name, msg)
        continue
      for script, hkr, output in res.payload:
        if hkr != constants.HKR_FAIL:
          continue
        if phase == constants.HOOKS_PHASE_PRE:
          errs.append((node_name, script, output))
        else:
          if not output:
            output = "(no output)"
          self.lu.LogWarning("On %s script %s failed, output: %s",
                             node_name, script, output)
    # errs is only filled in the pre phase, so no phase check is needed
    if errs:
      raise errors.HooksAbort(errs)
    return results

  def RunConfigUpdate(self):
    """Run the special configuration update hook

    This is a special hook that runs only on the master after each
    top-level LU if the configuration has been updated.

    """
    phase = constants.HOOKS_PHASE_POST
    hpath = constants.HOOKS_NAME_CFGUPDATE
    nodes = [self.lu.cfg.GetMasterNode()]
    self._RunWrapper(nodes, hpath, phase)