Statistics
| Branch: | Tag: | Revision:

root / lib / mcpu.py @ f3044516

History | View | Annotate | Download (20.2 kB)

1 2f31098c Iustin Pop
#
2 a8083063 Iustin Pop
#
3 a8083063 Iustin Pop
4 a8083063 Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 a8083063 Iustin Pop
#
6 a8083063 Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 a8083063 Iustin Pop
# it under the terms of the GNU General Public License as published by
8 a8083063 Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 a8083063 Iustin Pop
# (at your option) any later version.
10 a8083063 Iustin Pop
#
11 a8083063 Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 a8083063 Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a8083063 Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a8083063 Iustin Pop
# General Public License for more details.
15 a8083063 Iustin Pop
#
16 a8083063 Iustin Pop
# You should have received a copy of the GNU General Public License
17 a8083063 Iustin Pop
# along with this program; if not, write to the Free Software
18 a8083063 Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a8083063 Iustin Pop
# 02110-1301, USA.
20 a8083063 Iustin Pop
21 a8083063 Iustin Pop
22 a8083063 Iustin Pop
"""Module implementing the logic behind the cluster operations

This module implements the logic for doing operations in the cluster. There
are two kinds of classes defined:
  - logical units, which know how to deal with their specific opcode only
  - the processor, which dispatches the opcodes to their logical units

"""
30 a8083063 Iustin Pop
31 a5eb7789 Iustin Pop
import logging
32 407339d0 Michael Hanselmann
import random
33 407339d0 Michael Hanselmann
import time
34 a8083063 Iustin Pop
35 a8083063 Iustin Pop
from ganeti import opcodes
36 a8083063 Iustin Pop
from ganeti import constants
37 a8083063 Iustin Pop
from ganeti import errors
38 a8083063 Iustin Pop
from ganeti import rpc
39 a8083063 Iustin Pop
from ganeti import cmdlib
40 04864530 Guido Trotter
from ganeti import locking
41 a8083063 Iustin Pop
42 7c0d6283 Michael Hanselmann
43 407339d0 Michael Hanselmann
class _LockAcquireTimeout(Exception):
44 407339d0 Michael Hanselmann
  """Internal exception to report timeouts on acquiring locks.
45 407339d0 Michael Hanselmann

46 407339d0 Michael Hanselmann
  """
47 407339d0 Michael Hanselmann
48 407339d0 Michael Hanselmann
49 e3200b18 Michael Hanselmann
def _CalculateLockAttemptTimeouts():
50 e3200b18 Michael Hanselmann
  """Calculate timeouts for lock attempts.
51 e3200b18 Michael Hanselmann

52 e3200b18 Michael Hanselmann
  """
53 e3200b18 Michael Hanselmann
  result = [1.0]
54 e3200b18 Michael Hanselmann
55 e3200b18 Michael Hanselmann
  # Wait for a total of at least 150s before doing a blocking acquire
56 e3200b18 Michael Hanselmann
  while sum(result) < 150.0:
57 e3200b18 Michael Hanselmann
    timeout = (result[-1] * 1.05) ** 1.25
58 e3200b18 Michael Hanselmann
59 e3200b18 Michael Hanselmann
    # Cap timeout at 10 seconds. This gives other jobs a chance to run
60 e3200b18 Michael Hanselmann
    # even if we're still trying to get our locks, before finally moving
61 e3200b18 Michael Hanselmann
    # to a blocking acquire.
62 e3200b18 Michael Hanselmann
    if timeout > 10.0:
63 e3200b18 Michael Hanselmann
      timeout = 10.0
64 e3200b18 Michael Hanselmann
65 e3200b18 Michael Hanselmann
    elif timeout < 0.1:
66 e3200b18 Michael Hanselmann
      # Lower boundary for safety
67 e3200b18 Michael Hanselmann
      timeout = 0.1
68 e3200b18 Michael Hanselmann
69 e3200b18 Michael Hanselmann
    result.append(timeout)
70 e3200b18 Michael Hanselmann
71 e3200b18 Michael Hanselmann
  return result
72 e3200b18 Michael Hanselmann
73 e3200b18 Michael Hanselmann
74 e3200b18 Michael Hanselmann
class _LockAttemptTimeoutStrategy(object):
  """Class with lock acquire timeout strategy.

  Each instance describes a single lock acquisition attempt. The timeout for
  the attempt is looked up in L{_TIMEOUT_PER_ATTEMPT}; once the attempt number
  is past the end of that table the timeout becomes C{None}, which stands for
  a blocking acquire.

  """
  __slots__ = [
    "_attempt",
    "_random_fn",
    # NOTE(review): "_start_time" is not assigned by any method in this class
    "_start_time",
    "_time_fn",
    "_running_timeout",
    ]

  # Computed once, when the class body is executed
  _TIMEOUT_PER_ATTEMPT = _CalculateLockAttemptTimeouts()

  def __init__(self, attempt=0, _time_fn=time.time, _random_fn=random.random):
    """Initializes this class.

    @type attempt: int
    @param attempt: Current attempt number
    @param _time_fn: Time function for unittests
    @param _random_fn: Random number generator for unittests
    @raise ValueError: If C{attempt} is negative

    """
    object.__init__(self)

    if attempt < 0:
      raise ValueError("Attempt must be zero or positive")

    self._attempt = attempt
    self._time_fn = _time_fn
    self._random_fn = _random_fn

    try:
      timeout = self._TIMEOUT_PER_ATTEMPT[attempt]
    except IndexError:
      # No more timeouts, do blocking acquire
      timeout = None

    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by locking.RunningTimeout, which is not visible from here
    self._running_timeout = locking.RunningTimeout(timeout, False,
                                                   _time_fn=_time_fn)

  def NextAttempt(self):
    """Returns the strategy for the next attempt.

    @rtype: L{_LockAttemptTimeoutStrategy}
    @return: New instance for attempt number C{self._attempt + 1}, using the
        same time and random functions as this instance

    """
    return _LockAttemptTimeoutStrategy(attempt=self._attempt + 1,
                                       _time_fn=self._time_fn,
                                       _random_fn=self._random_fn)

  def CalcRemainingTimeout(self):
    """Returns the remaining timeout.

    @rtype: None or float
    @return: C{None} if the running timeout reports no remaining time limit,
        otherwise the remaining time with a random variation applied

    """
    timeout = self._running_timeout.Remaining()

    if timeout is not None:
      # Add a small variation (-/+ 5%) to timeout. This helps in situations
      # where two or more jobs are fighting for the same lock(s).
      variation_range = timeout * 0.1
      timeout += ((self._random_fn() * variation_range) -
                  (variation_range * 0.5))

    return timeout
137 407339d0 Michael Hanselmann
138 407339d0 Michael Hanselmann
139 7260cfbe Iustin Pop
class OpExecCbBase: # pylint: disable-msg=W0232
  """Base class for OpCode execution callbacks.

  All methods are no-ops; subclasses override the ones they are interested
  in.

  """
  def NotifyStart(self):
    """Called when we are about to execute the LU.

    This function is called when we're about to start the lu's Exec() method,
    that is, after we have acquired all locks.

    """

  def Feedback(self, *args):
    """Sends feedback from the LU code to the end-user.

    @param args: Positional arguments as passed to L{Processor.Log}

    """

  def ReportLocks(self, msg):
    """Report lock operations.

    @type msg: string
    @param msg: Description of the lock operation, as built by
        L{Processor._ReportLocks}

    """
160 ef2df7d3 Michael Hanselmann
161 031a3e57 Michael Hanselmann
162 a8083063 Iustin Pop
class Processor(object):
  """Object which runs OpCodes

  The class maps opcode classes to their logical unit (LU) classes via
  L{DISPATCH_TABLE}, acquires the locks declared by the LU and then runs the
  LU together with its hooks.

  """
  DISPATCH_TABLE = {
    # Cluster
    opcodes.OpPostInitCluster: cmdlib.LUPostInitCluster,
    opcodes.OpDestroyCluster: cmdlib.LUDestroyCluster,
    opcodes.OpQueryClusterInfo: cmdlib.LUQueryClusterInfo,
    opcodes.OpVerifyCluster: cmdlib.LUVerifyCluster,
    opcodes.OpQueryConfigValues: cmdlib.LUQueryConfigValues,
    opcodes.OpRenameCluster: cmdlib.LURenameCluster,
    opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks,
    opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams,
    opcodes.OpRedistributeConfig: cmdlib.LURedistributeConfig,
    opcodes.OpRepairDiskSizes: cmdlib.LURepairDiskSizes,
    # node lu
    opcodes.OpAddNode: cmdlib.LUAddNode,
    opcodes.OpQueryNodes: cmdlib.LUQueryNodes,
    opcodes.OpQueryNodeVolumes: cmdlib.LUQueryNodeVolumes,
    opcodes.OpQueryNodeStorage: cmdlib.LUQueryNodeStorage,
    opcodes.OpModifyNodeStorage: cmdlib.LUModifyNodeStorage,
    opcodes.OpRepairNodeStorage: cmdlib.LURepairNodeStorage,
    opcodes.OpRemoveNode: cmdlib.LURemoveNode,
    opcodes.OpSetNodeParams: cmdlib.LUSetNodeParams,
    opcodes.OpPowercycleNode: cmdlib.LUPowercycleNode,
    opcodes.OpMigrateNode: cmdlib.LUMigrateNode,
    opcodes.OpNodeEvacuationStrategy: cmdlib.LUNodeEvacuationStrategy,
    # instance lu
    opcodes.OpCreateInstance: cmdlib.LUCreateInstance,
    opcodes.OpReinstallInstance: cmdlib.LUReinstallInstance,
    opcodes.OpRemoveInstance: cmdlib.LURemoveInstance,
    opcodes.OpRenameInstance: cmdlib.LURenameInstance,
    opcodes.OpActivateInstanceDisks: cmdlib.LUActivateInstanceDisks,
    opcodes.OpShutdownInstance: cmdlib.LUShutdownInstance,
    opcodes.OpStartupInstance: cmdlib.LUStartupInstance,
    opcodes.OpRebootInstance: cmdlib.LURebootInstance,
    opcodes.OpDeactivateInstanceDisks: cmdlib.LUDeactivateInstanceDisks,
    opcodes.OpReplaceDisks: cmdlib.LUReplaceDisks,
    opcodes.OpRecreateInstanceDisks: cmdlib.LURecreateInstanceDisks,
    opcodes.OpFailoverInstance: cmdlib.LUFailoverInstance,
    opcodes.OpMigrateInstance: cmdlib.LUMigrateInstance,
    opcodes.OpMoveInstance: cmdlib.LUMoveInstance,
    opcodes.OpConnectConsole: cmdlib.LUConnectConsole,
    opcodes.OpQueryInstances: cmdlib.LUQueryInstances,
    opcodes.OpQueryInstanceData: cmdlib.LUQueryInstanceData,
    opcodes.OpSetInstanceParams: cmdlib.LUSetInstanceParams,
    opcodes.OpGrowDisk: cmdlib.LUGrowDisk,
    # os lu
    opcodes.OpDiagnoseOS: cmdlib.LUDiagnoseOS,
    # exports lu
    opcodes.OpQueryExports: cmdlib.LUQueryExports,
    opcodes.OpPrepareExport: cmdlib.LUPrepareExport,
    opcodes.OpExportInstance: cmdlib.LUExportInstance,
    opcodes.OpRemoveExport: cmdlib.LURemoveExport,
    # tags lu
    opcodes.OpGetTags: cmdlib.LUGetTags,
    opcodes.OpSearchTags: cmdlib.LUSearchTags,
    opcodes.OpAddTags: cmdlib.LUAddTags,
    opcodes.OpDelTags: cmdlib.LUDelTags,
    # test lu
    opcodes.OpTestDelay: cmdlib.LUTestDelay,
    opcodes.OpTestAllocator: cmdlib.LUTestAllocator,
    opcodes.OpTestJobqueue: cmdlib.LUTestJobqueue,
    }

  def __init__(self, context, ec_id):
    """Constructor for Processor

    @type context: GanetiContext
    @param context: global Ganeti context
    @type ec_id: string
    @param ec_id: execution context identifier

    """
    self.context = context
    self._ec_id = ec_id
    # Runtime callbacks; only set while ExecOpCode is running
    self._cbs = None
    self.rpc = rpc.RpcRunner(context.cfg)
    self.hmclass = HooksMaster

  def _ReportLocks(self, level, names, shared, timeout, acquired, result):
    """Reports lock operations.

    The message is written to the debug log and, if callbacks are set, passed
    to their C{ReportLocks} method.

    @type level: int
    @param level: Lock level
    @type names: list or string
    @param names: Lock names
    @type shared: bool
    @param shared: Whether the locks should be acquired in shared mode
    @type timeout: None or float
    @param timeout: Timeout for acquiring the locks
    @type acquired: bool
    @param acquired: Whether the locks have already been acquired
    @type result: None or set
    @param result: Result from L{locking.GanetiLockManager.acquire}

    """
    parts = []

    # Build message
    if acquired:
      if result is None:
        parts.append("timeout")
      else:
        parts.append("acquired")
    else:
      parts.append("waiting")
      if timeout is None:
        parts.append("blocking")
      else:
        parts.append("timeout=%0.6fs" % timeout)

    parts.append(locking.LEVEL_NAMES[level])

    if names == locking.ALL_SET:
      parts.append("ALL")
    elif isinstance(names, basestring):
      parts.append(names)
    else:
      parts.append(",".join(sorted(names)))

    if shared:
      parts.append("shared")
    else:
      parts.append("exclusive")

    msg = "/".join(parts)

    logging.debug("LU locks %s", msg)

    if self._cbs:
      self._cbs.ReportLocks(msg)

  def _AcquireLocks(self, level, names, shared, timeout):
    """Acquires locks via the Ganeti lock manager.

    The operation is reported via L{_ReportLocks} both before and after the
    call to the lock manager.

    @type level: int
    @param level: Lock level
    @type names: list or string
    @param names: Lock names
    @type shared: bool
    @param shared: Whether the locks should be acquired in shared mode
    @type timeout: None or float
    @param timeout: Timeout for acquiring the locks
    @return: Whatever the lock manager's C{acquire} returned; callers in this
        class treat C{None} as a timeout

    """
    self._ReportLocks(level, names, shared, timeout, False, None)

    acquired = self.context.glm.acquire(level, names, shared=shared,
                                        timeout=timeout)

    self._ReportLocks(level, names, shared, timeout, True, acquired)

    return acquired

  def _ExecLU(self, lu):
    """Logical Unit execution sequence.

    Checks the LU's prerequisites, runs the pre-phase hooks, executes the LU
    and runs the post-phase hooks. If the configuration's write count changed
    during execution, the config update hook is run as well.

    @param lu: Logical unit instance to execute
    @return: The LU's result as returned by its post-phase hooks callback, or
        C{lu.dry_run_result} if the opcode requested a dry run

    """
    # Remembered to detect configuration writes made during execution
    write_count = self.context.cfg.write_count
    lu.CheckPrereq()
    hm = HooksMaster(self.rpc.call_hooks_runner, lu)
    h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE)
    lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results,
                     self.Log, None)

    if getattr(lu.op, "dry_run", False):
      # in this mode, no post-hooks are run, and the config is not
      # written (as it might have been modified by another LU, and we
      # shouldn't do writeout on behalf of other threads)
      self.LogInfo("dry-run mode requested, not actually executing"
                   " the operation")
      return lu.dry_run_result

    try:
      result = lu.Exec(self.Log)
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST)
      result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results,
                                self.Log, result)
    finally:
      # FIXME: This needs locks if not lu_class.REQ_BGL
      if write_count != self.context.cfg.write_count:
        hm.RunConfigUpdate()

    return result

  def _LockAndExecLU(self, lu, level, calc_timeout):
    """Execute a Logical Unit, with the needed locks.

    This is a recursive function that starts locking the given level, and
    proceeds up, till there are no more locks to acquire. Then it executes the
    given LU and its opcodes.

    @param lu: Logical unit instance to execute
    @type level: int
    @param level: Lock level to handle in this call
    @type calc_timeout: callable
    @param calc_timeout: Function returning the timeout to use for the next
        lock acquisition (C{None} or float)
    @return: Result of L{_ExecLU}
    @raise _LockAcquireTimeout: If acquiring locks at this level timed out

    """
    adding_locks = level in lu.add_locks
    acquiring_locks = level in lu.needed_locks
    if level not in locking.LEVELS:
      # All levels have been handled, the LU can be executed
      if self._cbs:
        self._cbs.NotifyStart()

      result = self._ExecLU(lu)

    elif adding_locks and acquiring_locks:
      # We could both acquire and add locks at the same level, but for now we
      # don't need this, so we'll avoid the complicated code needed.
      raise NotImplementedError("Can't declare locks to acquire when adding"
                                " others")

    elif adding_locks or acquiring_locks:
      lu.DeclareLocks(level)
      share = lu.share_locks[level]

      try:
        assert adding_locks ^ acquiring_locks, \
          "Locks must be either added or acquired"

        if acquiring_locks:
          # Acquiring locks
          needed_locks = lu.needed_locks[level]

          acquired = self._AcquireLocks(level, needed_locks, share,
                                        calc_timeout())

          if acquired is None:
            raise _LockAcquireTimeout()

        else:
          # Adding locks
          add_locks = lu.add_locks[level]
          # Added locks are scheduled for removal once the LU is done
          lu.remove_locks[level] = add_locks

          try:
            self.context.glm.add(level, add_locks, acquired=1, shared=share)
          except errors.LockError:
            raise errors.OpPrereqError(
              "Couldn't add locks (%s), probably because of a race condition"
              " with another job, who added them first" % add_locks,
              errors.ECODE_FAULT)

          acquired = add_locks

        try:
          lu.acquired_locks[level] = acquired

          result = self._LockAndExecLU(lu, level + 1, calc_timeout)
        finally:
          if level in lu.remove_locks:
            self.context.glm.remove(level, lu.remove_locks[level])
      finally:
        if self.context.glm.is_owned(level):
          self.context.glm.release(level)

    else:
      # Nothing to lock at this level, continue with the next one
      result = self._LockAndExecLU(lu, level + 1, calc_timeout)

    return result

  def ExecOpCode(self, op, cbs):
    """Execute an opcode.

    Lock acquisition is retried with the timeouts provided by
    L{_LockAttemptTimeoutStrategy} whenever acquiring locks times out.

    @type op: an OpCode instance
    @param op: the opcode to be executed
    @type cbs: L{OpExecCbBase}
    @param cbs: Runtime callbacks
    @return: Result of executing the logical unit for the opcode
    @raise errors.ProgrammerError: If C{op} is not an opcode instance
    @raise errors.OpCodeUnknown: If no logical unit is registered for the
        opcode's class

    """
    if not isinstance(op, opcodes.OpCode):
      raise errors.ProgrammerError("Non-opcode instance passed"
                                   " to ExecOpcode")

    self._cbs = cbs
    try:
      lu_class = self.DISPATCH_TABLE.get(op.__class__, None)
      if lu_class is None:
        raise errors.OpCodeUnknown("Unknown opcode")

      timeout_strategy = _LockAttemptTimeoutStrategy()

      while True:
        try:
          acquire_timeout = timeout_strategy.CalcRemainingTimeout()

          # Acquire the Big Ganeti Lock exclusively if this LU requires it,
          # and in a shared fashion otherwise (to prevent concurrent run with
          # an exclusive LU).
          if self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL,
                                not lu_class.REQ_BGL, acquire_timeout) is None:
            raise _LockAcquireTimeout()

          try:
            lu = lu_class(self, op, self.context, self.rpc)
            lu.ExpandNames()
            assert lu.needed_locks is not None, "needed_locks not set by LU"

            try:
              return self._LockAndExecLU(lu, locking.LEVEL_INSTANCE,
                                         timeout_strategy.CalcRemainingTimeout)
            finally:
              if self._ec_id:
                self.context.cfg.DropECReservations(self._ec_id)

          finally:
            self.context.glm.release(locking.LEVEL_CLUSTER)

        except _LockAcquireTimeout:
          # Timeout while waiting for lock, try again
          pass

        timeout_strategy = timeout_strategy.NextAttempt()

    finally:
      self._cbs = None

  def Log(self, *args):
    """Forward call to feedback callback function.

    Nothing happens if no callbacks are currently set.

    """
    if self._cbs:
      self._cbs.Feedback(*args)

  def LogStep(self, current, total, message):
    """Log a change in LU execution progress.

    @type current: int
    @param current: Number of the current step
    @type total: int
    @param total: Total number of steps
    @type message: string
    @param message: Description of the step

    """
    logging.debug("Step %d/%d %s", current, total, message)
    self.Log("STEP %d/%d %s" % (current, total, message))

  def LogWarning(self, message, *args, **kwargs):
    """Log a warning to the logs and the user.

    The optional keyword argument is 'hint' and can be used to show a
    hint to the user (presumably related to the warning). If the
    message is empty, it will not be printed at all, allowing one to
    show only a hint.

    @type message: string
    @param message: Warning text; formatted with C{args} if any are given

    """
    assert not kwargs or (len(kwargs) == 1 and "hint" in kwargs), \
           "Invalid keyword arguments for LogWarning (%s)" % str(kwargs)
    if args:
      message = message % tuple(args)
    if message:
      logging.warning(message)
      self.Log(" - WARNING: %s" % message)
    if "hint" in kwargs:
      self.Log("      Hint: %s" % kwargs["hint"])

  def LogInfo(self, message, *args):
    """Log an informational message to the logs and the user.

    @type message: string
    @param message: Message text; formatted with C{args} if any are given

    """
    if args:
      message = message % tuple(args)
    logging.info(message)
    self.Log(" - INFO: %s" % message)

  def GetECId(self):
    """Returns the execution context identifier.

    @return: The execution context identifier passed to the constructor
    @raise errors.ProgrammerError: If no execution context identifier is set

    """
    if not self._ec_id:
      # The exception used to be instantiated without being raised, which
      # silently returned the unset identifier to the caller
      raise errors.ProgrammerError("Tried to use execution context id when"
                                   " not set")
    return self._ec_id
520 adfa97e3 Guido Trotter
521 a8083063 Iustin Pop
522 a8083063 Iustin Pop
class HooksMaster(object):
  """Hooks master.

  This class distributes the run commands to the nodes based on the
  specific LU class.

  In order to remove the direct dependency on the rpc module, the
  constructor needs a function which actually does the remote
  call. This will usually be rpc.call_hooks_runner, but any function
  which behaves the same works.

  """
  def __init__(self, callfn, lu):
    """Store the call function and the LU and precompute the hooks data.

    @param callfn: function doing the remote call; it is invoked as
        C{callfn(node_list, hpath, phase, env)}
    @param lu: the logical unit on whose behalf the hooks are run; its
        C{op}, C{cfg}, C{HPATH} and C{HTYPE} attributes are used

    """
    self.callfn = callfn
    self.lu = lu
    self.op = lu.op
    self.env, node_list_pre, node_list_post = self._BuildEnv()
    self.node_list = {constants.HOOKS_PHASE_PRE: node_list_pre,
                      constants.HOOKS_PHASE_POST: node_list_post}

  def _BuildEnv(self):
    """Compute the environment and the target nodes.

    Based on the opcode and the current node list, this builds the
    environment for the hooks and the target node list for the run.

    @return: a tuple of (environment dict, frozenset of the pre-phase
        nodes, frozenset of the post-phase nodes)

    """
    env = {
      "PATH": "/sbin:/bin:/usr/sbin:/usr/bin",
      "GANETI_HOOKS_VERSION": constants.HOOKS_VERSION,
      "GANETI_OP_CODE": self.op.OP_ID,
      "GANETI_OBJECT_TYPE": self.lu.HTYPE,
      "GANETI_DATA_DIR": constants.DATA_DIR,
      }

    if self.lu.HPATH is not None:
      lu_env, lu_nodes_pre, lu_nodes_post = self.lu.BuildHooksEnv()
      if lu_env:
        # LU-provided variables are namespaced with the GANETI_ prefix
        for key in lu_env:
          env["GANETI_" + key] = lu_env[key]
    else:
      # an LU without a hooks path has no nodes to run hooks on
      lu_nodes_pre = lu_nodes_post = []

    return env, frozenset(lu_nodes_pre), frozenset(lu_nodes_post)

  def _RunWrapper(self, node_list, hpath, phase):
    """Simple wrapper over self.callfn.

    This method fixes the environment before doing the rpc call: it
    adds the phase, the hooks path and (if a configuration is
    available) the cluster and master names, then converts all keys
    and values to strings.

    @param node_list: the nodes on which to run the hooks
    @param hpath: the hooks path to run
    @param phase: the hooks phase
    @return: whatever C{self.callfn} returns

    """
    # work on a copy so that self.env is not polluted by per-call data
    env = self.env.copy()
    env["GANETI_HOOKS_PHASE"] = phase
    env["GANETI_HOOKS_PATH"] = hpath
    if self.lu.cfg is not None:
      env["GANETI_CLUSTER"] = self.lu.cfg.GetClusterName()
      env["GANETI_MASTER"] = self.lu.cfg.GetMasterNode()

    env = dict((str(key), str(val)) for key, val in env.items())

    return self.callfn(node_list, hpath, phase, env)

  def RunPhase(self, phase, nodes=None):
    """Run all the scripts for a phase.

    This is the main function of the HooksMaster.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param nodes: overrides the predefined list of nodes for the given phase
    @return: the processed results of the hooks multi-node rpc call, or
        C{None} if there are no nodes to run the hooks on
    @raise errors.HooksFailure: on communication failure to the nodes
    @raise errors.HooksAbort: on failure of one of the hooks

    """
    if nodes is None:
      nodes = self.node_list[phase]
    if not nodes:
      # empty node list, we should not attempt to run this as either
      # we're in the cluster init phase and the rpc client part can't
      # even attempt to run, or this LU doesn't do hooks at all; this
      # also covers an explicitly empty override, which must not be
      # reported as a communication failure
      return None

    results = self._RunWrapper(nodes, self.lu.HPATH, phase)
    is_pre_phase = (phase == constants.HOOKS_PHASE_PRE)

    if not results:
      msg = "Communication Failure"
      if is_pre_phase:
        raise errors.HooksFailure(msg)
      # failures in the post phase are only reported, never fatal
      self.lu.LogWarning(msg)
      return results

    errs = []
    for node_name in results:
      res = results[node_name]
      if res.offline:
        continue
      msg = res.fail_msg
      if msg:
        self.lu.LogWarning("Communication failure to node %s: %s",
                           node_name, msg)
        continue
      for script, hkr, output in res.payload:
        if hkr != constants.HKR_FAIL:
          continue
        if is_pre_phase:
          # collect all failures so they are reported together
          errs.append((node_name, script, output))
        else:
          if not output:
            output = "(no output)"
          self.lu.LogWarning("On %s script %s failed, output: %s",
                             node_name, script, output)

    # errs is only ever filled in the pre phase
    if errs:
      raise errors.HooksAbort(errs)
    return results

  def RunConfigUpdate(self):
    """Run the special configuration update hook

    This is a special hook that runs only on the master after each
    top-level LU if the configuration has been updated.

    The result of the hook run is discarded.

    """
    phase = constants.HOOKS_PHASE_POST
    hpath = constants.HOOKS_NAME_CFGUPDATE
    nodes = [self.lu.cfg.GetMasterNode()]
    self._RunWrapper(nodes, hpath, phase)