Statistics
| Branch: | Tag: | Revision:

root / lib / mcpu.py @ fdad8c4d

History | View | Annotate | Download (20.2 kB)

1 2f31098c Iustin Pop
#
2 a8083063 Iustin Pop
#
3 a8083063 Iustin Pop
4 a8083063 Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 a8083063 Iustin Pop
#
6 a8083063 Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 a8083063 Iustin Pop
# it under the terms of the GNU General Public License as published by
8 a8083063 Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 a8083063 Iustin Pop
# (at your option) any later version.
10 a8083063 Iustin Pop
#
11 a8083063 Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 a8083063 Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a8083063 Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a8083063 Iustin Pop
# General Public License for more details.
15 a8083063 Iustin Pop
#
16 a8083063 Iustin Pop
# You should have received a copy of the GNU General Public License
17 a8083063 Iustin Pop
# along with this program; if not, write to the Free Software
18 a8083063 Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a8083063 Iustin Pop
# 02110-1301, USA.
20 a8083063 Iustin Pop
21 a8083063 Iustin Pop
22 a8083063 Iustin Pop
"""Module implementing the logic behind the cluster operations
23 a8083063 Iustin Pop

24 a8083063 Iustin Pop
This module implements the logic for doing operations in the cluster. There
25 a8083063 Iustin Pop
are two kinds of classes defined:
26 a8083063 Iustin Pop
  - logical units, which know how to deal with their specific opcode only
27 a8083063 Iustin Pop
  - the processor, which dispatches the opcodes to their logical units
28 a8083063 Iustin Pop

29 a8083063 Iustin Pop
"""
30 a8083063 Iustin Pop
31 a5eb7789 Iustin Pop
import logging
32 407339d0 Michael Hanselmann
import random
33 407339d0 Michael Hanselmann
import time
34 a8083063 Iustin Pop
35 a8083063 Iustin Pop
from ganeti import opcodes
36 a8083063 Iustin Pop
from ganeti import constants
37 a8083063 Iustin Pop
from ganeti import errors
38 a8083063 Iustin Pop
from ganeti import rpc
39 a8083063 Iustin Pop
from ganeti import cmdlib
40 04864530 Guido Trotter
from ganeti import locking
41 a8083063 Iustin Pop
42 7c0d6283 Michael Hanselmann
43 407339d0 Michael Hanselmann
class _LockAcquireTimeout(Exception):
  """Internal exception to report timeouts on acquiring locks.

  Raised inside this module when a lock acquire attempt returns without
  having obtained the locks, and caught by the retry loop of
  L{Processor.ExecOpCode}.

  """
47 407339d0 Michael Hanselmann
48 407339d0 Michael Hanselmann
49 e3200b18 Michael Hanselmann
def _CalculateLockAttemptTimeouts():
  """Calculate timeouts for lock attempts.

  The first attempt gets one second. Every following timeout is derived from
  its predecessor as C{(previous * 1.05) ** 1.25} and clamped to the range
  0.1 to 10.0 seconds. Entries are appended until they add up to at least
  150 seconds.

  @rtype: list of float
  @return: Timeouts in seconds, one entry per lock acquire attempt

  """
  result = [1.0]

  # Wait for a total of at least 150s before doing a blocking acquire
  while sum(result) < 150.0:
    timeout = (result[-1] * 1.05) ** 1.25

    # Cap timeout at 10 seconds. This gives other jobs a chance to run
    # even if we're still trying to get our locks, before finally moving
    # to a blocking acquire. The lower boundary of 0.1s is for safety.
    result.append(max(0.1, min(timeout, 10.0)))

  return result
72 e3200b18 Michael Hanselmann
73 e3200b18 Michael Hanselmann
74 e3200b18 Michael Hanselmann
class _LockAttemptTimeoutStrategy(object):
  """Class with lock acquire timeout strategy.

  Each instance describes a single attempt at acquiring locks. The timeout
  for an attempt is taken from L{_TIMEOUT_PER_ATTEMPT}; once the attempt
  number runs past the end of that table the timeout becomes C{None}.

  """
  # NOTE(review): "_start_time" is declared here but never assigned anywhere
  # in this class; kept so the slot layout stays unchanged.
  __slots__ = [
    "_attempt",
    "_random_fn",
    "_start_time",
    "_time_fn",
    "_running_timeout",
    ]

  # Computed once, when the class is created, and shared by all instances
  _TIMEOUT_PER_ATTEMPT = _CalculateLockAttemptTimeouts()

  def __init__(self, attempt=0, _time_fn=time.time, _random_fn=random.random):
    """Initializes this class.

    @type attempt: int
    @param attempt: Current attempt number
    @param _time_fn: Time function for unittests
    @param _random_fn: Random number generator for unittests
    @raise ValueError: If C{attempt} is negative

    """
    object.__init__(self)

    if attempt < 0:
      raise ValueError("Attempt must be zero or positive")

    self._attempt = attempt
    self._time_fn = _time_fn
    self._random_fn = _random_fn

    try:
      timeout = self._TIMEOUT_PER_ATTEMPT[attempt]
    except IndexError:
      # No more timeouts, do blocking acquire
      timeout = None

    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by locking.RunningTimeout, which is not visible here.
    self._running_timeout = locking.RunningTimeout(timeout, False,
                                                   _time_fn=_time_fn)

  def NextAttempt(self):
    """Returns the strategy for the next attempt.

    @rtype: L{_LockAttemptTimeoutStrategy}
    @return: New instance for attempt number C{self._attempt + 1}, using the
      same time and random functions as this one

    """
    return _LockAttemptTimeoutStrategy(attempt=self._attempt + 1,
                                       _time_fn=self._time_fn,
                                       _random_fn=self._random_fn)

  def CalcRemainingTimeout(self):
    """Returns the remaining timeout.

    @rtype: None or float
    @return: C{None} if the running timeout reports C{None}; otherwise the
      remaining time with a random variation applied

    """
    timeout = self._running_timeout.Remaining()

    if timeout is not None:
      # Add a small variation (-/+ 5%) to timeout. This helps in situations
      # where two or more jobs are fighting for the same lock(s).
      variation_range = timeout * 0.1
      timeout += ((self._random_fn() * variation_range) -
                  (variation_range * 0.5))

    return timeout
137 407339d0 Michael Hanselmann
138 407339d0 Michael Hanselmann
139 7260cfbe Iustin Pop
class OpExecCbBase: # pylint: disable-msg=W0232
  """Base class for OpCode execution callbacks.

  All methods are no-ops here; subclasses override the ones they need.

  """
  def NotifyStart(self):
    """Called when we are about to execute the LU.

    This function is called when we're about to start the lu's Exec() method,
    that is, after we have acquired all locks.

    """

  def Feedback(self, *args):
    """Sends feedback from the LU code to the end-user.

    @param args: Feedback data; ignored by this base implementation

    """

  def ReportLocks(self, msg):
    """Report lock operations.

    @type msg: string
    @param msg: Description of the lock operation; ignored by this base
      implementation

    """
160 ef2df7d3 Michael Hanselmann
161 031a3e57 Michael Hanselmann
162 a8083063 Iustin Pop
class Processor(object):
  """Object which runs OpCodes"""
  # Maps each opcode class to the logical unit class which implements it
  DISPATCH_TABLE = {
    # Cluster
    opcodes.OpPostInitCluster: cmdlib.LUPostInitCluster,
    opcodes.OpDestroyCluster: cmdlib.LUDestroyCluster,
    opcodes.OpQueryClusterInfo: cmdlib.LUQueryClusterInfo,
    opcodes.OpVerifyCluster: cmdlib.LUVerifyCluster,
    opcodes.OpQueryConfigValues: cmdlib.LUQueryConfigValues,
    opcodes.OpRenameCluster: cmdlib.LURenameCluster,
    opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks,
    opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams,
    opcodes.OpRedistributeConfig: cmdlib.LURedistributeConfig,
    opcodes.OpRepairDiskSizes: cmdlib.LURepairDiskSizes,
    # node lu
    opcodes.OpAddNode: cmdlib.LUAddNode,
    opcodes.OpQueryNodes: cmdlib.LUQueryNodes,
    opcodes.OpQueryNodeVolumes: cmdlib.LUQueryNodeVolumes,
    opcodes.OpQueryNodeStorage: cmdlib.LUQueryNodeStorage,
    opcodes.OpModifyNodeStorage: cmdlib.LUModifyNodeStorage,
    opcodes.OpRepairNodeStorage: cmdlib.LURepairNodeStorage,
    opcodes.OpRemoveNode: cmdlib.LURemoveNode,
    opcodes.OpSetNodeParams: cmdlib.LUSetNodeParams,
    opcodes.OpPowercycleNode: cmdlib.LUPowercycleNode,
    opcodes.OpEvacuateNode: cmdlib.LUEvacuateNode,
    opcodes.OpMigrateNode: cmdlib.LUMigrateNode,
    opcodes.OpNodeEvacuationStrategy: cmdlib.LUNodeEvacuationStrategy,
    # instance lu
    opcodes.OpCreateInstance: cmdlib.LUCreateInstance,
    opcodes.OpReinstallInstance: cmdlib.LUReinstallInstance,
    opcodes.OpRemoveInstance: cmdlib.LURemoveInstance,
    opcodes.OpRenameInstance: cmdlib.LURenameInstance,
    opcodes.OpActivateInstanceDisks: cmdlib.LUActivateInstanceDisks,
    opcodes.OpShutdownInstance: cmdlib.LUShutdownInstance,
    opcodes.OpStartupInstance: cmdlib.LUStartupInstance,
    opcodes.OpRebootInstance: cmdlib.LURebootInstance,
    opcodes.OpDeactivateInstanceDisks: cmdlib.LUDeactivateInstanceDisks,
    opcodes.OpReplaceDisks: cmdlib.LUReplaceDisks,
    opcodes.OpRecreateInstanceDisks: cmdlib.LURecreateInstanceDisks,
    opcodes.OpFailoverInstance: cmdlib.LUFailoverInstance,
    opcodes.OpMigrateInstance: cmdlib.LUMigrateInstance,
    opcodes.OpMoveInstance: cmdlib.LUMoveInstance,
    opcodes.OpConnectConsole: cmdlib.LUConnectConsole,
    opcodes.OpQueryInstances: cmdlib.LUQueryInstances,
    opcodes.OpQueryInstanceData: cmdlib.LUQueryInstanceData,
    opcodes.OpSetInstanceParams: cmdlib.LUSetInstanceParams,
    opcodes.OpGrowDisk: cmdlib.LUGrowDisk,
    # os lu
    opcodes.OpDiagnoseOS: cmdlib.LUDiagnoseOS,
    # exports lu
    opcodes.OpQueryExports: cmdlib.LUQueryExports,
    opcodes.OpExportInstance: cmdlib.LUExportInstance,
    opcodes.OpRemoveExport: cmdlib.LURemoveExport,
    # tags lu
    opcodes.OpGetTags: cmdlib.LUGetTags,
    opcodes.OpSearchTags: cmdlib.LUSearchTags,
    opcodes.OpAddTags: cmdlib.LUAddTags,
    opcodes.OpDelTags: cmdlib.LUDelTags,
    # test lu
    opcodes.OpTestDelay: cmdlib.LUTestDelay,
    opcodes.OpTestAllocator: cmdlib.LUTestAllocator,
    }

  def __init__(self, context, ec_id):
    """Constructor for Processor

    @type context: GanetiContext
    @param context: global Ganeti context
    @type ec_id: string
    @param ec_id: execution context identifier

    """
    self.context = context
    self._ec_id = ec_id
    # Runtime callbacks; only set while ExecOpCode is running
    self._cbs = None
    self.rpc = rpc.RpcRunner(context.cfg)
    self.hmclass = HooksMaster

  def _ReportLocks(self, level, names, shared, timeout, acquired, result):
    """Reports lock operations.

    Builds a "/"-separated status message, writes it to the debug log and
    forwards it to the callbacks' C{ReportLocks}, if callbacks are set.

    @type level: int
    @param level: Lock level
    @type names: list or string
    @param names: Lock names
    @type shared: bool
    @param shared: Whether the locks should be acquired in shared mode
    @type timeout: None or float
    @param timeout: Timeout for acquiring the locks
    @type acquired: bool
    @param acquired: Whether the locks have already been acquired
    @type result: None or set
    @param result: Result from L{locking.GanetiLockManager.acquire}

    """
    parts = []

    # Build message
    if acquired:
      # The acquire call has returned; a result of None means it timed out
      if result is None:
        parts.append("timeout")
      else:
        parts.append("acquired")
    else:
      parts.append("waiting")
      if timeout is None:
        parts.append("blocking")
      else:
        parts.append("timeout=%0.6fs" % timeout)

    parts.append(locking.LEVEL_NAMES[level])

    if names == locking.ALL_SET:
      parts.append("ALL")
    elif isinstance(names, basestring):
      parts.append(names)
    else:
      parts.append(",".join(sorted(names)))

    if shared:
      parts.append("shared")
    else:
      parts.append("exclusive")

    msg = "/".join(parts)

    logging.debug("LU locks %s", msg)

    if self._cbs:
      self._cbs.ReportLocks(msg)

  def _AcquireLocks(self, level, names, shared, timeout):
    """Acquires locks via the Ganeti lock manager.

    The operation is reported through L{_ReportLocks} both before and after
    the call to the lock manager.

    @type level: int
    @param level: Lock level
    @type names: list or string
    @param names: Lock names
    @type shared: bool
    @param shared: Whether the locks should be acquired in shared mode
    @type timeout: None or float
    @param timeout: Timeout for acquiring the locks
    @return: Whatever the lock manager's C{acquire} returned; callers in this
      class treat C{None} as a timeout

    """
    self._ReportLocks(level, names, shared, timeout, False, None)

    acquired = self.context.glm.acquire(level, names, shared=shared,
                                        timeout=timeout)

    self._ReportLocks(level, names, shared, timeout, True, acquired)

    return acquired

  def _ExecLU(self, lu):
    """Logical Unit execution sequence.

    Checks the LU's prerequisites, runs the pre-phase hooks, executes the LU
    and runs the post-phase hooks. If the configuration's write counter
    changed during execution, the hooks master's C{RunConfigUpdate} is
    called, even when execution failed.

    @param lu: the logical unit to execute
    @return: the LU's C{dry_run_result} in dry-run mode, otherwise the value
      returned by the LU's post-phase C{HooksCallBack}

    """
    # Remember the counter so we can tell below whether the config was written
    write_count = self.context.cfg.write_count
    lu.CheckPrereq()
    hm = HooksMaster(self.rpc.call_hooks_runner, lu)
    h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE)
    lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results,
                     self._Feedback, None)

    if getattr(lu.op, "dry_run", False):
      # in this mode, no post-hooks are run, and the config is not
      # written (as it might have been modified by another LU, and we
      # shouldn't do writeout on behalf of other threads)
      self.LogInfo("dry-run mode requested, not actually executing"
                   " the operation")
      return lu.dry_run_result

    try:
      result = lu.Exec(self._Feedback)
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST)
      result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results,
                                self._Feedback, result)
    finally:
      # FIXME: This needs locks if not lu_class.REQ_BGL
      if write_count != self.context.cfg.write_count:
        hm.RunConfigUpdate()

    return result

  def _LockAndExecLU(self, lu, level, calc_timeout):
    """Execute a Logical Unit, with the needed locks.

    This is a recursive function that starts locking the given level, and
    proceeds up, till there are no more locks to acquire. Then it executes the
    given LU and its opcodes.

    @param lu: the logical unit to execute
    @type level: int
    @param level: Lock level to handle in this call
    @type calc_timeout: callable
    @param calc_timeout: Called without arguments to get the timeout for a
      lock acquire
    @return: the result of L{_ExecLU}
    @raise _LockAcquireTimeout: If acquiring locks at this level returned
      C{None}
    @raise NotImplementedError: If the LU wants to both add and acquire
      locks at the same level
    @raise errors.OpPrereqError: If adding locks failed with a
      L{errors.LockError}

    """
    adding_locks = level in lu.add_locks
    acquiring_locks = level in lu.needed_locks
    if level not in locking.LEVELS:
      # Past the last lock level: everything is locked, run the LU
      if self._cbs:
        self._cbs.NotifyStart()

      result = self._ExecLU(lu)

    elif adding_locks and acquiring_locks:
      # We could both acquire and add locks at the same level, but for now we
      # don't need this, so we'll avoid the complicated code needed.
      raise NotImplementedError("Can't declare locks to acquire when adding"
                                " others")

    elif adding_locks or acquiring_locks:
      lu.DeclareLocks(level)
      share = lu.share_locks[level]

      try:
        assert adding_locks ^ acquiring_locks, \
          "Locks must be either added or acquired"

        if acquiring_locks:
          # Acquiring locks
          needed_locks = lu.needed_locks[level]

          acquired = self._AcquireLocks(level, needed_locks, share,
                                        calc_timeout())

          if acquired is None:
            raise _LockAcquireTimeout()

        else:
          # Adding locks
          add_locks = lu.add_locks[level]
          # Registered before the add so the cleanup below removes them again
          lu.remove_locks[level] = add_locks

          try:
            self.context.glm.add(level, add_locks, acquired=1, shared=share)
          except errors.LockError:
            raise errors.OpPrereqError(
              "Couldn't add locks (%s), probably because of a race condition"
              " with another job, who added them first" % add_locks,
              errors.ECODE_FAULT)

          acquired = add_locks

        try:
          lu.acquired_locks[level] = acquired

          result = self._LockAndExecLU(lu, level + 1, calc_timeout)
        finally:
          if level in lu.remove_locks:
            self.context.glm.remove(level, lu.remove_locks[level])
      finally:
        if self.context.glm.is_owned(level):
          self.context.glm.release(level)

    else:
      # Nothing to lock at this level, continue with the next one
      result = self._LockAndExecLU(lu, level + 1, calc_timeout)

    return result

  def ExecOpCode(self, op, cbs):
    """Execute an opcode.

    The cluster-level lock and the LU's own locks are acquired with the
    timeout given by a L{_LockAttemptTimeoutStrategy}. Whenever an acquire
    times out, the cluster-level lock is released and the whole sequence is
    retried with the strategy for the next attempt.

    @type op: an OpCode instance
    @param op: the opcode to be executed
    @type cbs: L{OpExecCbBase}
    @param cbs: Runtime callbacks
    @return: the result of executing the logical unit
    @raise errors.ProgrammerError: If C{op} is not an opcode instance
    @raise errors.OpCodeUnknown: If no logical unit is registered for the
      opcode's class

    """
    if not isinstance(op, opcodes.OpCode):
      raise errors.ProgrammerError("Non-opcode instance passed"
                                   " to ExecOpcode")

    self._cbs = cbs
    try:
      lu_class = self.DISPATCH_TABLE.get(op.__class__, None)
      if lu_class is None:
        raise errors.OpCodeUnknown("Unknown opcode")

      timeout_strategy = _LockAttemptTimeoutStrategy()

      while True:
        try:
          acquire_timeout = timeout_strategy.CalcRemainingTimeout()

          # Acquire the Big Ganeti Lock exclusively if this LU requires it,
          # and in a shared fashion otherwise (to prevent concurrent run with
          # an exclusive LU).
          if self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL,
                                not lu_class.REQ_BGL, acquire_timeout) is None:
            raise _LockAcquireTimeout()

          try:
            lu = lu_class(self, op, self.context, self.rpc)
            lu.ExpandNames()
            assert lu.needed_locks is not None, "needed_locks not set by LU"

            try:
              return self._LockAndExecLU(lu, locking.LEVEL_INSTANCE,
                                         timeout_strategy.CalcRemainingTimeout)
            finally:
              if self._ec_id:
                self.context.cfg.DropECReservations(self._ec_id)

          finally:
            self.context.glm.release(locking.LEVEL_CLUSTER)

        except _LockAcquireTimeout:
          # Timeout while waiting for lock, try again
          pass

        timeout_strategy = timeout_strategy.NextAttempt()

    finally:
      self._cbs = None

  def _Feedback(self, *args):
    """Forward call to feedback callback function.

    Does nothing if no callbacks are currently set.

    """
    if self._cbs:
      self._cbs.Feedback(*args)

  def LogStep(self, current, total, message):
    """Log a change in LU execution progress.

    @type current: int
    @param current: Number of the current step
    @type total: int
    @param total: Total number of steps
    @type message: string
    @param message: Description of the step

    """
    logging.debug("Step %d/%d %s", current, total, message)
    self._Feedback("STEP %d/%d %s" % (current, total, message))

  def LogWarning(self, message, *args, **kwargs):
    """Log a warning to the logs and the user.

    The optional keyword argument is 'hint' and can be used to show a
    hint to the user (presumably related to the warning). If the
    message is empty, it will not be printed at all, allowing one to
    show only a hint.

    @type message: string
    @param message: Warning text; used as a format string if C{args} are
      given

    """
    assert not kwargs or (len(kwargs) == 1 and "hint" in kwargs), \
           "Invalid keyword arguments for LogWarning (%s)" % str(kwargs)
    if args:
      message = message % tuple(args)
    if message:
      logging.warning(message)
      self._Feedback(" - WARNING: %s" % message)
    if "hint" in kwargs:
      self._Feedback("      Hint: %s" % kwargs["hint"])

  def LogInfo(self, message, *args):
    """Log an informational message to the logs and the user.

    @type message: string
    @param message: Message text; used as a format string if C{args} are
      given

    """
    if args:
      message = message % tuple(args)
    logging.info(message)
    self._Feedback(" - INFO: %s" % message)

  def GetECId(self):
    """Returns the execution context identifier.

    @rtype: string
    @return: the identifier passed to the constructor
    @raise errors.ProgrammerError: If no execution context id was set

    """
    if not self._ec_id:
      # The exception used to be constructed without being raised, which
      # let an unset id slip through to the caller
      raise errors.ProgrammerError("Tried to use execution context id when"
                                   " not set")
    return self._ec_id
519 adfa97e3 Guido Trotter
520 a8083063 Iustin Pop
521 a8083063 Iustin Pop
class HooksMaster(object):
  """Hooks master.

  For a given logical unit (LU), this class builds the hooks
  environment and the per-phase node lists, and sends the run commands
  to those nodes.

  To avoid a direct dependency on the rpc module, the constructor
  takes the function that actually performs the remote call. This
  will usually be rpc.call_hooks_runner, but any function which
  behaves the same works.

  """
  def __init__(self, callfn, lu):
    """Remember the call function and the LU, and precompute the env.

    @param callfn: function doing the remote call; it is invoked as
        C{callfn(node_list, hpath, phase, env)}
    @param lu: the logical unit on whose behalf the hooks are run

    """
    self.callfn = callfn
    self.lu = lu
    self.op = lu.op
    hooks_env, pre_nodes, post_nodes = self._BuildEnv()
    self.env = hooks_env
    self.node_list = {
      constants.HOOKS_PHASE_PRE: pre_nodes,
      constants.HOOKS_PHASE_POST: post_nodes,
      }

  def _BuildEnv(self):
    """Compute the environment and the target nodes.

    The base environment is built from the opcode and the LU; if the
    LU has a hooks path (C{HPATH} is not None), the variables returned
    by its C{BuildHooksEnv} are added with a C{GANETI_} prefix.

    @return: a tuple of (environment dict, frozenset of nodes for the
        pre phase, frozenset of nodes for the post phase)

    """
    env = {
      "PATH": "/sbin:/bin:/usr/sbin:/usr/bin",
      "GANETI_HOOKS_VERSION": constants.HOOKS_VERSION,
      "GANETI_OP_CODE": self.op.OP_ID,
      "GANETI_OBJECT_TYPE": self.lu.HTYPE,
      "GANETI_DATA_DIR": constants.DATA_DIR,
      }

    if self.lu.HPATH is None:
      # this LU has no hooks path, hence no LU-specific env or nodes
      return env, frozenset([]), frozenset([])

    lu_env, pre_nodes, post_nodes = self.lu.BuildHooksEnv()
    if lu_env:
      env.update(dict(("GANETI_" + name, lu_env[name]) for name in lu_env))

    return env, frozenset(pre_nodes), frozenset(post_nodes)

  def _RunWrapper(self, node_list, hpath, phase):
    """Simple wrapper over self.callfn.

    This method completes the environment (phase, path and, when the
    LU has a configuration object, cluster and master names) and
    converts all keys and values to strings before doing the call.
    The stored C{self.env} is not modified.

    @param node_list: the nodes passed on to the call function
    @param hpath: the hooks path passed on to the call function
    @param phase: the hooks phase passed on to the call function
    @return: whatever the call function returns

    """
    hook_env = dict(self.env)
    hook_env["GANETI_HOOKS_PHASE"] = phase
    hook_env["GANETI_HOOKS_PATH"] = hpath

    cfg = self.lu.cfg
    if cfg is not None:
      hook_env["GANETI_CLUSTER"] = cfg.GetClusterName()
      hook_env["GANETI_MASTER"] = cfg.GetMasterNode()

    str_env = dict((str(name), str(value))
                   for name, value in hook_env.items())

    return self.callfn(node_list, hpath, phase, str_env)

  def RunPhase(self, phase, nodes=None):
    """Run all the scripts for a phase.

    This is the main function of the HooksMaster.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param nodes: overrides the predefined list of nodes for the given phase
    @return: the processed results of the hooks multi-node rpc call;
        None if there were no nodes to run on
    @raise errors.HooksFailure: on communication failure to the nodes
        (pre phase only; in other phases only a warning is logged)
    @raise errors.HooksAbort: on failure of one of the hooks
        (pre phase only; in other phases only a warning is logged)

    """
    if not (self.node_list[phase] or nodes):
      # empty node list, we should not attempt to run this as either
      # we're in the cluster init phase and the rpc client part can't
      # even attempt to run, or this LU doesn't do hooks at all
      return

    if nodes is None:
      target_nodes = self.node_list[phase]
    else:
      target_nodes = nodes
    results = self._RunWrapper(target_nodes, self.lu.HPATH, phase)

    if not results:
      msg = "Communication Failure"
      if phase == constants.HOOKS_PHASE_PRE:
        raise errors.HooksFailure(msg)
      self.lu.LogWarning(msg)
      return results

    failures = []
    for node_name in results:
      res = results[node_name]
      if res.offline:
        # results from nodes flagged as offline are not examined
        continue

      msg = res.fail_msg
      if msg:
        self.lu.LogWarning("Communication failure to node %s: %s",
                           node_name, msg)
        continue

      for script, hkr, output in res.payload:
        if hkr != constants.HKR_FAIL:
          continue
        if phase == constants.HOOKS_PHASE_PRE:
          # collected here, raised together once all nodes are processed
          failures.append((node_name, script, output))
          continue
        if not output:
          output = "(no output)"
        self.lu.LogWarning("On %s script %s failed, output: %s" %
                           (node_name, script, output))

    if failures and phase == constants.HOOKS_PHASE_PRE:
      raise errors.HooksAbort(failures)
    return results

  def RunConfigUpdate(self):
    """Run the special configuration update hook.

    This is a special hook that runs only on the master after each
    top-level LU if the configuration has been updated. It is run in
    the post phase with the L{constants.HOOKS_NAME_CFGUPDATE} path;
    the result of the call is discarded.

    """
    post_phase = constants.HOOKS_PHASE_POST
    cfgupdate_path = constants.HOOKS_NAME_CFGUPDATE
    master_only = [self.lu.cfg.GetMasterNode()]
    self._RunWrapper(master_only, cfgupdate_path, post_phase)