root / lib / cmdlib.py @ 5d55819e
#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes, an empty list (and not None) should be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and
    # "could be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


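# Editor's note: the class below is an illustrative sketch added for this
# edit, not part of the original module.  It shows how a hypothetical
# concurrent LU would use the contract documented above: declare the instance
# lock in ExpandNames via _ExpandAndLockInstance, compute node locks in
# DeclareLocks via _LockInstancesNodes, and then implement CheckPrereq/Exec.
# All _Example* names are hypothetical.
class _ExampleInstanceLU(LogicalUnit):
  """Illustrative sketch of a minimal instance-level LU (editor's example).

  """
  HPATH = "instance-example"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # expand self.op.instance_name and declare the instance-level lock
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    # idempotent checks only; may talk to nodes but must not change anything
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    _CheckNodeOnline(self, self.instance.primary_node)

  def BuildHooksEnv(self):
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return env, nl, nl

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do for %s" % self.instance.name)

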
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


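# Editor's note: illustrative sketch added for this edit, not part of the
# original module.  It shows the Tasklet contract described above: an LU that
# assigns self.tasklets = [...] in ExpandNames gets CheckPrereq and Exec
# delegated to each tasklet in order (see LogicalUnit.CheckPrereq/Exec).
# The _Example* names are hypothetical.
class _ExampleNoopTasklet(Tasklet):
  """Illustrative no-op tasklet (editor's example).

  """
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    # tasklets know nothing about locks; the owning LU must already hold
    # the instance lock before this runs
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would operate on instance %s" % self.instance.name)

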
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


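# Editor's note: illustrative usage sketch added for this edit, not part of
# the original module.  A query-style LU typically calls _CheckOutputFields
# from CheckArguments or CheckPrereq with the field sets it supports; the
# field names below are hypothetical:
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dfree", "dtotal"),
#                      selected=self.op.output_fields)
#
# Any selected field matching neither set raises errors.OpPrereqError with
# errors.ECODE_INVAL, listing the unknown names.

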
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


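# Editor's note: illustrative sketch added for this edit, not part of the
# original module.  For a hypothetical instance "inst1.example.com" with one
# bridged NIC and one 10240 MB disk, _BuildInstanceHookEnv would produce,
# among others:
#
#   INSTANCE_NAME=inst1.example.com    INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up                 INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged         INSTANCE_NIC0_BRIDGE=<NIC link value>
#   INSTANCE_DISK_COUNT=1              INSTANCE_DISK0_SIZE=10240
#
# plus one INSTANCE_BE_* entry per backend parameter and one INSTANCE_HV_*
# entry per hypervisor parameter.  The hooks runner later takes care of the
# GANETI_ prefix (see BuildHooksEnv above).

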
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


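# Editor's note: illustrative sketch added for this edit, not part of the
# original module.  With a hypothetical candidate_pool_size of 10 and
# GetMasterCandidateStats reporting mc_now=3 and mc_should=3,
# _DecideSelfPromotion computes min(3 + 1, 10) = 4 and returns True
# (3 < 4), i.e. the node about to be added should promote itself to
# master candidate.

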
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


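# Editor's note: illustrative sketch added for this edit, not part of the
# original module.  For an OS that declares supported_variants of
# ["lenny", "squeeze"] (hypothetical values), _CheckOSVariant accepts a
# user-supplied name such as "debootstrap+lenny" (the variant is the part
# after the first "+"), while a plain "debootstrap" or an unknown
# "debootstrap+etch" raises errors.OpPrereqError.

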
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _FormatTimestamp(secs):
  """Formats a Unix timestamp with the local timezone.

  """
  return time.strftime("%F %T %Z", time.gmtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)


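# Editor's note: illustrative sketch added for this edit, not part of the
# original module.  Assuming hypothetical thresholds warn_days=30 and
# error_days=7, a certificate expiring in 20 days yields
# (LUVerifyCluster.ETYPE_WARNING, "Certificate ... expires in 20 days"),
# one expiring in 5 days yields ETYPE_ERROR with the same message, and a
# certificate valid for longer than warn_days yields (None, None).

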
def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

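  # Editor's note: illustrative sketch added for this edit, not part of the
  # original module.  For a hypothetical call
  # self._Error(self.ENODELVM, "node1.example.com",
  #             "LVM problem on node: %s", "vg missing")
  # the reported line is, when the opcode sets error_codes:
  #
  #   - ERROR:ENODELVM:node:node1.example.com:LVM problem on node: vg missing
  #
  # and otherwise:
  #
  #   - ERROR: node node1.example.com: LVM problem on node: vg missing
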
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if (not node == node_current):
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

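  # Editor's note: illustrative sketch added for this edit, not part of the
  # original module.  With hypothetical numbers: node A reports mfree=3072 MB
  # and is secondary for two auto-balanced instances whose primary is node B,
  # each needing 2048 MB, then needed_mem=4096 and 3072 < 4096, so an
  # ENODEN1 error is reported for node A should node B fail.
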
  def CheckPrereq(self):
1358
    """Check prerequisites.
1359

1360
    Transform the list of checks we're going to skip into a set and check that
1361
    all its members are valid.
1362

1363
    """
1364
    self.skip_set = frozenset(self.op.skip_checks)
1365
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1366
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
1367
                                 errors.ECODE_INVAL)
1368

    
1369
  def BuildHooksEnv(self):
1370
    """Build hooks env.
1371

1372
    Cluster-Verify hooks just ran in the post phase and their failure makes
1373
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non-redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist): %s",
               utils.SafeEncode(str(idata)))
      if test:
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(nodeinfo, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")

      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary. This is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad

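  # Shape of the per-node accounting dict built in Exec() above, shown here
  # as an illustrative sketch only (names and values are made up):
  #
  #   node_info["node1.example.com"] = {
  #     "mfree": 2048,                  # free memory reported by the node
  #     "pinst": ["inst1"],             # instances with this node as primary
  #     "sinst": ["inst2", "inst3"],    # instances with this node as secondary
  #     "sinst-by-pnode": {"node2.example.com": ["inst2", "inst3"]},
  #     "dfree": 51200,                 # free space in the volume group
  #   }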
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


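# A small illustration of the re-indentation performed in HooksCallBack()
# above: substituting on '^' in multi-line mode prefixes every line of the
# hook output with six spaces (the output string is made up):
#
#   >>> re.compile('^', re.M).sub('      ', "line1\nline2")
#   '      line1\n      line2'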
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


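# Example of the three-element result documented in LUVerifyDisks.Exec()
# above; all names are made up and only illustrate the expected shape:
#
#   ({"node2.example.com": "rpc error text"},                     # res_nodes
#    ["instance3"],                                               # res_instances
#    {"instance5": [("node1.example.com", "xenvg/disk0_data")]})  # res_missing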
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


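# Note on the unit handling in LURepairDiskSizes.Exec() above: the sizes
# returned by the blockdev_getsizes RPC are assumed to be in bytes, while
# the configuration stores disk sizes in mebibytes, hence "size >> 20":
#
#   >>> 10737418240 >> 20    # 10 GiB expressed in bytes
#   10240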
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


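# Minimal sketch of how _RecursiveCheckIfLVMBased() walks a disk tree; the
# attribute values below are made up and only the dev_type/children fields
# matter for the check:
#
#   lv_data = objects.Disk(dev_type=constants.LD_LV, size=1024)
#   lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128)
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=1024,
#                       children=[lv_data, lv_meta])
#   _RecursiveCheckIfLVMBased(drbd)    # True, via the LD_LV children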
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters.

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)


  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    self.cfg.Update(self.cluster, feedback_fn)


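# Sketch of the parameter layering used in LUSetClusterParams.CheckPrereq()
# above: per-OS hypervisor overrides are filled on top of the cluster-wide
# defaults (the dictionaries below are made-up examples):
#
#   cluster_defaults = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   os_overrides = {"root_path": "/dev/xvda1"}
#   objects.FillDict(cluster_defaults, os_overrides)
#   # -> {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}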
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.HMAC_CLUSTER_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


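# The TODO in _WaitForSync() above suggests moving to utils.Retry; a rough,
# hypothetical sketch of that shape (the helpers below do not exist in this
# module and only illustrate the idea):
#
#   def _SyncedYet():
#     if _AllDisksDoneSyncing():
#       return True
#     raise utils.RetryAgain()
#
#   utils.Retry(_SyncedYet, delay=1.0, timeout=3600)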
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


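# Illustrative call of the helper above (variable names are made up):
# checking only the local storage status of a disk on its primary node is
#
#   _CheckDiskConsistency(lu, instance.disks[0], pnode, True, ldisk=True)
#
# which passes as long as the local leg reports constants.LDS_OKAY, even if
# the mirror as a whole is still resyncing.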
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          valid = valid and osl and osl[0][1]
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


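# Example of the variant intersection performed in LUDiagnoseOS.Exec() above
# (OS and variant names are made up): if node1 reports ["lenny", "squeeze"]
# and node2 reports ["squeeze", "sid"] for the same OS, only the variants
# present everywhere survive:
#
#   [v for v in ["lenny", "squeeze"] if v in ["squeeze", "sid"]]
#   # -> ["squeeze"]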
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


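# Quick reference for the "role" field computed in LUQueryNodes.Exec() above:
# "M" master, "C" master candidate, "D" drained, "O" offline, "R" regular.
# These are the values a node listing such as
#
#   gnt-node list -o name,role
#
# is expected to display (command shown for illustration only).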
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


2969
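# Storage modification is restricted to the per-type whitelist in
# constants.MODIFIABLE_STORAGE_FIELDS; any other key in op.changes is
# rejected in CheckPrereq before the RPC call is made.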
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the storage volume.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


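# Adding (or re-adding) a node resolves its name via DNS, checks IP
# uniqueness and single/dual-homed consistency with the master, verifies
# reachability of the noded port, and only then distributes SSH keys and
# ancillary files to the new member.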
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


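# Node flags (offline, drained, master_candidate) are mutually exclusive
# when being set; demoting a master candidate may require locking all
# nodes (auto_promote) so that the candidate pool can be refilled.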
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested flag changes against the node's current state.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining != mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


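# Helper used by LUActivateInstanceDisks and the instance start/reboot
# paths: disks are assembled in two passes (secondaries first, then the
# primary) to narrow the DRBD primary-switch race described below.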
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)


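# Note: errors on secondary nodes always mark the shutdown as failed;
# errors on the primary node are tolerated only when ignore_primary=True.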
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored (they cause the function to report failure).

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


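# Typical call, as used by LUStartupInstance.CheckPrereq (illustrative):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)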
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


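# Start-up validates any per-start beparams/hvparams overrides locally
# (and hvparams on all instance nodes) and checks free memory on the
# primary node before the instance is actually started.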
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


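# Soft and hard reboots are delegated to the hypervisor via
# call_instance_reboot; a "full" reboot instead shuts the instance down,
# re-assembles its disks and starts it again from the master side.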
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


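# Reinstall requires the instance to be marked down and not running on
# its primary node; optionally the OS type is switched (and verified on
# the primary node) before the OS create scripts are re-run.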
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name,
                                 errors.ECODE_STATE)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node),
                                 errors.ECODE_STATE)

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      result = self.rpc.call_os_get(pnode, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode),
                   prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(result.payload, self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name, errors.ECODE_STATE)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node), errors.ECODE_STATE)

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


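# Rename updates the configuration and the instance lock first, then (for
# file-based disks) renames the storage directory and runs the OS rename
# script; a failing rename script is only reported as a warning.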
class LURenameInstance(LogicalUnit):
4227
  """Rename an instance.
4228

4229
  """
4230
  HPATH = "instance-rename"
4231
  HTYPE = constants.HTYPE_INSTANCE
4232
  _OP_REQP = ["instance_name", "new_name"]
4233

    
4234
  def BuildHooksEnv(self):
4235
    """Build hooks env.
4236

4237
    This runs on master, primary and secondary nodes of the instance.
4238

4239
    """
4240
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4241
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4242
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4243
    return env, nl, nl
4244

    
4245
  def CheckPrereq(self):
4246
    """Check prerequisites.
4247

4248
    This checks that the instance is in the cluster and is not running.
4249

4250
    """
4251
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4252
                                                self.op.instance_name)
4253
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4254
    assert instance is not None
4255
    _CheckNodeOnline(self, instance.primary_node)
4256

    
4257
    if instance.admin_up:
4258
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4259
                                 self.op.instance_name, errors.ECODE_STATE)
4260
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4261
                                              instance.name,
4262
                                              instance.hypervisor)
4263
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4264
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4265
    if remote_info.payload:
4266
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4267
                                 (self.op.instance_name,
4268
                                  instance.primary_node), errors.ECODE_STATE)
4269
    self.instance = instance
4270

    
4271
    # new name verification
4272
    name_info = utils.GetHostInfo(self.op.new_name)
4273

    
4274
    self.op.new_name = new_name = name_info.name
4275
    instance_list = self.cfg.GetInstanceList()
4276
    if new_name in instance_list:
4277
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4278
                                 new_name, errors.ECODE_EXISTS)
4279

    
4280
    if not getattr(self.op, "ignore_ip", False):
4281
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4282
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4283
                                   (name_info.ip, new_name),
4284
                                   errors.ECODE_NOTUNIQUE)
4285

    
4286

    
4287
  def Exec(self, feedback_fn):
4288
    """Reinstall the instance.
4289

4290
    """
4291
    inst = self.instance
4292
    old_name = inst.name
4293

    
4294
    if inst.disk_template == constants.DT_FILE:
4295
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4296

    
4297
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4298
    # Change the instance lock. This is definitely safe while we hold the BGL
4299
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4300
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4301

    
4302
    # re-read the instance from the configuration after rename
4303
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4304

    
4305
    if inst.disk_template == constants.DT_FILE:
4306
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4307
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4308
                                                     old_file_storage_dir,
4309
                                                     new_file_storage_dir)
4310
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4311
                   " (but the instance has been renamed in Ganeti)" %
4312
                   (inst.primary_node, old_file_storage_dir,
4313
                    new_file_storage_dir))
4314

    
4315
    _StartInstanceDisks(self, inst, None)
4316
    try:
4317
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4318
                                                 old_name, self.op.debug_level)
4319
      msg = result.fail_msg
4320
      if msg:
4321
        msg = ("Could not run OS rename script for instance %s on node %s"
4322
               " (but the instance has been renamed in Ganeti): %s" %
4323
               (inst.name, inst.primary_node, msg))
4324
        self.proc.LogWarning(msg)
4325
    finally:
4326
      _ShutdownInstanceDisks(self, inst)
4327

    
4328

    
4329
class LURemoveInstance(LogicalUnit):
4330
  """Remove an instance.
4331

4332
  """
4333
  HPATH = "instance-remove"
4334
  HTYPE = constants.HTYPE_INSTANCE
4335
  _OP_REQP = ["instance_name", "ignore_failures"]
4336
  REQ_BGL = False
4337

    
4338
  def CheckArguments(self):
4339
    """Check the arguments.
4340

4341
    """
4342
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4343
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4344

    
4345
  def ExpandNames(self):
4346
    self._ExpandAndLockInstance()
4347
    self.needed_locks[locking.LEVEL_NODE] = []
4348
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4349

    
4350
  def DeclareLocks(self, level):
4351
    if level == locking.LEVEL_NODE:
4352
      self._LockInstancesNodes()
4353

    
4354
  def BuildHooksEnv(self):
4355
    """Build hooks env.
4356

4357
    This runs on master, primary and secondary nodes of the instance.
4358

4359
    """
4360
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4361
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4362
    nl = [self.cfg.GetMasterNode()]
4363
    nl_post = list(self.instance.all_nodes) + nl
4364
    return env, nl, nl_post
4365

    
4366
  def CheckPrereq(self):
4367
    """Check prerequisites.
4368

4369
    This checks that the instance is in the cluster.
4370

4371
    """
4372
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4373
    assert self.instance is not None, \
4374
      "Cannot retrieve locked instance %s" % self.op.instance_name
4375

    
4376
  def Exec(self, feedback_fn):
4377
    """Remove the instance.
4378

4379
    """
4380
    instance = self.instance
4381
    logging.info("Shutting down instance %s on node %s",
4382
                 instance.name, instance.primary_node)
4383

    
4384
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4385
                                             self.shutdown_timeout)
4386
    msg = result.fail_msg
4387
    if msg:
4388
      if self.op.ignore_failures:
4389
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4390
      else:
4391
        raise errors.OpExecError("Could not shutdown instance %s on"
4392
                                 " node %s: %s" %
4393
                                 (instance.name, instance.primary_node, msg))
4394

    
4395
    logging.info("Removing block devices for instance %s", instance.name)
4396

    
4397
    if not _RemoveDisks(self, instance):
4398
      if self.op.ignore_failures:
4399
        feedback_fn("Warning: can't remove instance's disks")
4400
      else:
4401
        raise errors.OpExecError("Can't remove instance's disks")
4402

    
4403
    logging.info("Removing instance %s out of cluster config", instance.name)
4404

    
4405
    self.cfg.RemoveInstance(instance.name)
4406
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4407

    
4408

    
4409
class LUQueryInstances(NoHooksLU):
4410
  """Logical unit for querying instances.
4411

4412
  """
4413
  # pylint: disable-msg=W0142
4414
  _OP_REQP = ["output_fields", "names", "use_locking"]
4415
  REQ_BGL = False
4416
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4417
                    "serial_no", "ctime", "mtime", "uuid"]
4418
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4419
                                    "admin_state",
4420
                                    "disk_template", "ip", "mac", "bridge",
4421
                                    "nic_mode", "nic_link",
4422
                                    "sda_size", "sdb_size", "vcpus", "tags",
4423
                                    "network_port", "beparams",
4424
                                    r"(disk)\.(size)/([0-9]+)",
4425
                                    r"(disk)\.(sizes)", "disk_usage",
4426
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4427
                                    r"(nic)\.(bridge)/([0-9]+)",
4428
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4429
                                    r"(disk|nic)\.(count)",
4430
                                    "hvparams",
4431
                                    ] + _SIMPLE_FIELDS +
4432
                                  ["hv/%s" % name
4433
                                   for name in constants.HVS_PARAMETERS
4434
                                   if name not in constants.HVC_GLOBALS] +
4435
                                  ["be/%s" % name
4436
                                   for name in constants.BES_PARAMETERS])
4437
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4438

    
4439

    
4440
  def ExpandNames(self):
4441
    _CheckOutputFields(static=self._FIELDS_STATIC,
4442
                       dynamic=self._FIELDS_DYNAMIC,
4443
                       selected=self.op.output_fields)
4444

    
4445
    self.needed_locks = {}
4446
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4447
    self.share_locks[locking.LEVEL_NODE] = 1
4448

    
4449
    if self.op.names:
4450
      self.wanted = _GetWantedInstances(self, self.op.names)
4451
    else:
4452
      self.wanted = locking.ALL_SET
4453

    
4454
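    # live data (and therefore node locks) is only needed when at least one
    # non-static field was requested and the caller asked for locking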
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4455
    self.do_locking = self.do_node_query and self.op.use_locking
4456
    if self.do_locking:
4457
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4458
      self.needed_locks[locking.LEVEL_NODE] = []
4459
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4460

    
4461
  def DeclareLocks(self, level):
4462
    if level == locking.LEVEL_NODE and self.do_locking:
4463
      self._LockInstancesNodes()
4464

    
4465
  def CheckPrereq(self):
4466
    """Check prerequisites.
4467

4468
    """
4469
    pass
4470

    
4471
  def Exec(self, feedback_fn):
4472
    """Computes the list of nodes and their attributes.
4473

4474
    """
4475
    # pylint: disable-msg=R0912
4476
    # way too many branches here
4477
    all_info = self.cfg.GetAllInstancesInfo()
4478
    if self.wanted == locking.ALL_SET:
4479
      # caller didn't specify instance names, so ordering is not important
4480
      if self.do_locking:
4481
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4482
      else:
4483
        instance_names = all_info.keys()
4484
      instance_names = utils.NiceSort(instance_names)
4485
    else:
4486
      # caller did specify names, so we must keep the ordering
4487
      if self.do_locking:
4488
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4489
      else:
4490
        tgt_set = all_info.keys()
4491
      missing = set(self.wanted).difference(tgt_set)
4492
      if missing:
4493
        raise errors.OpExecError("Some instances were removed before"
4494
                                 " retrieving their data: %s" % missing)
4495
      instance_names = self.wanted
4496

    
4497
    instance_list = [all_info[iname] for iname in instance_names]
4498

    
4499
    # begin data gathering
4500

    
4501
    nodes = frozenset([inst.primary_node for inst in instance_list])
4502
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4503

    
4504
    bad_nodes = []
4505
    off_nodes = []
4506
    if self.do_node_query:
4507
      live_data = {}
4508
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4509
      for name in nodes:
4510
        result = node_data[name]
4511
        if result.offline:
4512
          # offline nodes will be in both lists
4513
          off_nodes.append(name)
4514
        if result.fail_msg:
4515
          bad_nodes.append(name)
4516
        else:
4517
          if result.payload:
4518
            live_data.update(result.payload)
4519
          # else no instance is alive
4520
    else:
4521
      live_data = dict([(name, {}) for name in instance_names])
4522

    
4523
    # end data gathering
4524

    
4525
    HVPREFIX = "hv/"
4526
    BEPREFIX = "be/"
4527
    output = []
4528
    cluster = self.cfg.GetClusterInfo()
4529
    for instance in instance_list:
4530
      iout = []
4531
      i_hv = cluster.FillHV(instance, skip_globals=True)
4532
      i_be = cluster.FillBE(instance)
4533
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4534
                                 nic.nicparams) for nic in instance.nics]
4535
      for field in self.op.output_fields:
4536
        st_match = self._FIELDS_STATIC.Matches(field)
4537
        if field in self._SIMPLE_FIELDS:
4538
          val = getattr(instance, field)
4539
        elif field == "pnode":
4540
          val = instance.primary_node
4541
        elif field == "snodes":
4542
          val = list(instance.secondary_nodes)
4543
        elif field == "admin_state":
4544
          val = instance.admin_up
4545
        elif field == "oper_state":
4546
          if instance.primary_node in bad_nodes:
4547
            val = None
4548
          else:
4549
            val = bool(live_data.get(instance.name))
4550
        elif field == "status":
4551
          if instance.primary_node in off_nodes:
4552
            val = "ERROR_nodeoffline"
4553
          elif instance.primary_node in bad_nodes:
4554
            val = "ERROR_nodedown"
4555
          else:
4556
            running = bool(live_data.get(instance.name))
4557
            if running:
4558
              if instance.admin_up:
4559
                val = "running"
4560
              else:
4561
                val = "ERROR_up"
4562
            else:
4563
              if instance.admin_up:
4564
                val = "ERROR_down"
4565
              else:
4566
                val = "ADMIN_down"
4567
        elif field == "oper_ram":
4568
          if instance.primary_node in bad_nodes:
4569
            val = None
4570
          elif instance.name in live_data:
4571
            val = live_data[instance.name].get("memory", "?")
4572
          else:
4573
            val = "-"
4574
        elif field == "vcpus":
4575
          val = i_be[constants.BE_VCPUS]
4576
        elif field == "disk_template":
4577
          val = instance.disk_template
4578
        elif field == "ip":
4579
          if instance.nics:
4580
            val = instance.nics[0].ip
4581
          else:
4582
            val = None
4583
        elif field == "nic_mode":
4584
          if instance.nics:
4585
            val = i_nicp[0][constants.NIC_MODE]
4586
          else:
4587
            val = None
4588
        elif field == "nic_link":
4589
          if instance.nics:
4590
            val = i_nicp[0][constants.NIC_LINK]
4591
          else:
4592
            val = None
4593
        elif field == "bridge":
4594
          if (instance.nics and
4595
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4596
            val = i_nicp[0][constants.NIC_LINK]
4597
          else:
4598
            val = None
4599
        elif field == "mac":
4600
          if instance.nics:
4601
            val = instance.nics[0].mac
4602
          else:
4603
            val = None
4604
        elif field == "sda_size" or field == "sdb_size":
4605
          idx = ord(field[2]) - ord('a')
4606
          try:
4607
            val = instance.FindDisk(idx).size
4608
          except errors.OpPrereqError:
4609
            val = None
4610
        elif field == "disk_usage": # total disk usage per node
4611
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4612
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4613
        elif field == "tags":
4614
          val = list(instance.GetTags())
4615
        elif field == "hvparams":
4616
          val = i_hv
4617
        elif (field.startswith(HVPREFIX) and
4618
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4619
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4620
          val = i_hv.get(field[len(HVPREFIX):], None)
4621
        elif field == "beparams":
4622
          val = i_be
4623
        elif (field.startswith(BEPREFIX) and
4624
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4625
          val = i_be.get(field[len(BEPREFIX):], None)
4626
        elif st_match and st_match.groups():
4627
          # matches a variable list
4628
          st_groups = st_match.groups()
4629
          if st_groups and st_groups[0] == "disk":
4630
            if st_groups[1] == "count":
4631
              val = len(instance.disks)
4632
            elif st_groups[1] == "sizes":
4633
              val = [disk.size for disk in instance.disks]
4634
            elif st_groups[1] == "size":
4635
              try:
4636
                val = instance.FindDisk(st_groups[2]).size
4637
              except errors.OpPrereqError:
4638
                val = None
4639
            else:
4640
              assert False, "Unhandled disk parameter"
4641
          elif st_groups[0] == "nic":
4642
            if st_groups[1] == "count":
4643
              val = len(instance.nics)
4644
            elif st_groups[1] == "macs":
4645
              val = [nic.mac for nic in instance.nics]
4646
            elif st_groups[1] == "ips":
4647
              val = [nic.ip for nic in instance.nics]
4648
            elif st_groups[1] == "modes":
4649
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4650
            elif st_groups[1] == "links":
4651
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4652
            elif st_groups[1] == "bridges":
4653
              val = []
4654
              for nicp in i_nicp:
4655
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4656
                  val.append(nicp[constants.NIC_LINK])
4657
                else:
4658
                  val.append(None)
4659
            else:
4660
              # index-based item
4661
              nic_idx = int(st_groups[2])
4662
              if nic_idx >= len(instance.nics):
4663
                val = None
4664
              else:
4665
                if st_groups[1] == "mac":
4666
                  val = instance.nics[nic_idx].mac
4667
                elif st_groups[1] == "ip":
4668
                  val = instance.nics[nic_idx].ip
4669
                elif st_groups[1] == "mode":
4670
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4671
                elif st_groups[1] == "link":
4672
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4673
                elif st_groups[1] == "bridge":
4674
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4675
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4676
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4677
                  else:
4678
                    val = None
4679
                else:
4680
                  assert False, "Unhandled NIC parameter"
4681
          else:
4682
            assert False, ("Declared but unhandled variable parameter '%s'" %
4683
                           field)
4684
        else:
4685
          assert False, "Declared but unhandled parameter '%s'" % field
4686
        iout.append(val)
4687
      output.append(iout)
4688

    
4689
    return output
4690

    
4691

    
4692
class LUFailoverInstance(LogicalUnit):
4693
  """Failover an instance.
4694

4695
  """
4696
  HPATH = "instance-failover"
4697
  HTYPE = constants.HTYPE_INSTANCE
4698
  _OP_REQP = ["instance_name", "ignore_consistency"]
4699
  REQ_BGL = False
4700

    
4701
  def CheckArguments(self):
4702
    """Check the arguments.
4703

4704
    """
4705
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4706
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4707

    
4708
  def ExpandNames(self):
4709
    self._ExpandAndLockInstance()
4710
    self.needed_locks[locking.LEVEL_NODE] = []
4711
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4712

    
4713
  def DeclareLocks(self, level):
4714
    if level == locking.LEVEL_NODE:
4715
      self._LockInstancesNodes()
4716

    
4717
  def BuildHooksEnv(self):
4718
    """Build hooks env.
4719

4720
    This runs on master, primary and secondary nodes of the instance.
4721

4722
    """
4723
    instance = self.instance
4724
    source_node = instance.primary_node
4725
    target_node = instance.secondary_nodes[0]
4726
    env = {
4727
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4728
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4729
      "OLD_PRIMARY": source_node,
4730
      "OLD_SECONDARY": target_node,
4731
      "NEW_PRIMARY": target_node,
4732
      "NEW_SECONDARY": source_node,
4733
      }
4734
    env.update(_BuildInstanceHookEnvByObject(self, instance))
4735
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4736
    nl_post = list(nl)
4737
    nl_post.append(source_node)
4738
    return env, nl, nl_post
4739

    
4740
  def CheckPrereq(self):
4741
    """Check prerequisites.
4742

4743
    This checks that the instance is in the cluster.
4744

4745
    """
4746
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4747
    assert self.instance is not None, \
4748
      "Cannot retrieve locked instance %s" % self.op.instance_name
4749

    
4750
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4751
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4752
      raise errors.OpPrereqError("Instance's disk layout is not"
4753
                                 " network mirrored, cannot failover.",
4754
                                 errors.ECODE_STATE)
4755

    
4756
    secondary_nodes = instance.secondary_nodes
4757
    if not secondary_nodes:
4758
      raise errors.ProgrammerError("no secondary node but using "
4759
                                   "a mirrored disk template")
4760

    
4761
    target_node = secondary_nodes[0]
4762
    _CheckNodeOnline(self, target_node)
4763
    _CheckNodeNotDrained(self, target_node)
4764
    if instance.admin_up:
4765
      # check memory requirements on the secondary node
4766
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4767
                           instance.name, bep[constants.BE_MEMORY],
4768
                           instance.hypervisor)
4769
    else:
4770
      self.LogInfo("Not checking memory on the secondary node as"
4771
                   " instance will not be started")
4772

    
4773
    # check bridge existence
4774
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4775

    
4776
  def Exec(self, feedback_fn):
4777
    """Failover an instance.
4778

4779
    The failover is done by shutting it down on its present node and
4780
    starting it on the secondary.
4781

4782
    """
4783
    instance = self.instance
4784

    
4785
    source_node = instance.primary_node
4786
    target_node = instance.secondary_nodes[0]
4787

    
4788
    if instance.admin_up:
4789
      feedback_fn("* checking disk consistency between source and target")
4790
      for dev in instance.disks:
4791
        # for drbd, these are drbd over lvm
4792
        if not _CheckDiskConsistency(self, dev, target_node, False):
4793
          if not self.op.ignore_consistency:
4794
            raise errors.OpExecError("Disk %s is degraded on target node,"
4795
                                     " aborting failover." % dev.iv_name)
4796
    else:
4797
      feedback_fn("* not checking disk consistency as instance is not running")
4798

    
4799
    feedback_fn("* shutting down instance on source node")
4800
    logging.info("Shutting down instance %s on node %s",
4801
                 instance.name, source_node)
4802

    
4803
    result = self.rpc.call_instance_shutdown(source_node, instance,
4804
                                             self.shutdown_timeout)
4805
    msg = result.fail_msg
4806
    if msg:
4807
      if self.op.ignore_consistency:
4808
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4809
                             " Proceeding anyway. Please make sure node"
4810
                             " %s is down. Error details: %s",
4811
                             instance.name, source_node, source_node, msg)
4812
      else:
4813
        raise errors.OpExecError("Could not shutdown instance %s on"
4814
                                 " node %s: %s" %
4815
                                 (instance.name, source_node, msg))
4816

    
4817
    feedback_fn("* deactivating the instance's disks on source node")
4818
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4819
      raise errors.OpExecError("Can't shut down the instance's disks.")
4820

    
4821
    instance.primary_node = target_node
4822
    # distribute new instance config to the other nodes
4823
    self.cfg.Update(instance, feedback_fn)
4824

    
4825
    # Only start the instance if it's marked as up
4826
    if instance.admin_up:
4827
      feedback_fn("* activating the instance's disks on target node")
4828
      logging.info("Starting instance %s on node %s",
4829
                   instance.name, target_node)
4830

    
4831
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4832
                                               ignore_secondaries=True)
4833
      if not disks_ok:
4834
        _ShutdownInstanceDisks(self, instance)
4835
        raise errors.OpExecError("Can't activate the instance's disks")
4836

    
4837
      feedback_fn("* starting the instance on the target node")
4838
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4839
      msg = result.fail_msg
4840
      if msg:
4841
        _ShutdownInstanceDisks(self, instance)
4842
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4843
                                 (instance.name, target_node, msg))
4844

    
4845

    
4846
class LUMigrateInstance(LogicalUnit):
4847
  """Migrate an instance.
4848

4849
  This is migration without shutting down, compared to the failover,
4850
  which is done with shutdown.
4851

4852
  """
4853
  HPATH = "instance-migrate"
4854
  HTYPE = constants.HTYPE_INSTANCE
4855
  _OP_REQP = ["instance_name", "live", "cleanup"]
4856

    
4857
  REQ_BGL = False
4858

    
4859
  def ExpandNames(self):
4860
    self._ExpandAndLockInstance()
4861

    
4862
    self.needed_locks[locking.LEVEL_NODE] = []
4863
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4864

    
4865
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4866
                                       self.op.live, self.op.cleanup)
4867
    self.tasklets = [self._migrater]
4868

    
4869
  def DeclareLocks(self, level):
4870
    if level == locking.LEVEL_NODE:
4871
      self._LockInstancesNodes()
4872

    
4873
  def BuildHooksEnv(self):
4874
    """Build hooks env.
4875

4876
    This runs on master, primary and secondary nodes of the instance.
4877

4878
    """
4879
    instance = self._migrater.instance
4880
    source_node = instance.primary_node
4881
    target_node = instance.secondary_nodes[0]
4882
    env = _BuildInstanceHookEnvByObject(self, instance)
4883
    env["MIGRATE_LIVE"] = self.op.live
4884
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4885
    env.update({
4886
        "OLD_PRIMARY": source_node,
4887
        "OLD_SECONDARY": target_node,
4888
        "NEW_PRIMARY": target_node,
4889
        "NEW_SECONDARY": source_node,
4890
        })
4891
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4892
    nl_post = list(nl)
4893
    nl_post.append(source_node)
4894
    return env, nl, nl_post
4895

    
4896

    
4897
class LUMoveInstance(LogicalUnit):
4898
  """Move an instance by data-copying.
4899

4900
  """
4901
  HPATH = "instance-move"
4902
  HTYPE = constants.HTYPE_INSTANCE
4903
  _OP_REQP = ["instance_name", "target_node"]
4904
  REQ_BGL = False
4905

    
4906
  def CheckArguments(self):
4907
    """Check the arguments.
4908

4909
    """
4910
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4911
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4912

    
4913
  def ExpandNames(self):
4914
    self._ExpandAndLockInstance()
4915
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4916
    self.op.target_node = target_node
4917
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4918
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4919

    
4920
  def DeclareLocks(self, level):
4921
    if level == locking.LEVEL_NODE:
4922
      self._LockInstancesNodes(primary_only=True)
4923

    
4924
  def BuildHooksEnv(self):
4925
    """Build hooks env.
4926

4927
    This runs on master, primary and target nodes of the instance.
4928

4929
    """
4930
    env = {
4931
      "TARGET_NODE": self.op.target_node,
4932
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4933
      }
4934
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4935
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4936
                                       self.op.target_node]
4937
    return env, nl, nl
4938

    
4939
  def CheckPrereq(self):
4940
    """Check prerequisites.
4941

4942
    This checks that the instance is in the cluster.
4943

4944
    """
4945
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4946
    assert self.instance is not None, \
4947
      "Cannot retrieve locked instance %s" % self.op.instance_name
4948

    
4949
    node = self.cfg.GetNodeInfo(self.op.target_node)
4950
    assert node is not None, \
4951
      "Cannot retrieve locked node %s" % self.op.target_node
4952

    
4953
    self.target_node = target_node = node.name
4954

    
4955
    if target_node == instance.primary_node:
4956
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4957
                                 (instance.name, target_node),
4958
                                 errors.ECODE_STATE)
4959

    
4960
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4961

    
4962
    for idx, dsk in enumerate(instance.disks):
4963
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4964
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4965
                                   " cannot copy" % idx, errors.ECODE_STATE)
4966

    
4967
    _CheckNodeOnline(self, target_node)
4968
    _CheckNodeNotDrained(self, target_node)
4969

    
4970
    if instance.admin_up:
4971
      # check memory requirements on the target node
4972
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4973
                           instance.name, bep[constants.BE_MEMORY],
4974
                           instance.hypervisor)
4975
    else:
4976
      self.LogInfo("Not checking memory on the secondary node as"
4977
                   " instance will not be started")
4978

    
4979
    # check bridge existence
4980
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4981

    
4982
  def Exec(self, feedback_fn):
4983
    """Move an instance.
4984

4985
    The move is done by shutting it down on its present node, copying
4986
    the data over (slow) and starting it on the new node.
4987

4988
    """
4989
    instance = self.instance
4990

    
4991
    source_node = instance.primary_node
4992
    target_node = self.target_node
4993

    
4994
    self.LogInfo("Shutting down instance %s on source node %s",
4995
                 instance.name, source_node)
4996

    
4997
    result = self.rpc.call_instance_shutdown(source_node, instance,
4998
                                             self.shutdown_timeout)
4999
    msg = result.fail_msg
5000
    if msg:
5001
      # the move opcode may not define ignore_consistency; default to False
      if getattr(self.op, "ignore_consistency", False):
5002
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5003
                             " Proceeding anyway. Please make sure node"
5004
                             " %s is down. Error details: %s",
5005
                             instance.name, source_node, source_node, msg)
5006
      else:
5007
        raise errors.OpExecError("Could not shutdown instance %s on"
5008
                                 " node %s: %s" %
5009
                                 (instance.name, source_node, msg))
5010

    
5011
    # create the target disks
5012
    try:
5013
      _CreateDisks(self, instance, target_node=target_node)
5014
    except errors.OpExecError:
5015
      self.LogWarning("Device creation failed, reverting...")
5016
      try:
5017
        _RemoveDisks(self, instance, target_node=target_node)
5018
      finally:
5019
        self.cfg.ReleaseDRBDMinors(instance.name)
5020
        raise
5021

    
5022
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5023

    
5024
    errs = []
5025
    # activate, get path, copy the data over
5026
    for idx, disk in enumerate(instance.disks):
5027
      self.LogInfo("Copying data for disk %d", idx)
5028
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5029
                                               instance.name, True)
5030
      if result.fail_msg:
5031
        self.LogWarning("Can't assemble newly created disk %d: %s",
5032
                        idx, result.fail_msg)
5033
        errs.append(result.fail_msg)
5034
        break
5035
      dev_path = result.payload
5036
      result = self.rpc.call_blockdev_export(source_node, disk,
5037
                                             target_node, dev_path,
5038
                                             cluster_name)
5039
      if result.fail_msg:
5040
        self.LogWarning("Can't copy data over for disk %d: %s",
5041
                        idx, result.fail_msg)
5042
        errs.append(result.fail_msg)
5043
        break
5044

    
5045
    if errs:
5046
      self.LogWarning("Some disks failed to copy, aborting")
5047
      try:
5048
        _RemoveDisks(self, instance, target_node=target_node)
5049
      finally:
5050
        self.cfg.ReleaseDRBDMinors(instance.name)
5051
        raise errors.OpExecError("Errors during disk copy: %s" %
5052
                                 (",".join(errs),))
5053

    
5054
    instance.primary_node = target_node
5055
    self.cfg.Update(instance, feedback_fn)
5056

    
5057
    self.LogInfo("Removing the disks on the original node")
5058
    _RemoveDisks(self, instance, target_node=source_node)
5059

    
5060
    # Only start the instance if it's marked as up
5061
    if instance.admin_up:
5062
      self.LogInfo("Starting instance %s on node %s",
5063
                   instance.name, target_node)
5064

    
5065
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5066
                                           ignore_secondaries=True)
5067
      if not disks_ok:
5068
        _ShutdownInstanceDisks(self, instance)
5069
        raise errors.OpExecError("Can't activate the instance's disks")
5070

    
5071
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5072
      msg = result.fail_msg
5073
      if msg:
5074
        _ShutdownInstanceDisks(self, instance)
5075
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5076
                                 (instance.name, target_node, msg))
5077

    
5078

    
5079
class LUMigrateNode(LogicalUnit):
5080
  """Migrate all instances from a node.
5081

5082
  """
5083
  HPATH = "node-migrate"
5084
  HTYPE = constants.HTYPE_NODE
5085
  _OP_REQP = ["node_name", "live"]
5086
  REQ_BGL = False
5087

    
5088
  def ExpandNames(self):
5089
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5090

    
5091
    self.needed_locks = {
5092
      locking.LEVEL_NODE: [self.op.node_name],
5093
      }
5094

    
5095
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5096

    
5097
    # Create tasklets for migrating instances for all instances on this node
5098
    names = []
5099
    tasklets = []
5100

    
5101
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5102
      logging.debug("Migrating instance %s", inst.name)
5103
      names.append(inst.name)
5104

    
5105
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5106

    
5107
    self.tasklets = tasklets
5108

    
5109
    # Declare instance locks
5110
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5111

    
5112
  def DeclareLocks(self, level):
5113
    if level == locking.LEVEL_NODE:
5114
      self._LockInstancesNodes()
5115

    
5116
  def BuildHooksEnv(self):
5117
    """Build hooks env.
5118

5119
    This runs on the master only.
5120

5121
    """
5122
    env = {
5123
      "NODE_NAME": self.op.node_name,
5124
      }
5125

    
5126
    nl = [self.cfg.GetMasterNode()]
5127

    
5128
    return (env, nl, nl)
5129

    
5130

    
5131
class TLMigrateInstance(Tasklet):
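  """Tasklet implementing the actual instance migration.

  Used by LUMigrateInstance for a single instance and by LUMigrateNode,
  which creates one tasklet per primary instance of the node.
  """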
5132
  def __init__(self, lu, instance_name, live, cleanup):
5133
    """Initializes this class.
5134

5135
    """
5136
    Tasklet.__init__(self, lu)
5137

    
5138
    # Parameters
5139
    self.instance_name = instance_name
5140
    self.live = live
5141
    self.cleanup = cleanup
5142

    
5143
  def CheckPrereq(self):
5144
    """Check prerequisites.
5145

5146
    This checks that the instance is in the cluster.
5147

5148
    """
5149
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5150
    instance = self.cfg.GetInstanceInfo(instance_name)
5151
    assert instance is not None
5152

    
5153
    if instance.disk_template != constants.DT_DRBD8:
5154
      raise errors.OpPrereqError("Instance's disk layout is not"
5155
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5156

    
5157
    secondary_nodes = instance.secondary_nodes
5158
    if not secondary_nodes:
5159
      raise errors.ConfigurationError("No secondary node but using"
5160
                                      " drbd8 disk template")
5161

    
5162
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5163

    
5164
    target_node = secondary_nodes[0]
5165
    # check memory requirements on the secondary node
5166
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5167
                         instance.name, i_be[constants.BE_MEMORY],
5168
                         instance.hypervisor)
5169

    
5170
    # check bridge existence
5171
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5172

    
5173
    if not self.cleanup:
5174
      _CheckNodeNotDrained(self, target_node)
5175
      result = self.rpc.call_instance_migratable(instance.primary_node,
5176
                                                 instance)
5177
      result.Raise("Can't migrate, please use failover",
5178
                   prereq=True, ecode=errors.ECODE_STATE)
5179

    
5180
    self.instance = instance
5181

    
5182
  def _WaitUntilSync(self):
5183
    """Poll with custom rpc for disk sync.
5184

5185
    This uses our own step-based rpc call.
5186

5187
    """
5188
    self.feedback_fn("* wait until resync is done")
5189
    all_done = False
5190
    while not all_done:
5191
      all_done = True
5192
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5193
                                            self.nodes_ip,
5194
                                            self.instance.disks)
5195
      min_percent = 100
5196
      for node, nres in result.items():
5197
        nres.Raise("Cannot resync disks on node %s" % node)
5198
        node_done, node_percent = nres.payload
5199
        all_done = all_done and node_done
5200
        if node_percent is not None:
5201
          min_percent = min(min_percent, node_percent)
5202
      if not all_done:
5203
        if min_percent < 100:
5204
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5205
        time.sleep(2)
5206

    
5207
  def _EnsureSecondary(self, node):
5208
    """Demote a node to secondary.
5209

5210
    """
5211
    self.feedback_fn("* switching node %s to secondary mode" % node)
5212

    
5213
    for dev in self.instance.disks:
5214
      self.cfg.SetDiskID(dev, node)
5215

    
5216
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5217
                                          self.instance.disks)
5218
    result.Raise("Cannot change disk to secondary on node %s" % node)
5219

    
5220
  def _GoStandalone(self):
5221
    """Disconnect from the network.
5222

5223
    """
5224
    self.feedback_fn("* changing into standalone mode")
5225
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5226
                                               self.instance.disks)
5227
    for node, nres in result.items():
5228
      nres.Raise("Cannot disconnect disks node %s" % node)
5229

    
5230
  def _GoReconnect(self, multimaster):
5231
    """Reconnect to the network.
5232

5233
    """
5234
    if multimaster:
5235
      msg = "dual-master"
5236
    else:
5237
      msg = "single-master"
5238
    self.feedback_fn("* changing disks into %s mode" % msg)
5239
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5240
                                           self.instance.disks,
5241
                                           self.instance.name, multimaster)
5242
    for node, nres in result.items():
5243
      nres.Raise("Cannot change disks config on node %s" % node)
5244

    
5245
  def _ExecCleanup(self):
5246
    """Try to cleanup after a failed migration.
5247

5248
    The cleanup is done by:
5249
      - check that the instance is running only on one node
5250
        (and update the config if needed)
5251
      - change disks on its secondary node to secondary
5252
      - wait until disks are fully synchronized
5253
      - disconnect from the network
5254
      - change disks into single-master mode
5255
      - wait again until disks are fully synchronized
5256

5257
    """
5258
    instance = self.instance
5259
    target_node = self.target_node
5260
    source_node = self.source_node
5261

    
5262
    # check running on only one node
5263
    self.feedback_fn("* checking where the instance actually runs"
5264
                     " (if this hangs, the hypervisor might be in"
5265
                     " a bad state)")
5266
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5267
    for node, result in ins_l.items():
5268
      result.Raise("Can't contact node %s" % node)
5269

    
5270
    runningon_source = instance.name in ins_l[source_node].payload
5271
    runningon_target = instance.name in ins_l[target_node].payload
5272

    
5273
    if runningon_source and runningon_target:
5274
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5275
                               " or the hypervisor is confused. You will have"
5276
                               " to ensure manually that it runs only on one"
5277
                               " and restart this operation.")
5278

    
5279
    if not (runningon_source or runningon_target):
5280
      raise errors.OpExecError("Instance does not seem to be running at all."
5281
                               " In this case, it's safer to repair by"
5282
                               " running 'gnt-instance stop' to ensure disk"
5283
                               " shutdown, and then restarting it.")
5284

    
5285
    if runningon_target:
5286
      # the migration has actually succeeded, we need to update the config
5287
      self.feedback_fn("* instance running on secondary node (%s),"
5288
                       " updating config" % target_node)
5289
      instance.primary_node = target_node
5290
      self.cfg.Update(instance, self.feedback_fn)
5291
      demoted_node = source_node
5292
    else:
5293
      self.feedback_fn("* instance confirmed to be running on its"
5294
                       " primary node (%s)" % source_node)
5295
      demoted_node = target_node
5296

    
5297
    self._EnsureSecondary(demoted_node)
5298
    try:
5299
      self._WaitUntilSync()
5300
    except errors.OpExecError:
5301
      # we ignore here errors, since if the device is standalone, it
5302
      # won't be able to sync
5303
      pass
5304
    self._GoStandalone()
5305
    self._GoReconnect(False)
5306
    self._WaitUntilSync()
5307

    
5308
    self.feedback_fn("* done")
5309

    
5310
  def _RevertDiskStatus(self):
5311
    """Try to revert the disk status after a failed migration.
5312

5313
    """
5314
    target_node = self.target_node
5315
    try:
5316
      self._EnsureSecondary(target_node)
5317
      self._GoStandalone()
5318
      self._GoReconnect(False)
5319
      self._WaitUntilSync()
5320
    except errors.OpExecError, err:
5321
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5322
                         " drives: error '%s'\n"
5323
                         "Please look and recover the instance status" %
5324
                         str(err))
5325

    
5326
  def _AbortMigration(self):
5327
    """Call the hypervisor code to abort a started migration.
5328

5329
    """
5330
    instance = self.instance
5331
    target_node = self.target_node
5332
    migration_info = self.migration_info
5333

    
5334
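    # calling finalize_migration with success=False makes the target node
    # clean up the half-started migration instead of completing it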
    abort_result = self.rpc.call_finalize_migration(target_node,
5335
                                                    instance,
5336
                                                    migration_info,
5337
                                                    False)
5338
    abort_msg = abort_result.fail_msg
5339
    if abort_msg:
5340
      logging.error("Aborting migration failed on target node %s: %s",
5341
                    target_node, abort_msg)
5342
      # Don't raise an exception here, as we still have to try to revert the
5343
      # disk status, even if this step failed.
5344

    
5345
  def _ExecMigration(self):
5346
    """Migrate an instance.
5347

5348
    The migrate is done by:
5349
      - change the disks into dual-master mode
5350
      - wait until disks are fully synchronized again
5351
      - migrate the instance
5352
      - change disks on the new secondary node (the old primary) to secondary
5353
      - wait until disks are fully synchronized
5354
      - change disks into single-master mode
5355

5356
    """
5357
    instance = self.instance
5358
    target_node = self.target_node
5359
    source_node = self.source_node
5360

    
5361
    self.feedback_fn("* checking disk consistency between source and target")
5362
    for dev in instance.disks:
5363
      if not _CheckDiskConsistency(self, dev, target_node, False):
5364
        raise errors.OpExecError("Disk %s is degraded or not fully"
5365
                                 " synchronized on target node,"
5366
                                 " aborting migrate." % dev.iv_name)
5367

    
5368
    # First get the migration information from the remote node
5369
    result = self.rpc.call_migration_info(source_node, instance)
5370
    msg = result.fail_msg
5371
    if msg:
5372
      log_err = ("Failed fetching source migration information from %s: %s" %
5373
                 (source_node, msg))
5374
      logging.error(log_err)
5375
      raise errors.OpExecError(log_err)
5376

    
5377
    self.migration_info = migration_info = result.payload
5378

    
5379
    # Then switch the disks to master/master mode
5380
    self._EnsureSecondary(target_node)
5381
    self._GoStandalone()
5382
    self._GoReconnect(True)
5383
    self._WaitUntilSync()
5384

    
5385
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5386
    result = self.rpc.call_accept_instance(target_node,
5387
                                           instance,
5388
                                           migration_info,
5389
                                           self.nodes_ip[target_node])
5390

    
5391
    msg = result.fail_msg
5392
    if msg:
5393
      logging.error("Instance pre-migration failed, trying to revert"
5394
                    " disk status: %s", msg)
5395
      self.feedback_fn("Pre-migration failed, aborting")
5396
      self._AbortMigration()
5397
      self._RevertDiskStatus()
5398
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5399
                               (instance.name, msg))
5400

    
5401
    self.feedback_fn("* migrating instance to %s" % target_node)
5402
    time.sleep(10)
5403
    result = self.rpc.call_instance_migrate(source_node, instance,
5404
                                            self.nodes_ip[target_node],
5405
                                            self.live)
5406
    msg = result.fail_msg
5407
    if msg:
5408
      logging.error("Instance migration failed, trying to revert"
5409
                    " disk status: %s", msg)
5410
      self.feedback_fn("Migration failed, aborting")
5411
      self._AbortMigration()
5412
      self._RevertDiskStatus()
5413
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5414
                               (instance.name, msg))
5415
    time.sleep(10)
5416

    
5417
    instance.primary_node = target_node
5418
    # distribute new instance config to the other nodes
5419
    self.cfg.Update(instance, self.feedback_fn)
5420

    
5421
    result = self.rpc.call_finalize_migration(target_node,
5422
                                              instance,
5423
                                              migration_info,
5424
                                              True)
5425
    msg = result.fail_msg
5426
    if msg:
5427
      logging.error("Instance migration succeeded, but finalization failed:"
5428
                    " %s", msg)
5429
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5430
                               msg)
5431

    
5432
    self._EnsureSecondary(source_node)
5433
    self._WaitUntilSync()
5434
    self._GoStandalone()
5435
    self._GoReconnect(False)
5436
    self._WaitUntilSync()
5437

    
5438
    self.feedback_fn("* done")
5439

    
5440
  def Exec(self, feedback_fn):
5441
    """Perform the migration.
5442

5443
    """
5444
    feedback_fn("Migrating instance %s" % self.instance.name)
5445

    
5446
    self.feedback_fn = feedback_fn
5447

    
5448
    self.source_node = self.instance.primary_node
5449
    self.target_node = self.instance.secondary_nodes[0]
5450
    self.all_nodes = [self.source_node, self.target_node]
5451
    self.nodes_ip = {
5452
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5453
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5454
      }
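    # both the DRBD reconfiguration and the migration RPCs address the
    # nodes by their secondary (replication network) IP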
5455

    
5456
    if self.cleanup:
5457
      return self._ExecCleanup()
5458
    else:
5459
      return self._ExecMigration()
5460

    
5461

    
5462
def _CreateBlockDev(lu, node, instance, device, force_create,
5463
                    info, force_open):
5464
  """Create a tree of block devices on a given node.
5465

5466
  If this device type has to be created on secondaries, create it and
5467
  all its children.
5468

5469
  If not, just recurse to children keeping the same 'force' value.
5470

5471
  @param lu: the lu on whose behalf we execute
5472
  @param node: the node on which to create the device
5473
  @type instance: L{objects.Instance}
5474
  @param instance: the instance which owns the device
5475
  @type device: L{objects.Disk}
5476
  @param device: the device to create
5477
  @type force_create: boolean
5478
  @param force_create: whether to force creation of this device; this
5479
      will be changed to True whenever we find a device which has
5480
      the CreateOnSecondary() attribute set
5481
  @param info: the extra 'metadata' we should attach to the device
5482
      (this will be represented as an LVM tag)
5483
  @type force_open: boolean
5484
  @param force_open: this parameter will be passed to the
5485
      L{backend.BlockdevCreate} function where it specifies
5486
      whether we run on primary or not, and it affects both
5487
      the child assembly and the device's own Open() execution
5488

5489
  """
5490
  if device.CreateOnSecondary():
5491
    force_create = True
5492

    
5493
  if device.children:
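    # create the children first, so that the parent device (e.g. DRBD on
    # top of LVs) can be assembled on top of existing components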
5494
    for child in device.children:
5495
      _CreateBlockDev(lu, node, instance, child, force_create,
5496
                      info, force_open)
5497

    
5498
  if not force_create:
5499
    return
5500

    
5501
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5502

    
5503

    
5504
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5505
  """Create a single block device on a given node.
5506

5507
  This will not recurse over children of the device, so they must be
5508
  created in advance.
5509

5510
  @param lu: the lu on whose behalf we execute
5511
  @param node: the node on which to create the device
5512
  @type instance: L{objects.Instance}
5513
  @param instance: the instance which owns the device
5514
  @type device: L{objects.Disk}
5515
  @param device: the device to create
5516
  @param info: the extra 'metadata' we should attach to the device
5517
      (this will be represented as an LVM tag)
5518
  @type force_open: boolean
5519
  @param force_open: this parameter will be passed to the
5520
      L{backend.BlockdevCreate} function where it specifies
5521
      whether we run on primary or not, and it affects both
5522
      the child assembly and the device's own Open() execution
5523

5524
  """
5525
  lu.cfg.SetDiskID(device, node)
5526
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5527
                                       instance.name, force_open, info)
5528
  result.Raise("Can't create block device %s on"
5529
               " node %s for instance %s" % (device, node, instance.name))
5530
  if device.physical_id is None:
5531
    device.physical_id = result.payload
5532

    
5533

    
5534
def _GenerateUniqueNames(lu, exts):
5535
  """Generate a suitable LV name.
5536

5537
  This will generate a logical volume name for the given instance.
5538

5539
  """
5540
  results = []
5541
  for val in exts:
5542
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5543
    results.append("%s%s" % (new_id, val))
5544
  return results
5545

    
5546

    
5547
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5548
                         p_minor, s_minor):
5549
  """Generate a drbd8 device complete with its children.
5550

5551
  """
5552
  port = lu.cfg.AllocatePort()
5553
  vgname = lu.cfg.GetVGName()
5554
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
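  # a DRBD8 device sits on top of two LVs: one of the requested size for
  # the data and a fixed 128 MiB one for the DRBD metadata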
5555
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5556
                          logical_id=(vgname, names[0]))
5557
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5558
                          logical_id=(vgname, names[1]))
5559
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5560
                          logical_id=(primary, secondary, port,
5561
                                      p_minor, s_minor,
5562
                                      shared_secret),
5563
                          children=[dev_data, dev_meta],
5564
                          iv_name=iv_name)
5565
  return drbd_dev
5566

    
5567

    
5568
def _GenerateDiskTemplate(lu, template_name,
5569
                          instance_name, primary_node,
5570
                          secondary_nodes, disk_info,
5571
                          file_storage_dir, file_driver,
5572
                          base_index):
5573
  """Generate the entire disk layout for a given template type.
5574

5575
  """
5576
  #TODO: compute space requirements
5577

    
5578
  vgname = lu.cfg.GetVGName()
5579
  disk_count = len(disk_info)
5580
  disks = []
5581
  if template_name == constants.DT_DISKLESS:
5582
    pass
5583
  elif template_name == constants.DT_PLAIN:
5584
    if len(secondary_nodes) != 0:
5585
      raise errors.ProgrammerError("Wrong template configuration")
5586

    
5587
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5588
                                      for i in range(disk_count)])
5589
    for idx, disk in enumerate(disk_info):
5590
      disk_index = idx + base_index
5591
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5592
                              logical_id=(vgname, names[idx]),
5593
                              iv_name="disk/%d" % disk_index,
5594
                              mode=disk["mode"])
5595
      disks.append(disk_dev)
5596
  elif template_name == constants.DT_DRBD8:
5597
    if len(secondary_nodes) != 1:
5598
      raise errors.ProgrammerError("Wrong template configuration")
5599
    remote_node = secondary_nodes[0]
5600
    minors = lu.cfg.AllocateDRBDMinor(
5601
      [primary_node, remote_node] * len(disk_info), instance_name)
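    # minors is a flat list of (primary, secondary) minor pairs, one pair
    # per disk, consumed below as minors[idx*2] / minors[idx*2+1]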
5602

    
5603
    names = []
5604
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5605
                                               for i in range(disk_count)]):
5606
      names.append(lv_prefix + "_data")
5607
      names.append(lv_prefix + "_meta")
5608
    for idx, disk in enumerate(disk_info):
5609
      disk_index = idx + base_index
5610
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5611
                                      disk["size"], names[idx*2:idx*2+2],
5612
                                      "disk/%d" % disk_index,
5613
                                      minors[idx*2], minors[idx*2+1])
5614
      disk_dev.mode = disk["mode"]
5615
      disks.append(disk_dev)
5616
  elif template_name == constants.DT_FILE:
5617
    if len(secondary_nodes) != 0:
5618
      raise errors.ProgrammerError("Wrong template configuration")
5619

    
5620
    for idx, disk in enumerate(disk_info):
5621
      disk_index = idx + base_index
5622
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5623
                              iv_name="disk/%d" % disk_index,
5624
                              logical_id=(file_driver,
5625
                                          "%s/disk%d" % (file_storage_dir,
5626
                                                         disk_index)),
5627
                              mode=disk["mode"])
5628
      disks.append(disk_dev)
5629
  else:
5630
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5631
  return disks
5632

    
5633

    
5634
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


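# Illustrative note (sketch, not used anywhere): _CreateBlockDev above appears
# to be given f_create for both its "force creation" and "force open"
# arguments, so only the primary node gets forced creation.  With hypothetical
# nodes node1 (primary) and node2:
#
#   for node in ["node1.example.com", "node2.example.com"]:
#     f_create = (node == "node1.example.com")   # True only on the primary
#     # _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)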
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]


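# Worked example (illustrative only): for two disks of 1024 MB and 2048 MB,
# the function above yields
#   DT_PLAIN:    1024 + 2048               = 3072 MB
#   DT_DRBD8:    (1024+128) + (2048+128)   = 3328 MB (128 MB metadata per disk)
#   DT_DISKLESS / DT_FILE: None (no volume group space needed)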
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


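# Typical call site (sketch): LUs invoke this with the hypervisor parameters
# from the opcode, e.g. from LUCreateInstance.CheckPrereq below:
#
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
#
# Results from offline nodes are skipped, so validation can only fail for
# nodes that actually answered.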
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)
    if (self.op.disk_template == constants.DT_FILE and
        not constants.ENABLE_FILE_STORAGE):
      raise errors.OpPrereqError("File storage disabled at configure time",
                                 errors.ECODE_INVAL)
    # check disk information: either all adopt, or no adopt
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

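  # Illustrative examples (made-up opcode disk lists) of the adoption rule
  # enforced above: every disk must carry "adopt", or none of them may.
  #
  #   [{"size": 1024}, {"size": 2048}]                                # ok
  #   [{"size": 1024, "adopt": "lv1"}, {"size": 1024, "adopt": "lv2"}] # ok
  #   [{"size": 1024}, {"size": 1024, "adopt": "lv1"}]        # rejected (mixed)
  #
  # Adoption additionally requires the 'plain' disk template, no iallocator
  # and no instance import.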
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # disk template and mirror node verification
    _CheckDiskTemplate(self.op.disk_template)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    if self.op.name_check:
      hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = instance_name = hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = hostname1.ip
    else:
      instance_name = self.op.instance_name
      self.check_ip = None

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

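  # Expected result shape (sketch; node names are made up): the allocator
  # returns one node for single-node templates and two for DRBD8, e.g.
  #
  #   ial.result == ["node3.example.com"]                        # plain
  #   ial.result == ["node3.example.com", "node5.example.com"]   # drbd8
  #
  # ial.result[0] becomes the primary node and, when required_nodes == 2,
  # ial.result[1] becomes the secondary.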
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                         src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path, errors.ECODE_INVAL)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config",
                                     errors.ECODE_ENVIRON)

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION),
                                   errors.ECODE_ENVIRON)

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node, errors.ECODE_ENVIRON)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size),
                                     errors.ECODE_NORES)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name),
                 prereq=True, ecode=errors.ECODE_INVAL)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

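  # Adoption sketch (illustrative, values made up): the lv_list payload used
  # above is assumed to map plain LV names to tuples in which index 0 is the
  # size and index 2 an "in use" flag; only those two fields are consumed here.
  #
  #   node_lvs == {"inst1-data": ("10240.00", "-wi-a-", False)}   # hypothetical
  #   dsk["size"] = int(float("10240.00"))                        # -> 10240 MB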
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                      string_file_storage_dir, instance)


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                               self.op.debug_level)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name,
                                                         self.op.debug_level)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


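# Rename sketch (illustrative, names made up): for adopted disks the Exec
# method above builds (existing_id, generated_id) pairs, so the user-supplied
# LVs are renamed to the freshly generated unique names, e.g.
#
#   t_dsk.logical_id == ("xenvg", "myvol")            # existing LV, after fix-up
#   rename_to[0]     == ("xenvg", "4a6f3c....disk0")  # generated unique name
#   # call_blockdev_rename(pnode_name, [(t_dsk, rename_to[0]), ...])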
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

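  # Parameter-combination sketch for CheckArguments above (illustrative;
  # "nodeX" and "hail" are placeholder values):
  #
  #   mode              remote_node  iallocator   result
  #   REPLACE_DISK_CHG  None         None         error (need one of the two)
  #   REPLACE_DISK_CHG  "nodeX"      "hail"       error (not both)
  #   REPLACE_DISK_CHG  "nodeX"      None         ok
  #   REPLACE_DISK_PRI  "nodeX"      None         error (only valid for CHG)
  #   REPLACE_DISK_PRI  None         None         ok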
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
6923
    """Execute disk replacement.
6924

6925
    This dispatches the disk replacement to the appropriate handler.
6926

6927
    """
6928
    if self.delay_iallocator:
6929
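      # delay_iallocator mode: the second half of the prerequisite checks
      # was deferred to execution time, so run it now before touching any
      # disks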
      self._CheckPrereq2()
6930

    
6931
    if not self.disks:
6932
      feedback_fn("No disks need replacement")
6933
      return
6934

    
6935
    feedback_fn("Replacing disk(s) %s for %s" %
6936
                (utils.CommaJoin(self.disks), self.instance.name))
6937

    
6938
    activate_disks = (not self.instance.admin_up)
6939

    
6940
    # Activate the instance disks if we're replacing them on a down instance
6941
    if activate_disks:
6942
      _StartInstanceDisks(self.lu, self.instance, True)
6943

    
6944
    try:
6945
      # Should we replace the secondary node?
6946
      if self.new_node is not None:
6947
        fn = self._ExecDrbd8Secondary
6948
      else:
6949
        fn = self._ExecDrbd8DiskOnly
6950

    
6951
      return fn(feedback_fn)
6952

    
6953
    finally:
6954
      # Deactivate the instance disks if we're replacing them on a
6955
      # down instance
6956
      if activate_disks:
6957
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6958

    
6959
  def _CheckVolumeGroup(self, nodes):
6960
    self.lu.LogInfo("Checking volume groups")
6961

    
6962
    vgname = self.cfg.GetVGName()
6963

    
6964
    # Make sure volume group exists on all involved nodes
6965
    results = self.rpc.call_vg_list(nodes)
6966
    if not results:
6967
      raise errors.OpExecError("Can't list volume groups on the nodes")
6968

    
6969
    for node in nodes:
6970
      res = results[node]
6971
      res.Raise("Error checking node %s" % node)
6972
      if vgname not in res.payload:
6973
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
6974
                                 (vgname, node))
6975

    
6976
  def _CheckDisksExistence(self, nodes):
6977
    # Check disk existence
6978
    for idx, dev in enumerate(self.instance.disks):
6979
      if idx not in self.disks:
6980
        continue
6981

    
6982
      for node in nodes:
6983
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6984
        self.cfg.SetDiskID(dev, node)
6985

    
6986
        result = self.rpc.call_blockdev_find(node, dev)
6987

    
6988
        msg = result.fail_msg
6989
        if msg or not result.payload:
6990
          if not msg:
6991
            msg = "disk not found"
6992
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6993
                                   (idx, node, msg))
6994

    
6995
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6996
    for idx, dev in enumerate(self.instance.disks):
6997
      if idx not in self.disks:
6998
        continue
6999

    
7000
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7001
                      (idx, node_name))
7002

    
7003
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7004
                                   ldisk=ldisk):
7005
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7006
                                 " replace disks for instance %s" %
7007
                                 (node_name, self.instance.name))
7008

    
7009
  def _CreateNewStorage(self, node_name):
7010
    vgname = self.cfg.GetVGName()
7011
    iv_names = {}
7012

    
7013
    for idx, dev in enumerate(self.instance.disks):
7014
      if idx not in self.disks:
7015
        continue
7016

    
7017
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7018

    
7019
      self.cfg.SetDiskID(dev, node_name)
7020

    
7021
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7022
      names = _GenerateUniqueNames(self.lu, lv_names)
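      # each DRBD disk is backed by a pair of LVs: a data LV of the disk's
      # size and a small metadata LV (128 below); both are created under
      # fresh unique names and only renamed into place later in the
      # disk-replace procedure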
7023

    
7024
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7025
                             logical_id=(vgname, names[0]))
7026
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7027
                             logical_id=(vgname, names[1]))
7028

    
7029
      new_lvs = [lv_data, lv_meta]
7030
      old_lvs = dev.children
7031
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7032

    
7033
      # we pass force_create=True to force the LVM creation
7034
      for new_lv in new_lvs:
7035
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7036
                        _GetInstanceInfoText(self.instance), False)
7037

    
7038
    return iv_names
7039

    
7040
  def _CheckDevices(self, node_name, iv_names):
7041
    for name, (dev, _, _) in iv_names.iteritems():
7042
      self.cfg.SetDiskID(dev, node_name)
7043

    
7044
      result = self.rpc.call_blockdev_find(node_name, dev)
7045

    
7046
      msg = result.fail_msg
7047
      if msg or not result.payload:
7048
        if not msg:
7049
          msg = "disk not found"
7050
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7051
                                 (name, msg))
7052

    
7053
      if result.payload.is_degraded:
7054
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7055

    
7056
  def _RemoveOldStorage(self, node_name, iv_names):
7057
    for name, (_, old_lvs, _) in iv_names.iteritems():
7058
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7059

    
7060
      for lv in old_lvs:
7061
        self.cfg.SetDiskID(lv, node_name)
7062

    
7063
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7064
        if msg:
7065
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7066
                             hint="remove unused LVs manually")
7067

    
7068
  def _ReleaseNodeLock(self, node_name):
7069
    """Releases the lock for a given node."""
7070
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7071

    
7072
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7073
    """Replace a disk on the primary or secondary for DRBD 8.
7074

7075
    The algorithm for replace is quite complicated:
7076

7077
      1. for each disk to be replaced:
7078

7079
        1. create new LVs on the target node with unique names
7080
        1. detach old LVs from the drbd device
7081
        1. rename old LVs to name_replaced.<time_t>
7082
        1. rename new LVs to old LVs
7083
        1. attach the new LVs (with the old names now) to the drbd device
7084

7085
      1. wait for sync across all devices
7086

7087
      1. for each modified disk:
7088

7089
        1. remove old LVs (which have the name name_replaced.<time_t>)
7090

7091
    Failures are not very well handled.
7092

7093
    """
7094
    steps_total = 6
7095

    
7096
    # Step: check device activation
7097
    self.lu.LogStep(1, steps_total, "Check device existence")
7098
    self._CheckDisksExistence([self.other_node, self.target_node])
7099
    self._CheckVolumeGroup([self.target_node, self.other_node])
7100

    
7101
    # Step: check other node consistency
7102
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7103
    self._CheckDisksConsistency(self.other_node,
7104
                                self.other_node == self.instance.primary_node,
7105
                                False)
7106

    
7107
    # Step: create new storage
7108
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7109
    iv_names = self._CreateNewStorage(self.target_node)
7110

    
7111
    # Step: for each lv, detach+rename*2+attach
7112
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7113
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7114
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7115

    
7116
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7117
                                                     old_lvs)
7118
      result.Raise("Can't detach drbd from local storage on node"
7119
                   " %s for device %s" % (self.target_node, dev.iv_name))
7120
      #dev.children = []
7121
      #cfg.Update(instance)
7122

    
7123
      # ok, we created the new LVs, so now we know we have the needed
7124
      # storage; as such, we proceed on the target node to rename
7125
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7126
      # using the assumption that logical_id == physical_id (which in
7127
      # turn is the unique_id on that node)
7128

    
7129
      # FIXME(iustin): use a better name for the replaced LVs
7130
      temp_suffix = int(time.time())
7131
      ren_fn = lambda d, suff: (d.physical_id[0],
7132
                                d.physical_id[1] + "_replaced-%s" % suff)
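      # illustrative example only: an existing LV named "abc.disk0_data"
      # would become "abc.disk0_data_replaced-1300000000" (the suffix is
      # int(time.time())), while the freshly created LV is then renamed to
      # take over "abc.disk0_data"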
7133

    
7134
      # Build the rename list based on what LVs exist on the node
7135
      rename_old_to_new = []
7136
      for to_ren in old_lvs:
7137
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7138
        if not result.fail_msg and result.payload:
7139
          # device exists
7140
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7141

    
7142
      self.lu.LogInfo("Renaming the old LVs on the target node")
7143
      result = self.rpc.call_blockdev_rename(self.target_node,
7144
                                             rename_old_to_new)
7145
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7146

    
7147
      # Now we rename the new LVs to the old LVs
7148
      self.lu.LogInfo("Renaming the new LVs on the target node")
7149
      rename_new_to_old = [(new, old.physical_id)
7150
                           for old, new in zip(old_lvs, new_lvs)]
7151
      result = self.rpc.call_blockdev_rename(self.target_node,
7152
                                             rename_new_to_old)
7153
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7154

    
7155
      for old, new in zip(old_lvs, new_lvs):
7156
        new.logical_id = old.logical_id
7157
        self.cfg.SetDiskID(new, self.target_node)
7158

    
7159
      for disk in old_lvs:
7160
        disk.logical_id = ren_fn(disk, temp_suffix)
7161
        self.cfg.SetDiskID(disk, self.target_node)
7162

    
7163
      # Now that the new lvs have the old name, we can add them to the device
7164
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7165
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7166
                                                  new_lvs)
7167
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("manually clean up the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7177

    
7178
      dev.children = new_lvs
7179

    
7180
      self.cfg.Update(self.instance, feedback_fn)
7181

    
7182
    cstep = 5
7183
    if self.early_release:
7184
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7185
      cstep += 1
7186
      self._RemoveOldStorage(self.target_node, iv_names)
7187
      # WARNING: we release both node locks here, do not do other RPCs
7188
      # than WaitForSync to the primary node
7189
      self._ReleaseNodeLock([self.target_node, self.other_node])
7190

    
7191
    # Wait for sync
7192
    # This can fail as the old devices are degraded and _WaitForSync
7193
    # does a combined result over all disks, so we don't check its return value
7194
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7195
    cstep += 1
7196
    _WaitForSync(self.lu, self.instance)
7197

    
7198
    # Check all devices manually
7199
    self._CheckDevices(self.instance.primary_node, iv_names)
7200

    
7201
    # Step: remove old storage
7202
    if not self.early_release:
7203
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7204
      cstep += 1
7205
      self._RemoveOldStorage(self.target_node, iv_names)
7206

    
7207
  def _ExecDrbd8Secondary(self, feedback_fn):
7208
    """Replace the secondary node for DRBD 8.
7209

7210
    The algorithm for replace is quite complicated:
7211
      - for all disks of the instance:
7212
        - create new LVs on the new node with same names
7213
        - shutdown the drbd device on the old secondary
7214
        - disconnect the drbd network on the primary
7215
        - create the drbd device on the new secondary
7216
        - network attach the drbd on the primary, using an artifice:
7217
          the drbd code for Attach() will connect to the network if it
7218
          finds a device which is connected to the good local disks but
7219
          not network enabled
7220
      - wait for sync across all devices
7221
      - remove all disks from the old secondary
7222

7223
    Failures are not very well handled.
7224

7225
    """
7226
    steps_total = 6
7227

    
7228
    # Step: check device activation
7229
    self.lu.LogStep(1, steps_total, "Check device existence")
7230
    self._CheckDisksExistence([self.instance.primary_node])
7231
    self._CheckVolumeGroup([self.instance.primary_node])
7232

    
7233
    # Step: check other node consistency
7234
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7235
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7236

    
7237
    # Step: create new storage
7238
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7239
    for idx, dev in enumerate(self.instance.disks):
7240
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7241
                      (self.new_node, idx))
7242
      # we pass force_create=True to force LVM creation
7243
      for new_lv in dev.children:
7244
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7245
                        _GetInstanceInfoText(self.instance), False)
7246

    
7247
    # Step 4: drbd minors and drbd setup changes
7248
    # after this, we must manually remove the drbd minors on both the
7249
    # error and the success paths
7250
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7251
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7252
                                         for dev in self.instance.disks],
7253
                                        self.instance.name)
7254
    logging.debug("Allocated minors %r", minors)
7255

    
7256
    iv_names = {}
7257
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7258
      self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
7259
                      (self.new_node, idx))
7260
      # create new devices on new_node; note that we create two IDs:
7261
      # one without port, so the drbd will be activated without
7262
      # networking information on the new node at this stage, and one
7263
      # with network, for the latter activation in step 4
7264
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7265
      if self.instance.primary_node == o_node1:
7266
        p_minor = o_minor1
7267
      else:
7268
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7269
        p_minor = o_minor2
7270

    
7271
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7272
                      p_minor, new_minor, o_secret)
7273
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7274
                    p_minor, new_minor, o_secret)
7275

    
7276
      iv_names[idx] = (dev, dev.children, new_net_id)
7277
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7278
                    new_net_id)
7279
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7280
                              logical_id=new_alone_id,
7281
                              children=dev.children,
7282
                              size=dev.size)
7283
      try:
7284
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7285
                              _GetInstanceInfoText(self.instance), False)
7286
      except errors.GenericError:
7287
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7288
        raise
7289

    
7290
    # We have new devices, shutdown the drbd on the old secondary
7291
    for idx, dev in enumerate(self.instance.disks):
7292
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7293
      self.cfg.SetDiskID(dev, self.target_node)
7294
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please clean up this device manually as"
                                 " soon as possible"))
7300

    
7301
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7302
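    # call_drbd_disconnect_net returns one result per contacted node; only
    # the primary node was asked, so its entry is picked out directly below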
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7303
                                               self.node_secondary_ip,
7304
                                               self.instance.disks)\
7305
                                              [self.instance.primary_node]
7306

    
7307
    msg = result.fail_msg
7308
    if msg:
7309
      # detaches didn't succeed (unlikely)
7310
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7311
      raise errors.OpExecError("Can't detach the disks from the network on"
7312
                               " old node: %s" % (msg,))
7313

    
7314
    # if we managed to detach at least one, we update all the disks of
7315
    # the instance to point to the new secondary
7316
    self.lu.LogInfo("Updating instance configuration")
7317
    for dev, _, new_logical_id in iv_names.itervalues():
7318
      dev.logical_id = new_logical_id
7319
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7320

    
7321
    self.cfg.Update(self.instance, feedback_fn)
7322

    
7323
    # and now perform the drbd attach
7324
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7325
                    " (standalone => connected)")
7326
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7327
                                            self.new_node],
7328
                                           self.node_secondary_ip,
7329
                                           self.instance.disks,
7330
                                           self.instance.name,
7331
                                           False)
7332
    for to_node, to_result in result.items():
7333
      msg = to_result.fail_msg
7334
      if msg:
7335
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7336
                           to_node, msg,
7337
                           hint=("please do a gnt-instance info to see the"
7338
                                 " status of disks"))
7339
    cstep = 5
7340
    if self.early_release:
7341
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7342
      cstep += 1
7343
      self._RemoveOldStorage(self.target_node, iv_names)
7344
      # WARNING: we release all node locks here, do not do other RPCs
7345
      # than WaitForSync to the primary node
7346
      self._ReleaseNodeLock([self.instance.primary_node,
7347
                             self.target_node,
7348
                             self.new_node])
7349

    
7350
    # Wait for sync
7351
    # This can fail as the old devices are degraded and _WaitForSync
7352
    # does a combined result over all disks, so we don't check its return value
7353
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7354
    cstep += 1
7355
    _WaitForSync(self.lu, self.instance)
7356

    
7357
    # Check all devices manually
7358
    self._CheckDevices(self.instance.primary_node, iv_names)
7359

    
7360
    # Step: remove old storage
7361
    if not self.early_release:
7362
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7363
      self._RemoveOldStorage(self.target_node, iv_names)
7364

    
7365

    
7366
class LURepairNodeStorage(NoHooksLU):
7367
  """Repairs the volume group on a node.
7368

7369
  """
7370
  _OP_REQP = ["node_name"]
7371
  REQ_BGL = False
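  # NOTE: presumably surfaced through the "gnt-node repair-storage" client
  # command; the actual work is delegated to the node's storage backend via
  # the SO_FIX_CONSISTENCY operation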
7372

    
7373
  def CheckArguments(self):
7374
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7375

    
7376
  def ExpandNames(self):
7377
    self.needed_locks = {
7378
      locking.LEVEL_NODE: [self.op.node_name],
7379
      }
7380

    
7381
  def _CheckFaultyDisks(self, instance, node_name):
7382
    """Ensure faulty disks abort the opcode or at least warn."""
7383
    try:
7384
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7385
                                  node_name, True):
7386
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7387
                                   " node '%s'" % (instance.name, node_name),
7388
                                   errors.ECODE_STATE)
7389
    except errors.OpPrereqError, err:
7390
      if self.op.ignore_consistency:
7391
        self.proc.LogWarning(str(err.args[0]))
7392
      else:
7393
        raise
7394

    
7395
  def CheckPrereq(self):
7396
    """Check prerequisites.
7397

7398
    """
7399
    storage_type = self.op.storage_type
7400

    
7401
    if (constants.SO_FIX_CONSISTENCY not in
7402
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7403
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7404
                                 " repaired" % storage_type,
7405
                                 errors.ECODE_INVAL)
7406

    
7407
    # Check whether any instance on this node has faulty disks
7408
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7409
      if not inst.admin_up:
7410
        continue
7411
      check_nodes = set(inst.all_nodes)
7412
      check_nodes.discard(self.op.node_name)
7413
      for inst_node_name in check_nodes:
7414
        self._CheckFaultyDisks(inst, inst_node_name)
7415

    
7416
  def Exec(self, feedback_fn):
7417
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7418
                (self.op.name, self.op.node_name))
7419

    
7420
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7421
    result = self.rpc.call_storage_execute(self.op.node_name,
7422
                                           self.op.storage_type, st_args,
7423
                                           self.op.name,
7424
                                           constants.SO_FIX_CONSISTENCY)
7425
    result.Raise("Failed to repair storage unit '%s' on %s" %
7426
                 (self.op.name, self.op.node_name))
7427

    
7428

    
7429
class LUNodeEvacuationStrategy(NoHooksLU):
7430
  """Computes the node evacuation strategy.
7431

7432
  """
7433
  _OP_REQP = ["nodes"]
7434
  REQ_BGL = False
7435

    
7436
  def CheckArguments(self):
7437
    if not hasattr(self.op, "remote_node"):
7438
      self.op.remote_node = None
7439
    if not hasattr(self.op, "iallocator"):
7440
      self.op.iallocator = None
7441
    if self.op.remote_node is not None and self.op.iallocator is not None:
7442
      raise errors.OpPrereqError("Give either the iallocator or the new"
7443
                                 " secondary, not both", errors.ECODE_INVAL)
7444

    
7445
  def ExpandNames(self):
7446
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7447
    self.needed_locks = locks = {}
7448
    if self.op.remote_node is None:
7449
      locks[locking.LEVEL_NODE] = locking.ALL_SET
7450
    else:
7451
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7452
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7453

    
7454
  def CheckPrereq(self):
7455
    pass
7456

    
7457
  def Exec(self, feedback_fn):
7458
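    # with an explicit remote node the result is a static list of
    # [instance_name, new_secondary] pairs; otherwise the decision is left
    # to the iallocator running in multi-evacuate (mevac) mode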
    if self.op.remote_node is not None:
7459
      instances = []
7460
      for node in self.op.nodes:
7461
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7462
      result = []
7463
      for i in instances:
7464
        if i.primary_node == self.op.remote_node:
7465
          raise errors.OpPrereqError("Node %s is the primary node of"
7466
                                     " instance %s, cannot use it as"
7467
                                     " secondary" %
7468
                                     (self.op.remote_node, i.name),
7469
                                     errors.ECODE_INVAL)
7470
        result.append([i.name, self.op.remote_node])
7471
    else:
7472
      ial = IAllocator(self.cfg, self.rpc,
7473
                       mode=constants.IALLOCATOR_MODE_MEVAC,
7474
                       evac_nodes=self.op.nodes)
7475
      ial.Run(self.op.iallocator, validate=True)
7476
      if not ial.success:
7477
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7478
                                 errors.ECODE_NORES)
7479
      result = ial.result
7480
    return result
7481

    
7482

    
7483
class LUGrowDisk(LogicalUnit):
7484
  """Grow a disk of an instance.
7485

7486
  """
7487
  HPATH = "disk-grow"
7488
  HTYPE = constants.HTYPE_INSTANCE
7489
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7490
  REQ_BGL = False
7491

    
7492
  def ExpandNames(self):
7493
    self._ExpandAndLockInstance()
7494
    self.needed_locks[locking.LEVEL_NODE] = []
7495
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7496

    
7497
  def DeclareLocks(self, level):
7498
    if level == locking.LEVEL_NODE:
7499
      self._LockInstancesNodes()
7500

    
7501
  def BuildHooksEnv(self):
7502
    """Build hooks env.
7503

7504
    This runs on the master, the primary and all the secondaries.
7505

7506
    """
7507
    env = {
7508
      "DISK": self.op.disk,
7509
      "AMOUNT": self.op.amount,
7510
      }
7511
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7512
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7513
    return env, nl, nl
7514

    
7515
  def CheckPrereq(self):
7516
    """Check prerequisites.
7517

7518
    This checks that the instance is in the cluster.
7519

7520
    """
7521
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7522
    assert instance is not None, \
7523
      "Cannot retrieve locked instance %s" % self.op.instance_name
7524
    nodenames = list(instance.all_nodes)
7525
    for node in nodenames:
7526
      _CheckNodeOnline(self, node)
7527

    
7528

    
7529
    self.instance = instance
7530

    
7531
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7532
      raise errors.OpPrereqError("Instance's disk layout does not support"
7533
                                 " growing.", errors.ECODE_INVAL)
7534

    
7535
    self.disk = instance.FindDisk(self.op.disk)
7536

    
7537
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
7538
                                       instance.hypervisor)
7539
    for node in nodenames:
7540
      info = nodeinfo[node]
7541
      info.Raise("Cannot get current information from node %s" % node)
7542
      vg_free = info.payload.get('vg_free', None)
7543
      if not isinstance(vg_free, int):
7544
        raise errors.OpPrereqError("Can't compute free disk space on"
7545
                                   " node %s" % node, errors.ECODE_ENVIRON)
7546
      if self.op.amount > vg_free:
7547
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
7548
                                   " %d MiB available, %d MiB required" %
7549
                                   (node, vg_free, self.op.amount),
7550
                                   errors.ECODE_NORES)
7551

    
7552
  def Exec(self, feedback_fn):
7553
    """Execute disk grow.
7554

7555
    """
7556
    instance = self.instance
7557
    disk = self.disk
7558
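    # grow the block device on every node that holds it (for DRBD that is
    # both primary and secondary); the configuration is only updated after
    # all per-node grow calls have succeeded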
    for node in instance.all_nodes:
7559
      self.cfg.SetDiskID(disk, node)
7560
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7561
      result.Raise("Grow request failed to node %s" % node)
7562

    
7563
      # TODO: Rewrite code to work properly
7564
      # DRBD goes into sync mode for a short amount of time after executing the
7565
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7566
      # calling "resize" in sync mode fails. Sleeping for a short amount of
7567
      # time is a work-around.
7568
      time.sleep(5)
7569

    
7570
    disk.RecordGrow(self.op.amount)
7571
    self.cfg.Update(instance, feedback_fn)
7572
    if self.op.wait_for_sync:
7573
      disk_abort = not _WaitForSync(self, instance)
7574
      if disk_abort:
7575
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7576
                             " status.\nPlease check the instance.")
7577

    
7578

    
7579
class LUQueryInstanceData(NoHooksLU):
7580
  """Query runtime instance data.
7581

7582
  """
7583
  _OP_REQP = ["instances", "static"]
7584
  REQ_BGL = False
7585

    
7586
  def ExpandNames(self):
7587
    self.needed_locks = {}
7588
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7589

    
7590
    if not isinstance(self.op.instances, list):
7591
      raise errors.OpPrereqError("Invalid argument type 'instances'",
7592
                                 errors.ECODE_INVAL)
7593

    
7594
    if self.op.instances:
7595
      self.wanted_names = []
7596
      for name in self.op.instances:
7597
        full_name = _ExpandInstanceName(self.cfg, name)
7598
        self.wanted_names.append(full_name)
7599
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7600
    else:
7601
      self.wanted_names = None
7602
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7603

    
7604
    self.needed_locks[locking.LEVEL_NODE] = []
7605
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7606

    
7607
  def DeclareLocks(self, level):
7608
    if level == locking.LEVEL_NODE:
7609
      self._LockInstancesNodes()
7610

    
7611
  def CheckPrereq(self):
7612
    """Check prerequisites.
7613

7614
    This only checks the optional instance list against the existing names.
7615

7616
    """
7617
    if self.wanted_names is None:
7618
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7619

    
7620
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7621
                             in self.wanted_names]
7622
    return
7623

    
7624
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7625
    """Returns the status of a block device
7626

7627
    """
7628
    if self.op.static or not node:
7629
      return None
7630

    
7631
    self.cfg.SetDiskID(dev, node)
7632

    
7633
    result = self.rpc.call_blockdev_find(node, dev)
7634
    if result.offline:
7635
      return None
7636

    
7637
    result.Raise("Can't compute disk status for %s" % instance_name)
7638

    
7639
    status = result.payload
7640
    if status is None:
7641
      return None
7642

    
7643
    return (status.dev_path, status.major, status.minor,
7644
            status.sync_percent, status.estimated_time,
7645
            status.is_degraded, status.ldisk_status)
7646

    
7647
  def _ComputeDiskStatus(self, instance, snode, dev):
7648
    """Compute block device status.
7649

7650
    """
7651
    if dev.dev_type in constants.LDS_DRBD:
7652
      # we change the snode then (otherwise we use the one passed in)
7653
      if dev.logical_id[0] == instance.primary_node:
7654
        snode = dev.logical_id[1]
7655
      else:
7656
        snode = dev.logical_id[0]
7657

    
7658
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7659
                                              instance.name, dev)
7660
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7661

    
7662
    if dev.children:
7663
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
7664
                      for child in dev.children]
7665
    else:
7666
      dev_children = []
7667

    
7668
    data = {
7669
      "iv_name": dev.iv_name,
7670
      "dev_type": dev.dev_type,
7671
      "logical_id": dev.logical_id,
7672
      "physical_id": dev.physical_id,
7673
      "pstatus": dev_pstatus,
7674
      "sstatus": dev_sstatus,
7675
      "children": dev_children,
7676
      "mode": dev.mode,
7677
      "size": dev.size,
7678
      }
7679

    
7680
    return data
7681

    
7682
  def Exec(self, feedback_fn):
7683
    """Gather and return data"""
7684
    result = {}
7685

    
7686
    cluster = self.cfg.GetClusterInfo()
7687

    
7688
    for instance in self.wanted_instances:
7689
      if not self.op.static:
7690
        remote_info = self.rpc.call_instance_info(instance.primary_node,
7691
                                                  instance.name,
7692
                                                  instance.hypervisor)
7693
        remote_info.Raise("Error checking node %s" % instance.primary_node)
7694
        remote_info = remote_info.payload
7695
        if remote_info and "state" in remote_info:
7696
          remote_state = "up"
7697
        else:
7698
          remote_state = "down"
7699
      else:
7700
        remote_state = None
7701
      if instance.admin_up:
7702
        config_state = "up"
7703
      else:
7704
        config_state = "down"
7705

    
7706
      disks = [self._ComputeDiskStatus(instance, None, device)
7707
               for device in instance.disks]
7708

    
7709
      idict = {
7710
        "name": instance.name,
7711
        "config_state": config_state,
7712
        "run_state": remote_state,
7713
        "pnode": instance.primary_node,
7714
        "snodes": instance.secondary_nodes,
7715
        "os": instance.os,
7716
        # this happens to be the same format used for hooks
7717
        "nics": _NICListToTuple(self, instance.nics),
7718
        "disks": disks,
7719
        "hypervisor": instance.hypervisor,
7720
        "network_port": instance.network_port,
7721
        "hv_instance": instance.hvparams,
7722
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
7723
        "be_instance": instance.beparams,
7724
        "be_actual": cluster.FillBE(instance),
7725
        "serial_no": instance.serial_no,
7726
        "mtime": instance.mtime,
7727
        "ctime": instance.ctime,
7728
        "uuid": instance.uuid,
7729
        }
7730

    
7731
      result[instance.name] = idict
7732

    
7733
    return result
7734

    
7735

    
7736
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
7738

7739
  """
7740
  HPATH = "instance-modify"
7741
  HTYPE = constants.HTYPE_INSTANCE
7742
  _OP_REQP = ["instance_name"]
7743
  REQ_BGL = False
7744

    
7745
  def CheckArguments(self):
7746
    if not hasattr(self.op, 'nics'):
7747
      self.op.nics = []
7748
    if not hasattr(self.op, 'disks'):
7749
      self.op.disks = []
7750
    if not hasattr(self.op, 'beparams'):
7751
      self.op.beparams = {}
7752
    if not hasattr(self.op, 'hvparams'):
7753
      self.op.hvparams = {}
7754
    self.op.force = getattr(self.op, "force", False)
7755
    if not (self.op.nics or self.op.disks or
7756
            self.op.hvparams or self.op.beparams):
7757
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7758

    
7759
    if self.op.hvparams:
7760
      _CheckGlobalHvParams(self.op.hvparams)
7761

    
7762
    # Disk validation
7763
    disk_addremove = 0
7764
    for disk_op, disk_dict in self.op.disks:
7765
      if disk_op == constants.DDM_REMOVE:
7766
        disk_addremove += 1
7767
        continue
7768
      elif disk_op == constants.DDM_ADD:
7769
        disk_addremove += 1
7770
      else:
7771
        if not isinstance(disk_op, int):
7772
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7773
        if not isinstance(disk_dict, dict):
7774
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7775
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7776

    
7777
      if disk_op == constants.DDM_ADD:
7778
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7779
        if mode not in constants.DISK_ACCESS_SET:
7780
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7781
                                     errors.ECODE_INVAL)
7782
        size = disk_dict.get('size', None)
7783
        if size is None:
7784
          raise errors.OpPrereqError("Required disk parameter size missing",
7785
                                     errors.ECODE_INVAL)
7786
        try:
7787
          size = int(size)
7788
        except (TypeError, ValueError), err:
7789
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7790
                                     str(err), errors.ECODE_INVAL)
7791
        disk_dict['size'] = size
7792
      else:
7793
        # modification of disk
7794
        if 'size' in disk_dict:
7795
          raise errors.OpPrereqError("Disk size change not possible, use"
7796
                                     " grow-disk", errors.ECODE_INVAL)
7797

    
7798
    if disk_addremove > 1:
7799
      raise errors.OpPrereqError("Only one disk add or remove operation"
7800
                                 " supported at a time", errors.ECODE_INVAL)
7801

    
7802
    # NIC validation
7803
    nic_addremove = 0
7804
    for nic_op, nic_dict in self.op.nics:
7805
      if nic_op == constants.DDM_REMOVE:
7806
        nic_addremove += 1
7807
        continue
7808
      elif nic_op == constants.DDM_ADD:
7809
        nic_addremove += 1
7810
      else:
7811
        if not isinstance(nic_op, int):
7812
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7813
        if not isinstance(nic_dict, dict):
7814
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7815
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7816

    
7817
      # nic_dict should be a dict
7818
      nic_ip = nic_dict.get('ip', None)
7819
      if nic_ip is not None:
7820
        if nic_ip.lower() == constants.VALUE_NONE:
7821
          nic_dict['ip'] = None
7822
        else:
7823
          if not utils.IsValidIP(nic_ip):
7824
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7825
                                       errors.ECODE_INVAL)
7826

    
7827
      nic_bridge = nic_dict.get('bridge', None)
7828
      nic_link = nic_dict.get('link', None)
7829
      if nic_bridge and nic_link:
7830
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7831
                                   " at the same time", errors.ECODE_INVAL)
7832
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7833
        nic_dict['bridge'] = None
7834
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7835
        nic_dict['link'] = None
7836

    
7837
      if nic_op == constants.DDM_ADD:
7838
        nic_mac = nic_dict.get('mac', None)
7839
        if nic_mac is None:
7840
          nic_dict['mac'] = constants.VALUE_AUTO
7841

    
7842
      if 'mac' in nic_dict:
7843
        nic_mac = nic_dict['mac']
7844
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7845
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7846

    
7847
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7848
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7849
                                     " modifying an existing nic",
7850
                                     errors.ECODE_INVAL)
7851

    
7852
    if nic_addremove > 1:
7853
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7854
                                 " supported at a time", errors.ECODE_INVAL)
7855

    
7856
  def ExpandNames(self):
7857
    self._ExpandAndLockInstance()
7858
    self.needed_locks[locking.LEVEL_NODE] = []
7859
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7860

    
7861
  def DeclareLocks(self, level):
7862
    if level == locking.LEVEL_NODE:
7863
      self._LockInstancesNodes()
7864

    
7865
  def BuildHooksEnv(self):
7866
    """Build hooks env.
7867

7868
    This runs on the master, primary and secondaries.
7869

7870
    """
7871
    args = dict()
7872
    if constants.BE_MEMORY in self.be_new:
7873
      args['memory'] = self.be_new[constants.BE_MEMORY]
7874
    if constants.BE_VCPUS in self.be_new:
7875
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7876
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7877
    # information at all.
7878
    if self.op.nics:
7879
      args['nics'] = []
7880
      nic_override = dict(self.op.nics)
7881
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7882
      for idx, nic in enumerate(self.instance.nics):
7883
        if idx in nic_override:
7884
          this_nic_override = nic_override[idx]
7885
        else:
7886
          this_nic_override = {}
7887
        if 'ip' in this_nic_override:
7888
          ip = this_nic_override['ip']
7889
        else:
7890
          ip = nic.ip
7891
        if 'mac' in this_nic_override:
7892
          mac = this_nic_override['mac']
7893
        else:
7894
          mac = nic.mac
7895
        if idx in self.nic_pnew:
7896
          nicparams = self.nic_pnew[idx]
7897
        else:
7898
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7899
        mode = nicparams[constants.NIC_MODE]
7900
        link = nicparams[constants.NIC_LINK]
7901
        args['nics'].append((ip, mac, mode, link))
7902
      if constants.DDM_ADD in nic_override:
7903
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7904
        mac = nic_override[constants.DDM_ADD]['mac']
7905
        nicparams = self.nic_pnew[constants.DDM_ADD]
7906
        mode = nicparams[constants.NIC_MODE]
7907
        link = nicparams[constants.NIC_LINK]
7908
        args['nics'].append((ip, mac, mode, link))
7909
      elif constants.DDM_REMOVE in nic_override:
7910
        del args['nics'][-1]
7911

    
7912
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7913
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7914
    return env, nl, nl
7915

    
7916
  @staticmethod
7917
  def _GetUpdatedParams(old_params, update_dict,
7918
                        default_values, parameter_types):
7919
    """Return the new params dict for the given params.
7920

7921
    @type old_params: dict
7922
    @param old_params: old parameters
7923
    @type update_dict: dict
7924
    @param update_dict: dict containing new parameter values,
7925
                        or constants.VALUE_DEFAULT to reset the
7926
                        parameter to its default value
7927
    @type default_values: dict
7928
    @param default_values: default values for the filled parameters
7929
    @type parameter_types: dict
7930
    @param parameter_types: dict mapping target dict keys to types
7931
                            in constants.ENFORCEABLE_TYPES
7932
    @rtype: (dict, dict)
7933
    @return: (new_parameters, filled_parameters)
7934

7935
    """
7936
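    # illustrative example (type enforcement elided): with
    #   old_params={"a": 1}, update_dict={"a": constants.VALUE_DEFAULT, "b": 2}
    #   and default_values={"a": 0, "b": 0}
    # the result is ({"b": 2}, {"a": 0, "b": 2})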
    params_copy = copy.deepcopy(old_params)
7937
    for key, val in update_dict.iteritems():
7938
      if val == constants.VALUE_DEFAULT:
7939
        try:
7940
          del params_copy[key]
7941
        except KeyError:
7942
          pass
7943
      else:
7944
        params_copy[key] = val
7945
    utils.ForceDictType(params_copy, parameter_types)
7946
    params_filled = objects.FillDict(default_values, params_copy)
7947
    return (params_copy, params_filled)
7948

    
7949
  def CheckPrereq(self):
7950
    """Check prerequisites.
7951

7952
    This only checks the instance list against the existing names.
7953

7954
    """
7955
    self.force = self.op.force
7956

    
7957
    # checking the new params on the primary/secondary nodes
7958

    
7959
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7960
    cluster = self.cluster = self.cfg.GetClusterInfo()
7961
    assert self.instance is not None, \
7962
      "Cannot retrieve locked instance %s" % self.op.instance_name
7963
    pnode = instance.primary_node
7964
    nodelist = list(instance.all_nodes)
7965

    
7966
    # hvparams processing
7967
    if self.op.hvparams:
7968
      i_hvdict, hv_new = self._GetUpdatedParams(
7969
                             instance.hvparams, self.op.hvparams,
7970
                             cluster.hvparams[instance.hypervisor],
7971
                             constants.HVS_PARAMETER_TYPES)
7972
      # local check
7973
      hypervisor.GetHypervisor(
7974
        instance.hypervisor).CheckParameterSyntax(hv_new)
7975
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7976
      self.hv_new = hv_new # the new actual values
7977
      self.hv_inst = i_hvdict # the new dict (without defaults)
7978
    else:
7979
      self.hv_new = self.hv_inst = {}
7980

    
7981
    # beparams processing
7982
    if self.op.beparams:
7983
      i_bedict, be_new = self._GetUpdatedParams(
7984
                             instance.beparams, self.op.beparams,
7985
                             cluster.beparams[constants.PP_DEFAULT],
7986
                             constants.BES_PARAMETER_TYPES)
7987
      self.be_new = be_new # the new actual values
7988
      self.be_inst = i_bedict # the new dict (without defaults)
7989
    else:
7990
      self.be_new = self.be_inst = {}
7991

    
7992
    self.warn = []
7993

    
7994
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7995
      mem_check_list = [pnode]
7996
      if be_new[constants.BE_AUTO_BALANCE]:
7997
        # either we changed auto_balance to yes or it was from before
7998
        mem_check_list.extend(instance.secondary_nodes)
7999
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8000
                                                  instance.hypervisor)
8001
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8002
                                         instance.hypervisor)
8003
      pninfo = nodeinfo[pnode]
8004
      msg = pninfo.fail_msg
8005
      if msg:
8006
        # Assume the primary node is unreachable and go ahead
8007
        self.warn.append("Can't get info from primary node %s: %s" %
8008
                         (pnode, msg))
8009
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8010
        self.warn.append("Node data from primary node %s doesn't contain"
8011
                         " free memory information" % pnode)
8012
      elif instance_info.fail_msg:
8013
        self.warn.append("Can't get instance runtime information: %s" %
8014
                        instance_info.fail_msg)
8015
      else:
8016
        if instance_info.payload:
8017
          current_mem = int(instance_info.payload['memory'])
8018
        else:
8019
          # Assume instance not running
8020
          # (there is a slight race condition here, but it's not very probable,
8021
          # and we have no other way to check)
8022
          current_mem = 0
8023
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8024
                    pninfo.payload['memory_free'])
8025
        if miss_mem > 0:
8026
          raise errors.OpPrereqError("This change will prevent the instance"
8027
                                     " from starting, due to %d MB of memory"
8028
                                     " missing on its primary node" % miss_mem,
8029
                                     errors.ECODE_NORES)
8030

    
8031
      if be_new[constants.BE_AUTO_BALANCE]:
8032
        for node, nres in nodeinfo.items():
8033
          if node not in instance.secondary_nodes:
8034
            continue
8035
          msg = nres.fail_msg
8036
          if msg:
8037
            self.warn.append("Can't get info from secondary node %s: %s" %
8038
                             (node, msg))
8039
          elif not isinstance(nres.payload.get('memory_free', None), int):
8040
            self.warn.append("Secondary node %s didn't return free"
8041
                             " memory information" % node)
8042
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8043
            self.warn.append("Not enough memory to failover instance to"
8044
                             " secondary node %s" % node)
8045

    
8046
    # NIC processing
8047
    self.nic_pnew = {}
8048
    self.nic_pinst = {}
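    # nic_pinst collects the instance-level (unfilled) nicparams that are
    # later stored on the instance, while nic_pnew holds the cluster-filled
    # values used for validation and for the hooks environment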
8049
    for nic_op, nic_dict in self.op.nics:
8050
      if nic_op == constants.DDM_REMOVE:
8051
        if not instance.nics:
8052
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8053
                                     errors.ECODE_INVAL)
8054
        continue
8055
      if nic_op != constants.DDM_ADD:
8056
        # an existing nic
8057
        if not instance.nics:
8058
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8059
                                     " no NICs" % nic_op,
8060
                                     errors.ECODE_INVAL)
8061
        if nic_op < 0 or nic_op >= len(instance.nics):
8062
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8063
                                     " are 0 to %d" %
8064
                                     (nic_op, len(instance.nics) - 1),
8065
                                     errors.ECODE_INVAL)
8066
        old_nic_params = instance.nics[nic_op].nicparams
8067
        old_nic_ip = instance.nics[nic_op].ip
8068
      else:
8069
        old_nic_params = {}
8070
        old_nic_ip = None
8071

    
8072
      update_params_dict = dict([(key, nic_dict[key])
8073
                                 for key in constants.NICS_PARAMETERS
8074
                                 if key in nic_dict])
8075

    
8076
      if 'bridge' in nic_dict:
8077
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8078

    
8079
      new_nic_params, new_filled_nic_params = \
8080
          self._GetUpdatedParams(old_nic_params, update_params_dict,
8081
                                 cluster.nicparams[constants.PP_DEFAULT],
8082
                                 constants.NICS_PARAMETER_TYPES)
8083
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8084
      self.nic_pinst[nic_op] = new_nic_params
8085
      self.nic_pnew[nic_op] = new_filled_nic_params
8086
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8087

    
8088
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8089
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8090
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8091
        if msg:
8092
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8093
          if self.force:
8094
            self.warn.append(msg)
8095
          else:
8096
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8097
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8098
        if 'ip' in nic_dict:
8099
          nic_ip = nic_dict['ip']
8100
        else:
8101
          nic_ip = old_nic_ip
8102
        if nic_ip is None:
8103
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8104
                                     ' on a routed nic', errors.ECODE_INVAL)
8105
      if 'mac' in nic_dict:
8106
        nic_mac = nic_dict['mac']
8107
        if nic_mac is None:
8108
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8109
                                     errors.ECODE_INVAL)
8110
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8111
          # otherwise generate the mac
8112
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8113
        else:
8114
          # or validate/reserve the current one
8115
          try:
8116
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8117
          except errors.ReservationError:
8118
            raise errors.OpPrereqError("MAC address %s already in use"
8119
                                       " in cluster" % nic_mac,
8120
                                       errors.ECODE_NOTUNIQUE)
8121

    
8122
    # DISK processing
8123
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8124
      raise errors.OpPrereqError("Disk operations not supported for"
8125
                                 " diskless instances",
8126
                                 errors.ECODE_INVAL)
8127
    for disk_op, _ in self.op.disks:
8128
      if disk_op == constants.DDM_REMOVE:
8129
        if len(instance.disks) == 1:
8130
          raise errors.OpPrereqError("Cannot remove the last disk of"
8131
                                     " an instance",
8132
                                     errors.ECODE_INVAL)
8133
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
8134
        ins_l = ins_l[pnode]
8135
        msg = ins_l.fail_msg
8136
        if msg:
8137
          raise errors.OpPrereqError("Can't contact node %s: %s" %
8138
                                     (pnode, msg), errors.ECODE_ENVIRON)
8139
        if instance.name in ins_l.payload:
8140
          raise errors.OpPrereqError("Instance is running, can't remove"
8141
                                     " disks.", errors.ECODE_STATE)
8142

    
8143
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
8145
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8146
                                   " add more" % constants.MAX_DISKS,
8147
                                   errors.ECODE_STATE)
8148
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8149
        # an existing disk
8150
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)
8155

    
8156
    return
8157

    
8158
  def Exec(self, feedback_fn):
8159
    """Modifies an instance.
8160

8161
    All parameters take effect only at the next restart of the instance.
8162

8163
    """
8164
    # Process here the warnings from CheckPrereq, as we don't have a
8165
    # feedback_fn there.
8166
    for warn in self.warn:
8167
      feedback_fn("WARNING: %s" % warn)
8168

    
8169
    result = []
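    # result accumulates (parameter, new value) pairs that are returned to
    # the caller as a summary of the applied changes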
8170
    instance = self.instance
8171
    # disk changes
8172
    for disk_op, disk_dict in self.op.disks:
8173
      if disk_op == constants.DDM_REMOVE:
8174
        # remove the last disk
8175
        device = instance.disks.pop()
8176
        device_idx = len(instance.disks)
8177
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8178
          self.cfg.SetDiskID(disk, node)
8179
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8180
          if msg:
8181
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8182
                            " continuing anyway", device_idx, node, msg)
8183
        result.append(("disk/%d" % device_idx, "remove"))
8184
      elif disk_op == constants.DDM_ADD:
8185
        # add a new disk
8186
        if instance.disk_template == constants.DT_FILE:
8187
          file_driver, file_path = instance.disks[0].logical_id
8188
          file_path = os.path.dirname(file_path)
8189
        else:
8190
          file_driver = file_path = None
8191
        disk_idx_base = len(instance.disks)
8192
        new_disk = _GenerateDiskTemplate(self,
8193
                                         instance.disk_template,
8194
                                         instance.name, instance.primary_node,
8195
                                         instance.secondary_nodes,
8196
                                         [disk_dict],
8197
                                         file_path,
8198
                                         file_driver,
8199
                                         disk_idx_base)[0]
8200
        instance.disks.append(new_disk)
8201
        info = _GetInstanceInfoText(instance)
8202

    
8203
        logging.info("Creating volume %s for instance %s",
8204
                     new_disk.iv_name, instance.name)
8205
        # Note: this needs to be kept in sync with _CreateDisks
8206
        #HARDCODE
8207
        for node in instance.all_nodes:
8208
          f_create = node == instance.primary_node
8209
          try:
8210
            _CreateBlockDev(self, node, instance, new_disk,
8211
                            f_create, info, f_create)
8212
          except errors.OpExecError, err:
8213
            self.LogWarning("Failed to create volume %s (%s) on"
8214
                            " node %s: %s",
8215
                            new_disk.iv_name, new_disk, node, err)
8216
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8217
                       (new_disk.size, new_disk.mode)))
8218
      else:
8219
        # change a given disk
8220
        instance.disks[disk_op].mode = disk_dict['mode']
8221
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8222
    # NIC changes
8223
    for nic_op, nic_dict in self.op.nics:
8224
      if nic_op == constants.DDM_REMOVE:
8225
        # remove the last nic
8226
        del instance.nics[-1]
8227
        result.append(("nic.%d" % len(instance.nics), "remove"))
8228
      elif nic_op == constants.DDM_ADD:
8229
        # mac and bridge should be set, by now
8230
        mac = nic_dict['mac']
8231
        ip = nic_dict.get('ip', None)
8232
        nicparams = self.nic_pinst[constants.DDM_ADD]
8233
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8234
        instance.nics.append(new_nic)
8235
        result.append(("nic.%d" % (len(instance.nics) - 1),
8236
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8237
                       (new_nic.mac, new_nic.ip,
8238
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8239
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8240
                       )))
8241
      else:
8242
        for key in 'mac', 'ip':
8243
          if key in nic_dict:
8244
            setattr(instance.nics[nic_op], key, nic_dict[key])
8245
        if nic_op in self.nic_pinst:
8246
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8247
        for key, val in nic_dict.iteritems():
8248
          result.append(("nic.%s/%d" % (key, nic_op), val))
8249

    
8250
    # hvparams changes
8251
    if self.op.hvparams:
8252
      instance.hvparams = self.hv_inst
8253
      for key, val in self.op.hvparams.iteritems():
8254
        result.append(("hv/%s" % key, val))
8255

    
8256
    # beparams changes
8257
    if self.op.beparams:
8258
      instance.beparams = self.be_inst
8259
      for key, val in self.op.beparams.iteritems():
8260
        result.append(("be/%s" % key, val))
8261

    
8262
    self.cfg.Update(instance, feedback_fn)
8263

    
8264
    return result
8265

    
8266

    
8267
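# Illustrative example (not from the original source; all values below are
# made up): the list returned by LUSetInstanceParams.Exec above is a sequence
# of (parameter, new value) pairs describing what was changed, roughly like
#
#   [("disk/1", "add:size=1024,mode=rw"),
#    ("nic.mac/0", "aa:00:00:6d:e0:8e"),
#    ("be/memory", 512)]
#
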
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


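# Illustrative example of the dictionary returned by LUQueryExports.Exec
# above (node and instance names are made up): a node maps to the list of
# exports it holds, or to False if it could not be contacted, e.g.
#
#   {"node1.example.com": ["instance1.example.com"],
#    "node2.example.com": False}
#
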
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        if self.op.shutdown and instance.admin_up:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults


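# Illustrative note for LUExportInstance.Exec above: it returns a tuple
# (fin_resu, dresults), where fin_resu is True if finalizing the export on
# the target node succeeded and dresults holds one boolean per instance
# disk, e.g. (True, [True, False]) for a two-disk instance whose second
# disk failed to export.
#
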
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the list of matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


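# Illustrative example of the list returned by LUSearchTags.Exec above
# (paths and tags are made up): each match is a (path, tag) pair, e.g.
#
#   [("/cluster", "production"),
#    ("/instances/instance1.example.com", "production")]
#
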
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


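# Illustrative sketch for LUTestDelay above (assuming the matching opcode in
# ganeti.opcodes; the field values are made up): a five-second sleep on the
# master only would be submitted roughly as
#
#   op = opcodes.OpTestDelay(duration=5.0, on_master=True, on_nodes=[])
#
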
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_text, in_data, out_text, out_data), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

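  # Illustrative sketch of how the class is instantiated for a new-instance
  # allocation; all concrete values below are made-up examples (see
  # LUTestAllocator.Exec further down for a real call site):
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="instance1.example.com",
  #                    mem_size=512,
  #                    disks=[{"size": 1024, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8,
  #                    os="debian-image",
  #                    tags=[],
  #                    nics=[{"mac": "aa:00:00:6d:e0:8e", "ip": None,
  #                           "bridge": None}],
  #                    vcpus=1,
  #                    hypervisor=constants.HT_XEN_PVM)
  #
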
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

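  # Illustrative sketch of the structure built by _ComputeClusterData above
  # (keys taken from the code, concrete values made up):
  #
  #   {"version": ...,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [],
  #    "enabled_hypervisors": ["xen-pvm"],
  #    "nodes": {"node1.example.com": {"total_memory": 4096,
  #                                    "free_memory": 2048, ...}},
  #    "instances": {"instance1.example.com": {"memory": 512,
  #                                            "vcpus": 1, ...}}}
  #
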
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

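  # Illustrative sketch: after _BuildInputData above, self.in_data carries a
  # "request" entry merging the per-mode dict with the mode itself, e.g. for
  # a relocation (names and sizes are made up):
  #
  #   {"type": "relocate", "name": "instance1.example.com",
  #    "disk_space_total": 1024, "required_nodes": 1,
  #    "relocate_from": ["node2.example.com"]}
  #
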
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


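# Illustrative example of an iallocator response as accepted by
# IAllocator._ValidateResult above (node names are made up):
#
#   {"success": True, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
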
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result