Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 7ea7bcf6

History | View | Annotate | Download (311.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36

    
37
from ganeti import ssh
38
from ganeti import utils
39
from ganeti import errors
40
from ganeti import hypervisor
41
from ganeti import locking
42
from ganeti import constants
43
from ganeti import objects
44
from ganeti import serializer
45
from ganeti import ssconf
46

    
47

    
48
class LogicalUnit(object):
  """Base class for logical units (LUs).

  Concrete LUs are expected to:
    - implement ExpandNames
    - implement CheckPrereq (not needed when tasklets are used)
    - implement Exec (not needed when tasklets are used)
    - implement BuildHooksEnv
    - override HPATH and HTYPE
    - optionally override the run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  All commands require root permissions.

  @ivar dry_run_result: the value (if any) returned to the caller when
      running in dry-run mode (requested via the opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Initialise the LU state and validate the opcode.

    Every attribute listed in _OP_REQP must exist on the opcode with a
    value other than None. Once that is verified, CheckArguments is
    called so that subclasses can perform further syntactic checks.

    @param processor: the processor running this LU; its LogWarning,
        LogInfo and LogStep callables are re-exported on the LU
    @param op: the opcode this LU executes
    @param context: the execution context; its cfg attribute is kept as
        self.cfg
    @param rpc: the RPC runner, kept as self.rpc
    @raise errors.OpPrereqError: if a required opcode parameter is missing

    """
    self.proc = processor
    self.op = op
    self.context = context
    self.cfg = context.cfg
    self.rpc = rpc

    # Locking needs, declared through these dicts for mcpu to consume
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Guard enforcing the calling convention of the lock helper functions
    self.recalculate_locks = {}

    # The SshRunner is only created on first use (see the ssh property)
    self.__ssh = None

    # Logging shortcuts
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103

    # Dry-run support
    self.dry_run_result = None

    # Tasklets; None means the LU implements CheckPrereq/Exec itself
    self.tasklets = None

    for required in self._OP_REQP:
      if getattr(op, required, None) is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   required, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Return the SshRunner object, creating it on first access.

    """
    runner = self.__ssh
    if not runner:
      runner = ssh.SshRunner(self.cfg.GetClusterName())
      self.__ssh = runner
    return runner

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check the syntactic validity of the opcode arguments.

    This hook is meant for simple syntactic checks of the opcode
    parameters that need no cluster-related information. The same could
    be done in ExpandNames and/or CheckPrereq, but keeping it separate is
    better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The method may modify self.op so that later methods no longer have
    to worry about missing parameters.

    The default implementation does nothing.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before the opcode starts executing and should
    bring all opcode parameters to their canonical form (e.g. a short
    node name must be fully expanded once this method has completed
    successfully). This way locking, hooks, logging, etc. can work
    correctly.

    LUs implementing this method must also populate self.needed_locks,
    a dict with lock levels as keys and lists of needed lock names as
    values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    To share locks at a level (instead of acquiring them exclusively),
    set a true value (usually 1) for that level in self.share_locks. By
    default locks are not shared.

    This function can also define a list of tasklets, which are then
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: if the LU does not require the BGL and
        has not overridden this method

    """
    # Only concurrent (non-BGL) LUs are forced to implement this method,
    # so that old LUs don't need to be changed all at the same time.
    if not self.REQ_BGL:
      raise NotImplementedError
    self.needed_locks = {} # Exclusive LUs don't need locks.

  def DeclareLocks(self, level):
    """Declare the LU locking needs for a level.

    Most LUs can declare all their locking needs at ExpandNames time,
    but sometimes some locks can only be computed after others have been
    acquired. This function is called just before acquiring the locks at
    a particular level, but after acquiring the ones at lower levels, and
    may modify self.needed_locks. The default implementation does
    nothing.

    This function is only called if something is already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    Implementations should verify that the prerequisites for executing
    this LU are fulfilled. Internode communication is allowed, but the
    method should be idempotent - no cluster or system changes are
    allowed.

    errors.OpPrereqError should be raised if something is not fulfilled.
    The return value is ignored.

    Implementations should also bring all opcode parameters to their
    canonical form if ExpandNames has not done so already.

    The default implementation runs CheckPrereq on every tasklet, in
    order.

    @raise NotImplementedError: if no tasklets have been defined

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (pos, tasklet) in enumerate(self.tasklets):
      logging.debug("Checking prerequisites for tasklet %s/%s",
                    pos + 1, len(self.tasklets))
      tasklet.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    Implementations do the actual work here. errors.OpExecError should
    be raised for failures that are somewhat dealt with in code, or
    expected.

    The default implementation runs Exec on every tasklet, in order,
    handing feedback_fn to each of them.

    @param feedback_fn: function used to send feedback back to the caller
    @raise NotImplementedError: if no tasklets have been defined

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (pos, tasklet) in enumerate(self.tasklets):
      logging.debug("Executing tasklet %s/%s", pos + 1, len(self.tasklets))
      tasklet.Exec(feedback_fn)

  def BuildHooksEnv(self):
    """Build the hooks environment for this LU.

    Implementations should return a three-element tuple consisting of:
    a dict with the environment used for running the specific hook for
    this LU, a list of node names on which the hook should run before
    the execution, and a list of node names on which the hook should run
    after the execution.

    The dict keys must not carry the 'GANETI_' prefix, as this is
    handled in the hooks runner, which also adds further keys of its
    own. If the LU doesn't define any environment, an empty dict (and
    not None) should be returned.

    "No nodes" should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    @raise NotImplementedError: always, in this base implementation

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed and tells
    the Logical Unit about the hooks' result, so that the LU can alter
    its own result based on it. The default implementation passes the
    previous result back unchanged; any LU can override it if it wants
    to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The signature is part of the API, hence the unused arguments and
    # the "could be a function" warning are silenced
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs working on one instance receive its name in
    self.op.instance_name and need to expand it and then declare the
    expanded name for locking. This helper does both and stores the
    expanded name back into self.op.instance_name. needed_locks is
    initialised to a dict if it is still None.

    @raise errors.OpPrereqError: if the instance name cannot be expanded

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"

    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if full_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)

    self.needed_locks[locking.LEVEL_INSTANCE] = full_name
    self.op.instance_name = full_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances
    in order to lock their nodes. It computes the primary (and, unless
    primary_only is set, secondary) nodes of every instance listed in
    self.acquired_locks[locking.LEVEL_INSTANCE] and stores them in
    self.needed_locks[locking.LEVEL_NODE], either replacing or extending
    the current value depending on
    self.recalculate_locks[locking.LEVEL_NODE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set; that entry is
    removed once the node list has been computed.

    In the future it may grow parameters to just lock some instance's
    nodes, or to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    node_level = locking.LEVEL_NODE
    assert node_level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    nodes = []
    for inst_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      inst = self.context.cfg.GetInstanceInfo(inst_name)
      nodes.append(inst.primary_node)
      if primary_only:
        continue
      nodes.extend(inst.secondary_nodes)

    # The recalculation mode decides whether the computed nodes replace
    # or extend what is currently declared for the node level
    mode = self.recalculate_locks[node_level]
    if mode == constants.LOCKS_REPLACE:
      self.needed_locks[node_level] = nodes
    elif mode == constants.LOCKS_APPEND:
      self.needed_locks[node_level].extend(nodes)

    del self.recalculate_locks[node_level]
354

    
355

    
356
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code. Both HPATH and HTYPE
  are left at None.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This LU runs no hooks, so being asked for a hooks environment is a
    programming error.

    @raise AssertionError: always

    """
    # Raised explicitly rather than through "assert False": assert
    # statements are removed when Python runs with -O, which would turn
    # this guard into a silent "return None"
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
373

    
374

    
375
class Tasklet:
  """Base class for tasklets.

  Tasklets are subcomponents of LUs. An LU can be made up entirely of
  tasklets, or it can mix legacy code with tasklets. All locking has to
  be done by the LU; tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Remember the owning LU and expose shortcuts to its helpers.

    @param lu: the logical unit this tasklet belongs to; its cfg and rpc
        attributes are made available directly on the tasklet

    """
    # Shortcuts
    self.rpc = lu.rpc
    self.cfg = lu.cfg

    self.lu = lu

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    Implementations should verify that the prerequisites for executing
    this tasklet are fulfilled. Internode communication is allowed, but
    the method should be idempotent - no cluster or system changes are
    allowed.

    errors.OpPrereqError should be raised if something is not fulfilled.
    The return value is ignored.

    Implementations should also bring all parameters to their canonical
    form if that hasn't been done before.

    @raise NotImplementedError: always, in this base implementation

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    Implementations do the actual work here. errors.OpExecError should
    be raised for failures that are somewhat dealt with in code, or
    expected.

    @param feedback_fn: function used to send feedback back to the caller
    @raise NotImplementedError: always, in this base implementation

    """
    raise NotImplementedError
419

    
420

    
421
def _GetWantedNodes(lu, nodes):
  """Expand a list of node names and return them sorted.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names to expand
  @rtype: list
  @return: the expanded node names, ordered by utils.NiceSort
  @raise errors.OpPrereqError: if the nodes parameter is not a list, or
      if one of the names cannot be expanded
  @raise errors.ProgrammerError: if the nodes list is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)
  elif not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  expanded = []
  for given_name in nodes:
    full_name = lu.cfg.ExpandNodeName(given_name)
    if full_name is None:
      raise errors.OpPrereqError("No such node name '%s'" % given_name,
                                 errors.ECODE_NOENT)
    expanded.append(full_name)

  return utils.NiceSort(expanded)
450

    
451

    
452
def _GetWantedInstances(lu, instances):
  """Return the list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names; an empty list selects all
      instances known to the configuration
  @rtype: list
  @return: the expanded names in the order they were given or, when all
      instances were selected, the full instance list ordered by
      utils.NiceSort
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if not instances:
    # Nothing selected explicitly: fall back to every known instance
    return utils.NiceSort(lu.cfg.GetInstanceList())

  expanded = []
  for given_name in instances:
    full_name = lu.cfg.ExpandInstanceName(given_name)
    if full_name is None:
      raise errors.OpPrereqError("No such instance name '%s'" % given_name,
                                 errors.ECODE_NOENT)
    expanded.append(full_name)

  return expanded
482

    
483

    
484
def _CheckOutputFields(static, dynamic, selected):
  """Check that every selected field is a known one.

  The static and dynamic field sets are merged and the selected fields
  are matched against the result.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields requested by the caller
  @raise errors.OpPrereqError: if any selected field matches neither set

  """
  known = utils.FieldSet()
  for field_set in (static, dynamic):
    known.Extend(field_set)

  unknown = known.NonMatching(selected)
  if not unknown:
    return

  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(unknown), errors.ECODE_INVAL)
501

    
502

    
503
def _CheckBooleanOpField(op, name):
504
  """Validates boolean opcode parameters.
505

506
  This will ensure that an opcode parameter is either a boolean value,
507
  or None (but that it always exists).
508

509
  """
510
  val = getattr(op, name, None)
511
  if not (val is None or isinstance(val, bool)):
512
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
513
                               (name, str(val)), errors.ECODE_INVAL)
514
  setattr(op, name, val)
515

    
516

    
517
def _CheckGlobalHvParams(params):
  """Validate that the given hypervisor params are not global ones.

  This ensures that instances don't get customised versions of global
  params: none of the names in constants.HVC_GLOBALS may show up in
  params.

  @param params: the hypervisor parameters to check
  @raise errors.OpPrereqError: if any global parameter is present

  """
  offending = constants.HVC_GLOBALS.intersection(params)
  if not offending:
    return

  raise errors.OpPrereqError(
    ("The following hypervisor parameters are global and cannot"
     " be customized at instance level, please modify them at"
     " cluster level: %s" % utils.CommaJoin(offending)),
    errors.ECODE_INVAL)
530

    
531

    
532
def _CheckNodeOnline(lu, node):
533
  """Ensure that a given node is online.
534

535
  @param lu: the LU on behalf of which we make the check
536
  @param node: the node to check
537
  @raise errors.OpPrereqError: if the node is offline
538

539
  """
540
  if lu.cfg.GetNodeInfo(node).offline:
541
    raise errors.OpPrereqError("Can't use offline node %s" % node,
542
                               errors.ECODE_INVAL)
543

    
544

    
545
def _CheckNodeNotDrained(lu, node):
546
  """Ensure that a given node is not drained.
547

548
  @param lu: the LU on behalf of which we make the check
549
  @param node: the node to check
550
  @raise errors.OpPrereqError: if the node is drained
551

552
  """
553
  if lu.cfg.GetNodeInfo(node).drained:
554
    raise errors.OpPrereqError("Can't use drained node %s" % node,
555
                               errors.ECODE_INVAL)
556

    
557

    
558
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
559
                          memory, vcpus, nics, disk_template, disks,
560
                          bep, hvp, hypervisor_name):
561
  """Builds instance related env variables for hooks
562

563
  This builds the hook environment from individual variables.
564

565
  @type name: string
566
  @param name: the name of the instance
567
  @type primary_node: string
568
  @param primary_node: the name of the instance's primary node
569
  @type secondary_nodes: list
570
  @param secondary_nodes: list of secondary nodes as strings
571
  @type os_type: string
572
  @param os_type: the name of the instance's OS
573
  @type status: boolean
574
  @param status: the should_run status of the instance
575
  @type memory: string
576
  @param memory: the memory size of the instance
577
  @type vcpus: string
578
  @param vcpus: the count of VCPUs the instance has
579
  @type nics: list
580
  @param nics: list of tuples (ip, mac, mode, link) representing
581
      the NICs the instance has
582
  @type disk_template: string
583
  @param disk_template: the disk template of the instance
584
  @type disks: list
585
  @param disks: the list of (size, mode) pairs
586
  @type bep: dict
587
  @param bep: the backend parameters for the instance
588
  @type hvp: dict
589
  @param hvp: the hypervisor parameters for the instance
590
  @type hypervisor_name: string
591
  @param hypervisor_name: the hypervisor for the instance
592
  @rtype: dict
593
  @return: the hook environment for this instance
594

595
  """
596
  if status:
597
    str_status = "up"
598
  else:
599
    str_status = "down"
600
  env = {
601
    "OP_TARGET": name,
602
    "INSTANCE_NAME": name,
603
    "INSTANCE_PRIMARY": primary_node,
604
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
605
    "INSTANCE_OS_TYPE": os_type,
606
    "INSTANCE_STATUS": str_status,
607
    "INSTANCE_MEMORY": memory,
608
    "INSTANCE_VCPUS": vcpus,
609
    "INSTANCE_DISK_TEMPLATE": disk_template,
610
    "INSTANCE_HYPERVISOR": hypervisor_name,
611
  }
612

    
613
  if nics:
614
    nic_count = len(nics)
615
    for idx, (ip, mac, mode, link) in enumerate(nics):
616
      if ip is None:
617
        ip = ""
618
      env["INSTANCE_NIC%d_IP" % idx] = ip
619
      env["INSTANCE_NIC%d_MAC" % idx] = mac
620
      env["INSTANCE_NIC%d_MODE" % idx] = mode
621
      env["INSTANCE_NIC%d_LINK" % idx] = link
622
      if mode == constants.NIC_MODE_BRIDGED:
623
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
624
  else:
625
    nic_count = 0
626

    
627
  env["INSTANCE_NIC_COUNT"] = nic_count
628

    
629
  if disks:
630
    disk_count = len(disks)
631
    for idx, (size, mode) in enumerate(disks):
632
      env["INSTANCE_DISK%d_SIZE" % idx] = size
633
      env["INSTANCE_DISK%d_MODE" % idx] = mode
634
  else:
635
    disk_count = 0
636

    
637
  env["INSTANCE_DISK_COUNT"] = disk_count
638

    
639
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
640
    for key, value in source.items():
641
      env["INSTANCE_%s_%s" % (kind, key)] = value
642

    
643
  return env
644

    
645

    
646
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  The resulting list is suitable to be passed to _BuildInstanceHookEnv
  or to be used as a return value in LUQueryInstanceData. The mode and
  link of each NIC are taken from its own parameters filled up with the
  cluster's default NIC parameters.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one (ip, mac, mode, link) tuple per NIC, in input order

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]

  def _ToTuple(nic):
    """Convert a single NIC object into its hooks tuple."""
    ip, mac = nic.ip, nic.mac
    filled = objects.FillDict(cluster_defaults, nic.nicparams)
    return (ip, mac, filled[constants.NIC_MODE], filled[constants.NIC_LINK])

  return [_ToTuple(nic) for nic in nics]
668

    
669

    
670
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Build the instance-related hook environment from an instance object.

  The arguments for L{_BuildInstanceHookEnv} are derived from the
  instance object and from its backend/hypervisor parameters as filled
  in by the cluster object; any of them can be replaced through the
  override dictionary.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  be_full = cluster.FillBE(instance)
  hv_full = cluster.FillHV(instance)

  hook_args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_up,
    memory=be_full[constants.BE_MEMORY],
    vcpus=be_full[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=be_full,
    hvp=hv_full,
    hypervisor_name=instance.hypervisor,
    )

  # Caller-supplied values win over the ones derived from the object
  if override:
    hook_args.update(override)

  return _BuildInstanceHookEnv(**hook_args) # pylint: disable-msg=W0142
706

    
707

    
708
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  The configuration is asked to maintain the candidate pool; every node
  it reports back is logged as promoted and re-added through the LU's
  context. Afterwards a note is logged if there are more master
  candidates than desired.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed through unchanged to the configuration's
      MaintainCandidatePool and GetMasterCandidateStats calls

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now <= mc_max:
    return

  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
             (mc_now, mc_max))
722

    
723

    
724
def _DecideSelfPromotion(lu, exceptions=None):
725
  """Decide whether I should promote myself as a master candidate.
726

727
  """
728
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
729
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
730
  # the new node will increase mc_max with one, so:
731
  mc_should = min(mc_should + 1, cp_size)
732
  return mc_now < mc_should
733

    
734

    
735
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  Each NIC's parameters are filled up with the cluster's NIC parameters
  for the given profile; the links of all bridged NICs are then verified
  on the target node through an RPC call. No RPC is made if none of the
  NICs is bridged.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist
  @param profile: key into the cluster's nicparams selecting the
      defaults to fill in
  @raise errors.OpPrereqError: presumably, on a failed check, since the
      RPC result is raised with prereq=True - confirm against the RPC
      result class

  """
  cluster_params = lu.cfg.GetClusterInfo().nicparams[profile]
  filled_list = []
  for nic in target_nics:
    filled_list.append(objects.FillDict(cluster_params, nic.nicparams))

  bridges = []
  for filled in filled_list:
    if filled[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(filled[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
749

    
750

    
751
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node when None

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
758

    
759

    
760
def _CheckOSVariant(os_obj, name):
761
  """Check whether an OS name conforms to the os variants specification.
762

763
  @type os_obj: L{objects.OS}
764
  @param os_obj: OS object to check
765
  @type name: string
766
  @param name: OS name passed by the user, to check for validity
767

768
  """
769
  if not os_obj.supported_variants:
770
    return
771
  try:
772
    variant = name.split("+", 1)[1]
773
  except IndexError:
774
    raise errors.OpPrereqError("OS name must include a variant",
775
                               errors.ECODE_INVAL)
776

    
777
  if variant not in os_obj.supported_variants:
778
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
779

    
780

    
781
def _GetNodeInstancesInner(cfg, fn):
782
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
783

    
784

    
785
def _GetNodeInstances(cfg, node_name):
  """Return a list of all primary and secondary instances on a node.

  @param cfg: the configuration to look the instances up in
  @param node_name: the name of the node
  @rtype: list
  @return: the instance objects having the node among their all_nodes

  """
  def _UsesNode(inst):
    """Tell whether the instance lives on the node in any role."""
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
791

    
792

    
793
def _GetNodePrimaryInstances(cfg, node_name):
  """Return the primary instances on a node.

  @param cfg: the configuration to look the instances up in
  @param node_name: the name of the node
  @rtype: list
  @return: the instance objects whose primary node is the given node

  """
  def _IsPrimaryHere(inst):
    """Tell whether the node is the instance's primary node."""
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimaryHere)
799

    
800

    
801
def _GetNodeSecondaryInstances(cfg, node_name):
  """Return the secondary instances on a node.

  @param cfg: the configuration to look the instances up in
  @param node_name: the name of the node
  @rtype: list
  @return: the instance objects listing the node among their secondary
      nodes

  """
  def _IsSecondaryHere(inst):
    """Tell whether the node is one of the instance's secondaries."""
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondaryHere)
807

    
808

    
809
def _GetStorageTypeArgs(cfg, storage_type):
  """Return the arguments for a storage type.

  @param cfg: the configuration, asked for the file storage directory
  @param storage_type: the storage type to build the arguments for
  @rtype: list
  @return: for file storage a one-element list holding the list of
      storage directories; an empty list for any other storage type

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir()]
  return [storage_dirs]
819

    
820

    
821
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of an instance's disks that are faulty on a node.

  The disk IDs are first set for the given node, then the mirror status
  of all the instance's disks is requested from that node.

  @param cfg: the configuration, used to set the disk IDs
  @param rpc: the RPC runner used to query the node
  @param instance: the instance whose disks are examined
  @param node_name: the name of the node to query
  @param prereq: passed as the prereq argument when raising on a failed
      RPC result; presumably selects the exception type - confirm
      against the RPC result class
  @rtype: list
  @return: the indices (positions in the RPC payload) of the disks whose
      status is present and has the local disk status
      constants.LDS_FAULTY

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
836

    
837

    
838
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself does no work; it only supplies the environment for the
  "cluster-init" hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (OP_TARGET set to the cluster name), an
        empty list of nodes for the pre-execution hooks and a list with
        just the master node for the post-execution hooks

    """
    hook_env = {"OP_TARGET": self.cfg.GetClusterName()}
    post_nodes = [self.cfg.GetMasterNode()]
    return hook_env, [], post_nodes

  def CheckPrereq(self):
    """No prerequisites to check.

    @return: always True

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: unused
    @return: always True

    """
    return True
865

    
866

    
867
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    @return: tuple of (environment dict with the cluster name as
        C{OP_TARGET}, empty node list, empty node list)

    """
    hooks_env = {"OP_TARGET": self.cfg.GetClusterName()}
    return hooks_env, [], []

  def CheckPrereq(self):
    """Makes sure the cluster is empty.

    The only node left must be the master, and no instance may remain.

    @raise errors.OpPrereqError: if other nodes or any instances exist

    """
    master = self.cfg.GetMasterNode()

    nodes = self.cfg.GetNodeList()
    only_master_left = (len(nodes) == 1 and nodes[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodes) - 1),
                                 errors.ECODE_INVAL)

    instances = self.cfg.GetInstanceList()
    if instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Tears the cluster down.

    Runs the post hooks on the master, stops the master role and, if the
    cluster manages the ssh setup, backs up the ssh key files of the
    Ganeti user.

    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # The post hooks have to run now, while the master node still exists
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # Hook failures must never prevent the destruction, hence the
      # deliberately unrestricted handler
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      for key_file in (priv_key, pub_key):
        utils.CreateBackup(key_file)

    return master
927

    
928

    
929
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  # Kinds of items an error can be attached to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # Error codes, each an (item type, code text) pair as unpacked by _Error
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # Name of the keyword argument selecting the severity in _Error and
  # _ErrorIf, followed by its two possible values
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
967

    
968
  def ExpandNames(self):
    """Declares the locks needed: all nodes and all instances.

    Every locking level is flagged with 1 in C{share_locks} (presumably
    meaning shared acquisition - confirm against L{LogicalUnit}).

    """
    wanted = {}
    for level in (locking.LEVEL_NODE, locking.LEVEL_INSTANCE):
      wanted[level] = locking.ALL_SET
    self.needed_locks = wanted
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
974

    
975
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Formats a problem report and sends it to the feedback function.

    Depending on the opcode's C{error_codes} parameter the line is
    either a colon-separated, parseable record or a simpler
    human-readable string.

    Only Exec, and code running below it, may call this method, as it
    relies on the C{_feedback_fn} attribute set up there.

    @param ecode: (item type, error code text) pair
    @param item: name of the affected item, or a false value for none
    @param msg: the message, optionally a format string for C{args}
    @param args: values interpolated into C{msg}, if any are given
    @keyword code: severity, L{ETYPE_ERROR} when not given

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (item_type, code_text) = ecode

    # the message is only treated as a format string if there is
    # something to interpolate
    text = msg
    if args:
      text = text % args

    if self.op.error_codes:
      line = "%s:%s:%s:%s:%s" % (severity, code_text, item_type, item, text)
    else:
      item_suffix = ""
      if item:
        item_suffix = " " + item
      line = "%s: %s%s: %s" % (severity, item_type, item_suffix, text)

    self._feedback_fn("  - %s" % line)
1000

    
1001
  def _ErrorIf(self, cond, *args, **kwargs):
    """Reports a problem if the condition holds (or errors are simulated).

    @param cond: the condition; its truth value is what matters
    @param args: positional arguments handed over to L{_Error}
    @param kwargs: keyword arguments handed over to L{_Error}; the
        severity is read from them as well

    """
    triggered = bool(cond) or self.op.debug_simulate_errors
    if triggered:
      self._Error(*args, **kwargs)

    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if severity != self.ETYPE_ERROR:
      # anything but an error is reported without failing the operation
      return
    self.bad = self.bad or triggered
1011

    
1012
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Checks the verification data sent back by a single node.

    The first three checks end the method when they fail:

      - the result must be a non-empty dict
      - it must contain a well-formed version pair
      - the protocol version must be the same as the master's

    The remaining ones are all run: release version (warning only),
    volume group existence and size, file checksums, ssh and tcp
    connectivity, hypervisor verification, DRBD minors, node setup and
    PV names.

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # without a non-empty dict there is nothing to look at
    no_data = not (node_result and isinstance(node_result, dict))
    _ErrorIf(no_data, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if no_data:
      return

    # the version must be a (protocol, release) pair
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version')
    malformed = not (remote_version and
                     isinstance(remote_version, (list, tuple)) and
                     len(remote_version) == 2)
    _ErrorIf(malformed, self.ENODERPC, node,
             "connection to node returned invalid data")
    if malformed:
      return

    (remote_protocol, remote_release) = remote_version

    proto_mismatch = local_version != remote_protocol
    _ErrorIf(proto_mismatch, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_protocol)
    if proto_mismatch:
      return

    # from here on the node speaks our protocol, so the rest of its
    # answer can be interpreted

    # a different release is worth a warning only
    _ErrorIf(constants.RELEASE_VERSION != remote_release,
             self.ENODEVERSION, node,
             "software version mismatch: master %s, node %s",
             constants.RELEASE_VERSION, remote_release,
             code=self.ETYPE_WARNING)

    # volume group existence and minimum size
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST)
      _ErrorIf(not vglist, self.ENODELVM, node,
               "unable to check volume groups")
      if vglist:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # file checksums
    remote_cksum = node_result.get(constants.NV_FILELIST)
    no_cksums = not isinstance(remote_cksum, dict)
    _ErrorIf(no_cksums, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not no_cksums:
      is_candidate = nodeinfo.master_candidate
      for file_name in file_list:
        # master-only files are required on master candidates only
        required = (file_name not in master_files) or is_candidate
        missing = file_name not in remote_cksum
        if missing:
          outdated = False
          current = False
        else:
          remote_sum = remote_cksum[file_name]
          local_sum = local_cksum[file_name]
          outdated = remote_sum != local_sum
          current = remote_sum == local_sum

        _ErrorIf(missing and required, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(outdated and required, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # present although not required, and stale on top of that
        _ErrorIf(outdated and not required, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # present and up to date, but not supposed to be there
        _ErrorIf(current and not required, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # ssh connectivity: every entry returned is a failure
    no_ssh_data = constants.NV_NODELIST not in node_result
    _ErrorIf(no_ssh_data, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not no_ssh_data:
      ssh_failures = node_result[constants.NV_NODELIST]
      if ssh_failures:
        for (peer, reason) in ssh_failures.items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", peer, reason)

    # tcp connectivity: again, every entry returned is a failure
    no_net_data = constants.NV_NODENETTEST not in node_result
    _ErrorIf(no_net_data, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not no_net_data:
      net_failures = node_result[constants.NV_NODENETTEST]
      if net_failures:
        for peer in utils.NiceSort(net_failures.keys()):
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   peer, net_failures[peer])

    # hypervisor verification: anything but None is a failure text
    hyp_result = node_result.get(constants.NV_HYPERVISOR)
    if isinstance(hyp_result, dict):
      for (hv_name, hv_result) in hyp_result.iteritems():
        _ErrorIf(hv_result is not None, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # DRBD minors in use versus the ones the configuration knows about
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      unparsable = not isinstance(used_minors, (tuple, list))
      _ErrorIf(unparsable, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not unparsable:
        for (minor, (iname, must_exist)) in drbd_map.items():
          inactive = minor not in used_minors and must_exist
          _ErrorIf(inactive, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          unknown = minor not in drbd_map
          _ErrorIf(unknown, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)

    # node setup: the payload is the list of problems, missing data
    # being one of them
    setup_errors = node_result.get(constants.NV_NODESETUP,
                                   ["Missing NODESETUP results"])
    _ErrorIf(setup_errors, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(setup_errors))

    # PV names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST)
      no_pvlist = pvlist is None
      _ErrorIf(no_pvlist, self.ENODELVM, node, "Can't get PV list from node")
      if not no_pvlist:
        # ':' must not show up in a PV name, as lvcreate gives it a
        # special meaning (it introduces the range of PEs to use on
        # the PV)
        for (_, pvname, owner_vg) in pvlist:
          has_colon = ":" in pvname
          _ErrorIf(has_colon, self.ENODELVM, node,
                   "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)
1174

    
1175
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verifies a single instance.

    Three things are checked: that the logical volumes of the instance
    exist on the nodes they are mapped to (offline nodes excepted), that
    an instance marked up runs on its primary node (unless that node is
    offline), and that it runs on no other node.

    @param instance: the name of the instance
    @param instanceconfig: the configuration object of the instance
    @param node_vol_is: dict of node name to the volumes found there
    @param node_instance: dict of node name to the instances found there
    @param n_offline: collection of the names of the offline nodes

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    primary = instanceconfig.primary_node

    expected_lvs = {}
    instanceconfig.MapLVsByNode(expected_lvs)

    for (node, volumes) in expected_lvs.items():
      if node in n_offline:
        # volumes on offline nodes cannot be seen, so they are not
        # reported as missing
        continue
      for volume in volumes:
        found = node in node_vol_is and volume in node_vol_is[node]
        _ErrorIf(not found, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      runs_on_primary = (primary in node_instance and
                         instance in node_instance[primary])
      down = not runs_on_primary and primary not in n_offline
      _ErrorIf(down, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               primary)

    for (node, running) in node_instance.items():
      if node == primary:
        continue
      _ErrorIf(instance in running, self.EINSTANCEWRONGNODE, instance,
               "instance should not run on node %s", node)
1211

    
1212
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Reports the volumes which exist but are not expected anywhere.

    A volume found on a node is unknown if the node has no expected
    volumes at all, or if the volume is not among them.

    @param node_vol_should: dict of node name to the expected volumes
    @param node_vol_is: dict of node name to the volumes actually found

    """
    for (node, found_volumes) in node_vol_is.items():
      for volume in found_volumes:
        expected = (node in node_vol_should and
                    volume in node_vol_should[node])
        self._ErrorIf(not expected, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1225

    
1226
  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Reports the running instances which the cluster does not know.

    @param instancelist: the names of the configured instances
    @param node_instance: dict of node name to the instances found there

    """
    for (node, running) in node_instance.items():
      for inst_name in running:
        unknown = inst_name not in instancelist
        self._ErrorIf(unknown, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist",
                      inst_name, node)
1237

    
1238
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    @param node_info: dict of node name to per-node data; the C{mfree}
        and C{sinst-by-pnode} entries are the ones read here
    @param instance_cfg: dict of instance name to instance configuration,
        in the form accepted by the cluster's C{FillBE}

    """
    # The cluster object does not depend on the node or instance being
    # looked at, so it is fetched once instead of once per instance
    cluster = self.cfg.GetClusterInfo()

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        # only auto-balanced instances count towards the memory needed
        needed_mem = 0
        for instance in instances:
          bep = cluster.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)
1264

    
1265
  def CheckPrereq(self):
    """Validates the list of checks which are to be skipped.

    The list from the opcode is stored as a frozenset in C{skip_set};
    all its members must be known optional checks.

    @raise errors.OpPrereqError: if an unknown check was given

    """
    requested = frozenset(self.op.skip_checks)
    self.skip_set = requested
    if constants.VERIFY_OPTIONAL_CHECKS.issuperset(requested):
      return
    raise errors.OpPrereqError("Invalid checks to be skipped specified",
                               errors.ECODE_INVAL)
1276

    
1277
  def BuildHooksEnv(self):
    """Builds the hooks environment.

    Cluster-Verify hooks are run in the post phase only; when they fail,
    their output is logged in the verify output and the verification
    fails (see L{HooksCallBack}).

    @return: tuple of (environment dict holding the cluster tags and the
        tags of every node, empty node list, list of all node names)

    """
    post_nodes = self.cfg.GetNodeList()
    cluster_tags = self.cfg.GetClusterInfo().GetTags()
    hooks_env = {"CLUSTER_TAGS": " ".join(cluster_tags)}
    hooks_env.update(dict(("NODE_TAGS_%s" % node.name,
                           " ".join(node.GetTags()))
                          for node in self.cfg.GetAllNodesInfo().values()))

    return hooks_env, [], post_nodes
1292

    
1293
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    The verification is done in these steps:

      - the global configuration check (C{self.cfg.VerifyConfig})
      - one C{call_node_verify} RPC to all nodes; each node's payload is
        checked via L{_VerifyNode} and then used for the LV, instance,
        hypervisor and time data
      - the per-instance checks via L{_VerifyInstance}, while recording
        the primary and secondary instances of each node
      - orphan volumes, orphan instances and, unless skipped, the N+1
        memory check

    @param feedback_fn: function receiving the progress and problem lines
    @rtype: boolean
    @return: C{not self.bad}, i.e. True if no error was recorded

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    # _Error reports through this attribute
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    # the LVM and DRBD data is only requested if a volume group is set
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      # node_volume; a string instead of a dict is an error text
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist)")
      if test:
        continue

      node_instance[node] = idata

      # node_info; the hypervisor data gets a name of its own so that the
      # list this loop iterates over is not rebound
      hv_info = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(hv_info, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        # The comparison below needs ntime_merged, so the problem is
        # reported unconditionally and the node is skipped, the same way
        # a node with a diverging clock is
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      # The difference is turned into text right away: when errors are
      # simulated the message is formatted even without any divergence,
      # and None cannot be formatted as a float
      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%0.1fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%0.1fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
        node_info[node] = {
          "mfree": int(hv_info['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes",
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad
1576

    
1577
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results; for any phase but the post one this is the
        previous result, unchanged

    """
    # We only really run POST phase hooks, and are only interested in
    # their results; for everything else the previous result is handed
    # back as it is, instead of being dropped
    if phase != constants.HOOKS_PHASE_POST:
      return lu_result

    # Used to change hooks' output to proper indentation
    indent_re = re.compile('^', re.M)
    feedback_fn("* Hooks Results")
    assert hooks_results, "invalid result from hooks"

    for node_name in hooks_results:
      res = hooks_results[node_name]
      msg = res.fail_msg
      # a failure to talk to an offline node is not reported
      test = msg and not res.offline
      self._ErrorIf(test, self.ENODEHOOKS, node_name,
                    "Communication failure in hooks execution: %s", msg)
      if res.offline or msg:
        # No need to investigate payload if node is offline or gave an error.
        # override manually lu_result here as _ErrorIf only
        # overrides self.bad
        # NOTE(review): Exec returns "not self.bad", a true value on
        # success, while 1 is assigned here (and below) for problems -
        # confirm how the callers interpret the result
        lu_result = 1
        continue
      for script, hkr, output in res.payload:
        test = hkr == constants.HKR_FAIL
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Script %s failed, output:", script)
        if test:
          # the output of the script is shown indented, below the error
          output = indent_re.sub('      ', output)
          feedback_fn("%s" % output)
          lu_result = 1

    return lu_result
1622

    
1623

    
1624
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  Only instances which are marked up and which use one of the
  network-mirrored disk templates are looked at.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Ask for all node and instance locks, every level being shared.

    """
    self.needed_locks = dict.fromkeys(
      [locking.LEVEL_NODE, locking.LEVEL_INSTANCE], locking.ALL_SET)
    self.share_locks = dict(((level, 1) for level in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing needs to be checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    failed_nodes = {}
    inactive_instances = []
    missing_lvs = {}
    # the three containers are filled in place below
    result = (failed_nodes, inactive_instances, missing_lvs)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # maps (node, volume) to the instance owning that volume
    lv_owner = {}
    for instance in instances:
      if (instance.admin_up and
          instance.disk_template in constants.DTS_NET_MIRROR):
        lvs_by_node = {}
        instance.MapLVsByNode(lvs_by_node)
        for node, volumes in lvs_by_node.iteritems():
          for volume in volumes:
            lv_owner[(node, volume)] = instance

    if not lv_owner:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      err = node_res.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s", node, err)
        failed_nodes[node] = err
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every volume the node reports is dropped from the expected set
        owner = lv_owner.pop((node, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in inactive_instances:
          inactive_instances.append(owner.name)

    # whatever was not reported by any node is a missing volume; group
    # those per instance
    for key, owner in lv_owner.iteritems():
      missing_lvs.setdefault(owner.name, []).append(key)

    return result
1705

    
1706

    
1707
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The recorded size of each disk is compared with the size reported by
  the instance's primary node, and the configuration is corrected when
  the two differ.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and compute the needed locks.

    With an explicit instance list only those instances are locked (the
    node locks are computed later, in L{DeclareLocks}); with an empty
    list all nodes and instances are locked. All locks are shared.

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name,
                                     errors.ECODE_NOENT)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly selected instances.

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list was given: work on every instance we locked
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether the size of any (recursive) first child was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      # the entry is created even for diskless instances, so the node is
      # still queried below
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies, as SetDiskID modifies the disk objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # The per-disk sizes are the payload of the RPC result; this is the
      # attribute every other RPC consumer in this module reads.
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # scale the reported value down by 2**20 before comparing it with
        # the recorded disk size
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
1828

    
1829

    
1830
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The second returned list holds only the master node, the third one
    all nodes.

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    master_node = self.cfg.GetMasterNode()
    return env, [master_node], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the rename is refused if neither name nor IP
    would change, or if a changed IP already answers on the node daemon
    port.

    """
    hostname = utils.GetHostInfo(self.op.name)
    new_name, new_ip = hostname.name, hostname.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = new_ip != old_ip

    if not ip_changed and new_name == old_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if ip_changed and utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    # from here on, use the resolved name
    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped for the duration of the change and
    started again afterwards, even if the update fails.

    """
    master = self.cfg.GetMasterNode()

    # shutdown the master IP
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = self.op.name
      cluster.master_ip = self.ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file and push it to all the other nodes
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      other_nodes = self.cfg.GetNodeList()
      if master in other_nodes:
        other_nodes.remove(master)
      uploads = self.rpc.call_upload_file(other_nodes,
                                          constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in uploads.iteritems():
        err = to_result.fail_msg
        if not err:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node, err))

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      err = start_result.fail_msg
      if err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", err)
1912

    
1913

    
1914
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or anything below it, is an LVM volume.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: True if the disk or one of its (recursive) children has the
      LD_LV dev_type, False otherwise

  """
  # children are examined first; the disk's own type is the fallback
  for child in disk.children or []:
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1928

    
1929

    
1930
class LUSetClusterParams(LogicalUnit):
1931
  """Change the parameters of the cluster.
1932

1933
  """
1934
  HPATH = "cluster-modify"
1935
  HTYPE = constants.HTYPE_CLUSTER
1936
  _OP_REQP = []
1937
  REQ_BGL = False
1938

    
1939
  def CheckArguments(self):
1940
    """Check parameters
1941

1942
    """
1943
    if not hasattr(self.op, "candidate_pool_size"):
1944
      self.op.candidate_pool_size = None
1945
    if self.op.candidate_pool_size is not None:
1946
      try:
1947
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1948
      except (ValueError, TypeError), err:
1949
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1950
                                   str(err), errors.ECODE_INVAL)
1951
      if self.op.candidate_pool_size < 1:
1952
        raise errors.OpPrereqError("At least one master candidate needed",
1953
                                   errors.ECODE_INVAL)
1954

    
1955
  def ExpandNames(self):
1956
    # FIXME: in the future maybe other cluster params won't require checking on
1957
    # all nodes to be modified.
1958
    self.needed_locks = {
1959
      locking.LEVEL_NODE: locking.ALL_SET,
1960
    }
1961
    self.share_locks[locking.LEVEL_NODE] = 1
1962

    
1963
  def BuildHooksEnv(self):
1964
    """Build hooks env.
1965

1966
    """
1967
    env = {
1968
      "OP_TARGET": self.cfg.GetClusterName(),
1969
      "NEW_VG_NAME": self.op.vg_name,
1970
      }
1971
    mn = self.cfg.GetMasterNode()
1972
    return env, [mn], [mn]
1973

    
1974
  def CheckPrereq(self):
1975
    """Check prerequisites.
1976

1977
    This checks whether the given params don't conflict and
1978
    if the given volume group is valid.
1979

1980
    """
1981
    if self.op.vg_name is not None and not self.op.vg_name:
1982
      instances = self.cfg.GetAllInstancesInfo().values()
1983
      for inst in instances:
1984
        for disk in inst.disks:
1985
          if _RecursiveCheckIfLVMBased(disk):
1986
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1987
                                       " lvm-based instances exist",
1988
                                       errors.ECODE_INVAL)
1989

    
1990
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1991

    
1992
    # if vg_name not None, checks given volume group on all nodes
1993
    if self.op.vg_name:
1994
      vglist = self.rpc.call_vg_list(node_list)
1995
      for node in node_list:
1996
        msg = vglist[node].fail_msg
1997
        if msg:
1998
          # ignoring down node
1999
          self.LogWarning("Error while gathering data on node %s"
2000
                          " (ignoring node): %s", node, msg)
2001
          continue
2002
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2003
                                              self.op.vg_name,
2004
                                              constants.MIN_VG_SIZE)
2005
        if vgstatus:
2006
          raise errors.OpPrereqError("Error on node '%s': %s" %
2007
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2008

    
2009
    self.cluster = cluster = self.cfg.GetClusterInfo()
2010
    # validate params changes
2011
    if self.op.beparams:
2012
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2013
      self.new_beparams = objects.FillDict(
2014
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2015

    
2016
    if self.op.nicparams:
2017
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2018
      self.new_nicparams = objects.FillDict(
2019
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2020
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2021
      nic_errors = []
2022

    
2023
      # check all instances for consistency
2024
      for instance in self.cfg.GetAllInstancesInfo().values():
2025
        for nic_idx, nic in enumerate(instance.nics):
2026
          params_copy = copy.deepcopy(nic.nicparams)
2027
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2028

    
2029
          # check parameter syntax
2030
          try:
2031
            objects.NIC.CheckParameterSyntax(params_filled)
2032
          except errors.ConfigurationError, err:
2033
            nic_errors.append("Instance %s, nic/%d: %s" %
2034
                              (instance.name, nic_idx, err))
2035

    
2036
          # if we're moving instances to routed, check that they have an ip
2037
          target_mode = params_filled[constants.NIC_MODE]
2038
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2039
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2040
                              (instance.name, nic_idx))
2041
      if nic_errors:
2042
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2043
                                   "\n".join(nic_errors))
2044

    
2045
    # hypervisor list/parameters
2046
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2047
    if self.op.hvparams:
2048
      if not isinstance(self.op.hvparams, dict):
2049
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2050
                                   errors.ECODE_INVAL)
2051
      for hv_name, hv_dict in self.op.hvparams.items():
2052
        if hv_name not in self.new_hvparams:
2053
          self.new_hvparams[hv_name] = hv_dict
2054
        else:
2055
          self.new_hvparams[hv_name].update(hv_dict)
2056

    
2057
    if self.op.enabled_hypervisors is not None:
2058
      self.hv_list = self.op.enabled_hypervisors
2059
      if not self.hv_list:
2060
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2061
                                   " least one member",
2062
                                   errors.ECODE_INVAL)
2063
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2064
      if invalid_hvs:
2065
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2066
                                   " entries: %s" %
2067
                                   utils.CommaJoin(invalid_hvs),
2068
                                   errors.ECODE_INVAL)
2069
    else:
2070
      self.hv_list = cluster.enabled_hypervisors
2071

    
2072
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2073
      # either the enabled list has changed, or the parameters have, validate
2074
      for hv_name, hv_params in self.new_hvparams.items():
2075
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2076
            (self.op.enabled_hypervisors and
2077
             hv_name in self.op.enabled_hypervisors)):
2078
          # either this is a new hypervisor, or its parameters have changed
2079
          hv_class = hypervisor.GetHypervisor(hv_name)
2080
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2081
          hv_class.CheckParameterSyntax(hv_params)
2082
          _CheckHVParams(self, node_list, hv_name, hv_params)
2083

    
2084
  def Exec(self, feedback_fn):
2085
    """Change the parameters of the cluster.
2086

2087
    """
2088
    if self.op.vg_name is not None:
2089
      new_volume = self.op.vg_name
2090
      if not new_volume:
2091
        new_volume = None
2092
      if new_volume != self.cfg.GetVGName():
2093
        self.cfg.SetVGName(new_volume)
2094
      else:
2095
        feedback_fn("Cluster LVM configuration already in desired"
2096
                    " state, not changing")
2097
    if self.op.hvparams:
2098
      self.cluster.hvparams = self.new_hvparams
2099
    if self.op.enabled_hypervisors is not None:
2100
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2101
    if self.op.beparams:
2102
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2103
    if self.op.nicparams:
2104
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2105

    
2106
    if self.op.candidate_pool_size is not None:
2107
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2108
      # we need to update the pool size here, otherwise the save will fail
2109
      _AdjustCandidatePool(self, [])
2110

    
2111
    self.cfg.Update(self.cluster, feedback_fn)
2112

    
2113

    
2114
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the non-config cluster files to all other nodes.

  ConfigWriter already distributes the config and ssconf files; this
  function handles the remaining files which every node should have.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Target nodes: everything in the config plus the extra nodes, but
  # never the master itself
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  try:
    targets.remove(master_info.name)
  except ValueError:
    pass

  # 2. Files: a fixed list plus whatever the enabled hypervisors need
  to_copy = set([constants.ETC_HOSTS,
                 constants.SSH_KNOWN_HOSTS_FILE,
                 constants.RAPI_CERT_FILE,
                 constants.RAPI_USERS_FILE,
                 constants.HMAC_CLUSTER_KEY,
                ])

  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    to_copy.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Upload; files missing locally are skipped, failures only warned about
  for fname in to_copy:
    if not os.path.exists(fname):
      continue
    uploads = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in uploads.items():
      err = to_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (fname, to_node, err))
2156

    
2157

    
2158
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU re-saves the unchanged cluster object and then copies the
  ancillary files.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request a shared lock on all nodes.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2184

    
2185

    
2186
def _WaitForSync(lu, instance, oneshot=False):
2187
  """Sleep and poll for an instance's disk to sync.
2188

2189
  """
2190
  if not instance.disks:
2191
    return True
2192

    
2193
  if not oneshot:
2194
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2195

    
2196
  node = instance.primary_node
2197

    
2198
  for dev in instance.disks:
2199
    lu.cfg.SetDiskID(dev, node)
2200

    
2201
  # TODO: Convert to utils.Retry
2202

    
2203
  retries = 0
2204
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2205
  while True:
2206
    max_time = 0
2207
    done = True
2208
    cumul_degraded = False
2209
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2210
    msg = rstats.fail_msg
2211
    if msg:
2212
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2213
      retries += 1
2214
      if retries >= 10:
2215
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2216
                                 " aborting." % node)
2217
      time.sleep(6)
2218
      continue
2219
    rstats = rstats.payload
2220
    retries = 0
2221
    for i, mstat in enumerate(rstats):
2222
      if mstat is None:
2223
        lu.LogWarning("Can't compute data for node %s/%s",
2224
                           node, instance.disks[i].iv_name)
2225
        continue
2226

    
2227
      cumul_degraded = (cumul_degraded or
2228
                        (mstat.is_degraded and mstat.sync_percent is None))
2229
      if mstat.sync_percent is not None:
2230
        done = False
2231
        if mstat.estimated_time is not None:
2232
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2233
          max_time = mstat.estimated_time
2234
        else:
2235
          rem_time = "no time estimate"
2236
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2237
                        (instance.disks[i].iv_name, mstat.sync_percent,
2238
                         rem_time))
2239

    
2240
    # if we're done but degraded, let's do a few small retries, to
2241
    # make sure we see a stable and not transient situation; therefore
2242
    # we force restart of the loop
2243
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2244
      logging.info("Degraded disks found, %d retries left", degr_retries)
2245
      degr_retries -= 1
2246
      time.sleep(1)
2247
      continue
2248

    
2249
    if done or oneshot:
2250
      break
2251

    
2252
    time.sleep(min(60, max_time))
2253

    
2254
  if done:
2255
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2256
  return not cumul_degraded
2257

    
2258

    
2259
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2260
  """Check that mirrors are not degraded.
2261

2262
  The ldisk parameter, if True, will change the test from the
2263
  is_degraded attribute (which represents overall non-ok status for
2264
  the device(s)) to the ldisk (representing the local storage status).
2265

2266
  """
2267
  lu.cfg.SetDiskID(dev, node)
2268

    
2269
  result = True
2270

    
2271
  if on_primary or dev.AssembleOnSecondary():
2272
    rstats = lu.rpc.call_blockdev_find(node, dev)
2273
    msg = rstats.fail_msg
2274
    if msg:
2275
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2276
      result = False
2277
    elif not rstats.payload:
2278
      lu.LogWarning("Can't find disk on node %s", node)
2279
      result = False
2280
    else:
2281
      if ldisk:
2282
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2283
      else:
2284
        result = result and not rstats.payload.is_degraded
2285

    
2286
  if dev.children:
2287
    for child in dev.children:
2288
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2289

    
2290
  return result
2291

    
2292

    
2293
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the query arguments; no locks are acquired.

    @raise errors.OpPrereqError: if a list of names was passed, as
        selective queries are not supported

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and RPC results as values;
        each payload is a list of (name, path, status, diagnose, variants)
        entries

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [...]),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [...])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @rtype: list
    @return: one row per OS, each row holding the values of the requested
        output fields, in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    # validity (and variants) are only computed when a field needs them
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # an OS is valid only if the first entry on every node is valid;
          # bool() keeps an empty list (OS missing on a node) from being
          # returned as the value of the "valid" field
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            # keep only the variants known on all nodes
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = dict(os_data)
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2406

    
2407

    
2408
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node to be removed is dropped from the returned node lists, as
    a failed node would otherwise make itself impossible to remove;
    L{Exec} runs the post hooks on that node separately.

    """
    target = self.op.node_name
    hook_env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    other_nodes = self.cfg.GetNodeList()
    if target in other_nodes:
      other_nodes.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return hook_env, other_nodes, other_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - no instance lists it among its nodes

    On success the expanded node name is stored back in the opcode and
    the node object is kept in C{self.node}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    expanded_name = cfg.ExpandNodeName(self.op.node_name)
    node_info = cfg.GetNodeInfo(expanded_name)
    if node_info is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
                                 errors.ECODE_NOENT)

    all_instances = cfg.GetInstanceList()

    master_name = cfg.GetMasterNode()
    if node_info.name == master_name:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in all_instances:
      inst_info = cfg.GetInstanceInfo(iname)
      if node_info.name in inst_info.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node_info.name
    self.node = node_info

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The candidate pool is adjusted first, then the node is removed via
    the context, the post hooks are run on it and finally the node is
    asked to leave the cluster. Hook and leave-cluster failures are only
    logged as warnings.

    """
    victim = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 victim.name)

    cluster = self.cfg.GetClusterInfo()
    touch_ssh = cluster.modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[victim.name])
    self.context.RemoveNode(victim.name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [victim.name])
    except:
      # pylint: disable-msg=W0702
      # best effort only: whatever goes wrong in the hooks must not stop
      # the removal
      self.LogWarning("Errors occurred running hooks on %s" % victim.name)

    leave_result = self.rpc.call_node_leave_cluster(victim.name, touch_ssh)
    leave_error = leave_result.fail_msg
    if leave_error:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_error)
2495

    
2496

    
2497
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields which are read directly, by name, from the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields whose values come from the node_info RPC result
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  # fields which are computed from the configuration only
  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    """Validate the requested fields and compute the needed locks.

    Node locks are only requested when at least one non-static field
    was selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # the non-static fields requested; only used for its truth value
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: not used by this method
    @rtype: list of lists
    @return: one row per node (nodes sorted via C{utils.NiceSort}), each
        holding the values of C{self.op.output_fields}, in order
    @raise errors.OpExecError: if, when running without locks, one of the
        explicitly requested nodes is no longer in the configuration
    @raise errors.ParameterError: if an unknown field was requested

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      # without locks the nodes could have disappeared since ExpandNames
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          # failed call or empty answer: all dynamic fields will be None
          live_data[name] = {}
    else:
      # no live data needed; give every node its own (empty) dict, as
      # dict.fromkeys would make all the nodes share one mutable object
      live_data = dict([(name, {}) for name in nodenames])

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    # the instance data is only walked if an instance field was requested
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          # single-letter role; the first matching state wins: master,
          # master candidate, drained, offline, anything else
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output
2656

    
2657

    
2658
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the requested fields and declare the node locks.

    All nodes are locked when the opcode names none, otherwise only the
    ones computed by C{_GetWantedNodes}.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is verified here; the names of the acquired node locks are
    simply recorded as the nodes to query.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Nodes which are offline or whose volume call failed are skipped (the
    latter with a warning). The volumes of each node are sorted by their
    'dev' value and every output value is converted to a string.

    """
    node_list = self.nodes
    vol_results = self.rpc.call_node_volumes(node_list)

    instances = []
    for iname in self.cfg.GetInstanceList():
      instances.append(self.cfg.GetInstanceInfo(iname))

    lvs_of = {}
    for inst in instances:
      lvs_of[inst] = inst.MapLVsByNode()

    # output fields copied verbatim from a key of the volume data
    direct = {"phys": 'dev', "vg": 'vg', "name": 'name'}

    rows = []
    for node in node_list:
      nres = vol_results[node]
      if nres.offline:
        continue
      err = nres.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      for vol in sorted(nres.payload, key=lambda item: item['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in direct:
            val = vol[direct[field]]
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # name of the first instance having this volume on this
            # node, or a dash if there is none
            val = '-'
            for inst in instances:
              node_lvs = lvs_of[inst]
              if node in node_lvs and vol['name'] in node_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        rows.append(row)

    return rows
2742

    
2743

    
2744
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    """Validate storage type and fields, then declare the node locks.

    """
    stype = self.op.storage_type
    if stype not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % stype,
                                 errors.ECODE_INVAL)

    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      node_locks = locking.ALL_SET
    else:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """Check prerequisites.

    Makes sure the opcode has a C{name} attribute (None if it was not
    given) and records the names of the acquired node locks as the
    nodes to query.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes and nodes whose storage call failed are skipped (the
    latter with a warning). Rows are ordered by node and then by storage
    unit name, both via C{utils.NiceSort}.

    """
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      rpc_fields = requested[:]
    else:
      rpc_fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    local_only = (constants.SF_NODE, constants.SF_TYPE)
    rpc_fields = [fname for fname in rpc_fields if fname not in local_only]

    # position of every field within the rows sent back by the nodes
    position = {}
    for idx, fname in enumerate(rpc_fields):
      position[fname] = idx
    name_pos = position[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    answers = self.rpc.call_storage_list(self.nodes,
                                         self.op.storage_type, st_args,
                                         self.op.name, rpc_fields)

    table = []

    for node in utils.NiceSort(self.nodes):
      answer = answers[node]
      if answer.offline:
        continue

      err = answer.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      by_name = {}
      for unit in answer.payload:
        by_name[unit[name_pos]] = unit

      for unit_name in utils.NiceSort(by_name.keys()):
        unit = by_name[unit_name]
        line = []

        for field in requested:
          if field == constants.SF_NODE:
            line.append(node)
          elif field == constants.SF_TYPE:
            line.append(self.op.storage_type)
          elif field in position:
            line.append(unit[position[field]])
          else:
            raise errors.ParameterError(field)

        table.append(line)

    return table
2839

    
2840

    
2841
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = expanded

    stype = self.op.storage_type
    if stype not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % stype,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the locks needed: only the target node.

    """
    node_locks = {}
    node_locks[locking.LEVEL_NODE] = self.op.node_name
    self.needed_locks = node_locks

  def CheckPrereq(self):
    """Check prerequisites.

    The storage type must allow modifications, and every field in the
    requested changes must be one of the modifiable fields of that type.

    """
    stype = self.op.storage_type

    if stype not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype,
                                 errors.ECODE_INVAL)
    allowed = constants.MODIFIABLE_STORAGE_FIELDS[stype]

    rejected = set(self.op.changes.keys()) - allowed
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (stype, list(rejected)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    A failure of the remote call is raised through the C{Raise} method
    of the result.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    outcome = self.rpc.call_storage_modify(op.node_name,
                                           op.storage_type, st_args,
                                           op.name, op.changes)
    outcome.Raise("Failed to modify storage unit '%s' on %s" %
                  (op.name, op.node_name))
2896

    
2897

    
2898
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    op = self.op
    hook_env = {
      "OP_TARGET": op.node_name,
      "NODE_NAME": op.node_name,
      "NODE_PIP": op.primary_ip,
      "NODE_SIP": op.secondary_ip,
      }
    current_nodes = self.cfg.GetNodeList()
    with_new_node = current_nodes + [op.node_name]
    return hook_env, current_nodes, with_new_node

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add,
       that it is, with unchanged IP addresses)
     - it is resolvable
     - its IP addresses don't collide with those of another node
     - its parameters (single/dual homed) matches the cluster
     - it answers a TCP ping on the node daemon port

    On success the master candidate decision is stored in
    C{self.master_candidate} and the node object in C{self.new_node}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    op = self.op
    cfg = self.cfg

    resolved = utils.GetHostInfo(op.node_name)
    node = resolved.name

    primary_ip = resolved.ip
    op.primary_ip = primary_ip

    secondary_ip = getattr(op, "secondary_ip", None)
    if secondary_ip is None:
      # no secondary IP given: fall back to the primary one
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    readd = op.readd
    already_known = node in node_list
    if readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    for other_name in node_list:
      other = cfg.GetNodeInfo(other_name)

      if readd and node == other_name:
        # the node itself: its addresses must not have changed
        same_ips = (other.primary_ip == primary_ip and
                    other.secondary_ip == secondary_ip)
        if not same_ips:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      other_ips = (other.primary_ip, other.secondary_ip)
      if primary_ip in other_ips or secondary_ip in other_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % other.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    master = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_single = master.secondary_ip == master.primary_ip
    newbie_single = secondary_ip == primary_ip
    if master_single and not newbie_single:
      raise errors.OpPrereqError("The master has no private ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_single and not master_single:
      raise errors.OpPrereqError("The master has a private ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    # check reachability from my secondary ip to newbie's secondary ip
    if not newbie_single and not utils.TcpPing(secondary_ip,
                                               constants.DEFAULT_NODED_PORT,
                                               source=master.secondary_ip):
      raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                 " based ping to noded port",
                                 errors.ECODE_ENVIRON)

    # a re-added node is passed as an exception to the promotion decision
    exceptions = []
    if readd:
      exceptions.append(node)

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if not readd:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)
    else:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are: protocol version check, optional ssh key transfer,
    optional /etc/hosts update, secondary IP check, ssh/hostname
    verification from the master and, finally, the (re-)addition of the
    node to the context.

    """
    new_node = self.new_node
    node = new_node.name
    readd = self.op.readd

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if readd:
      new_node.drained = False # pylint: disable-msg=W0201
      new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    version_res = self.rpc.call_version([node])[node]
    version_res.Raise("Can't get version information from node %s" % node)
    if not (constants.PROTOCOL_VERSION == version_res.payload):
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_res.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_res.payload)

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      key_paths = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                   constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                   priv_key, pub_key]
      (dsa_priv, dsa_pub, rsa_priv, rsa_pub, user_priv, user_pub) = \
        [utils.ReadFile(path) for path in key_paths]

      add_res = self.rpc.call_node_add(node, dsa_priv, dsa_pub,
                                       rsa_priv, rsa_pub,
                                       user_priv, user_pub)
      add_res.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    # dual-homed node: it must really own the secondary address
    if new_node.secondary_ip != new_node.primary_ip:
      ip_res = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      ip_res.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not ip_res.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    verifiers = [self.cfg.GetMasterNode()]
    verify_what = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_res = self.rpc.call_node_verify(verifiers, verify_what,
                                           self.cfg.GetClusterName())
    for verifier in verifiers:
      verify_res[verifier].Raise("Cannot communicate with node %s" % verifier)
      failures = verify_res[verifier].payload[constants.NV_NODELIST]
      if not failures:
        continue
      for failed in failures:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, failures[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_res = self.rpc.call_node_demote_from_mc(new_node.name)
      demote_err = demote_res.fail_msg
      if demote_err:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % demote_err)
3116

    
3117

    
3118
class LUSetNodeParams(LogicalUnit):
  """Changes the role flags of a node.

  The flags handled here are C{master_candidate}, C{offline} and
  C{drained}; at most one of them can be switched on by one opcode.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested flag changes.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_INVAL)
    self.op.node_name = expanded

    for flag_name in ('master_candidate', 'offline', 'drained'):
      _CheckBooleanOpField(self.op, flag_name)

    requested = [self.op.offline, self.op.master_candidate, self.op.drained]
    if requested.count(None) == len(requested):
      # nothing at all was asked for
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if requested.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock of the node being modified.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    The same node list (master plus the target node) is returned for
    both the pre and the post phase.

    """
    hook_nodes = [self.cfg.GetMasterNode(), self.op.node_name]
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the requested flag changes are compatible with
    the current state of the node and with the master candidate pool,
    and decides on auto-promotion when a node is de-offlined/de-drained.

    """
    op = self.op
    node = self.node = self.cfg.GetNodeInfo(op.node_name)

    any_flag_given = not (op.master_candidate is None and
                          op.drained is None and
                          op.offline is None)
    # we can't change the master's node flags
    if any_flag_given and op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("The master role can be changed"
                                 " only via masterfailover",
                                 errors.ECODE_INVAL)

    # whether the node is being offlined/drained, respectively brought back
    switching_off = op.offline == True or op.drained == True
    switching_on = op.offline == False or op.drained == False

    if node.master_candidate and (op.master_candidate == False or
                                  switching_off):
      pool_size = self.cfg.GetClusterInfo().candidate_pool_size
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
      if mc_now <= pool_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (pool_size, mc_now-1))
        # Forcing is only honoured for an offline/drain operation when no
        # further node could possibly be promoted.
        # FIXME: this can still lead to issues if in any way another node
        # which could be promoted appears in the meantime.
        if not (op.force and switching_off and mc_should == mc_max):
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
        self.LogWarning(msg)

    remains_offline = node.offline and op.offline != False
    remains_drained = node.drained and op.drained != False
    if op.master_candidate == True and (remains_offline or remains_drained):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (switching_on and not switching_off and
        not op.master_candidate == True and not node.master_candidate):
      op.master_candidate = _DecideSelfPromotion(self)
      if op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def _RequestDemotion(self, node):
    """Ask a node to demote itself from master candidate.

    A failure of the RPC call is only logged as a warning.

    """
    fail_msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
    if fail_msg:
      self.LogWarning("Node failed to demote itself: %s" % fail_msg)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: a list of (parameter, new value/description) pairs, one for
        each change that was made

    """
    node = self.node
    op = self.op

    changes = []
    mc_changed = False

    if op.offline is not None:
      node.offline = op.offline
      changes.append(("offline", str(op.offline)))
      if op.offline == True:
        # an offline node can be neither a candidate nor drained
        if node.master_candidate:
          node.master_candidate = False
          mc_changed = True
          changes.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          changes.append(("drained", "clear drained status due to offline"))

    if op.master_candidate is not None:
      node.master_candidate = op.master_candidate
      mc_changed = True
      changes.append(("master_candidate", str(op.master_candidate)))
      if op.master_candidate == False:
        self._RequestDemotion(node)

    if op.drained is not None:
      node.drained = op.drained
      changes.append(("drained", str(op.drained)))
      if op.drained == True:
        # a drained node can be neither a candidate nor offline
        if node.master_candidate:
          node.master_candidate = False
          mc_changed = True
          changes.append(("master_candidate", "auto-demotion due to drain"))
          self._RequestDemotion(node)
        if node.offline:
          node.offline = False
          changes.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup
    if mc_changed:
      self.context.ReaddNode(node)

    return changes
3271

    
3272

    
3273
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and refuse an unforced master powercycle.

    """
    full_name = self.cfg.ExpandNodeName(self.op.node_name)
    if full_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = full_name

    is_master = full_name == self.cfg.GetMasterNode()
    if is_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort operation which must not wait for
    other jobs, hence no locks are requested.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing to check for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name,
                                               self.cfg.GetHypervisorType())
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
3316

    
3317

    
3318
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """This LU needs no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: a dict with version constants, the local architecture and
        the main settings of the cluster object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    # only the parameters of the enabled hypervisors are exported
    enabled_hvparams = dict((hv_name, cluster.hvparams[hv_name])
                            for hv_name in enabled_hvs)
    arch = (platform.architecture()[0], platform.machine())

    info = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": enabled_hvparams,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }

    return info
3365

    
3366

    
3367
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Request no locks and validate the selected output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the values of the requested configuration fields.

    @return: a list with one value per entry of C{self.op.output_fields},
        in the same order
    @raise errors.ParameterError: if an unknown field is requested

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        # This must be collected like any other field; returning it directly
        # would drop all other requested fields and hand back a bare value
        # instead of a list.
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
3408

    
3409

    
3410
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node is online; a missing C{ignore_size} opcode field defaults to
    False.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be activated

    """
    activated, device_map = _AssembleInstanceDisks(
      self, self.instance, ignore_size=self.op.ignore_size)
    if activated:
      return device_map

    raise errors.OpExecError("Cannot activate block devices")
3450

    
3451

    
3452
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  The devices are assembled on every node of each disk's node tree in
  secondary mode first, and then once more in primary mode on the primary
  node only.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, failures during the first
      (secondary-mode) pass do not make the function report failure
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: (disks_ok, device_info), where disks_ok is False if any
      relevant assembly failed, and device_info is a list of
      (primary_node, instance_visible_name, device_path) tuples, one per
      disk; device_path is None if the primary-mode assembly failed

  """
  primary = instance.primary_node
  owner = instance.name
  all_ok = True
  dev_map = []

  # The two passes only narrow the window for the race of switching DRBD
  # to primary before the handshake has occurred; they do not close it.
  #
  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # pass 1: everything, everywhere, in secondary mode
  for disk in instance.disks:
    for node, dev in disk.ComputeNodeTree(primary):
      if ignore_size:
        dev = dev.Copy()
        dev.UnsetSize()
      lu.cfg.SetDiskID(dev, node)
      err = lu.rpc.call_blockdev_assemble(node, dev, owner, False).fail_msg
      if not err:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         disk.iv_name, node, err)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # pass 2: the primary node only, in primary mode
  for disk in instance.disks:
    path = None

    for node, dev in disk.ComputeNodeTree(primary):
      if node == primary:
        if ignore_size:
          dev = dev.Copy()
          dev.UnsetSize()
        lu.cfg.SetDiskID(dev, node)
        outcome = lu.rpc.call_blockdev_assemble(node, dev, owner, True)
        err = outcome.fail_msg
        if err:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
                             " (is_primary=True, pass=2): %s",
                             disk.iv_name, node, err)
          all_ok = False
        else:
          path = outcome.payload

    dev_map.append((primary, disk.iv_name, path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, primary)

  return all_ok, dev_map
3534

    
3535

    
3536
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled with C{force} passed as the ignore_secondaries
  flag; if this fails they are shut down again and an error is raised.

  @raise errors.OpExecError: if the disks could not be assembled

  """
  assembled = _AssembleInstanceDisks(lu, instance,
                                     ignore_secondaries=force)[0]
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  # the hint is only shown when force was explicitly given as false
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3549

    
3550

    
3551
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks, unless the instance is running.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3583

    
3584

    
3585
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  The primary node is asked for its list of running instances first;
  L{_ShutdownInstanceDisks} is only called if the instance is not
  among them.

  @raise errors.OpExecError: if the instance is running

  """
  primary = instance.primary_node
  running = lu.rpc.call_instance_list([primary],
                                      [instance.hypervisor])[primary]
  running.Raise("Can't contact node %s" % primary)

  if instance.name in running.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)
3601

    
3602

    
3603
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Every failure is logged as a warning. If ignore_primary is true,
  failures on the primary node do not affect the return value.

  @rtype: boolean
  @return: False if a shutdown failed on a node whose errors are not
      ignored, True otherwise

  """
  primary = instance.primary_node
  success = True
  for disk in instance.disks:
    for node, dev in disk.ComputeNodeTree(primary):
      lu.cfg.SetDiskID(dev, node)
      err = lu.rpc.call_blockdev_shutdown(node, dev).fail_msg
      if not err:
        continue
      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, err)
      if not (ignore_primary and node == primary):
        success = False
  return success
3624

    
3625

    
3626
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for its free memory; if the query fails, the
  answer is not an integer, or the amount is smaller than what was
  requested, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], lu.cfg.GetVGName(),
                                      hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get('memory_free')
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available),
                               errors.ECODE_ENVIRON)

  if requested > available:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, available),
                               errors.ECODE_NORES)
3661

    
3662

    
3663
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and request its lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance;
    it is used for both the pre and the post phase.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def _CheckExtraBeParams(self):
    """Validate the optional one-off backend parameters of the opcode.

    """
    self.beparams = getattr(self.op, "beparams", {})
    if not self.beparams:
      return
    if not isinstance(self.beparams, dict):
      raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                 " dict" % (type(self.beparams), ),
                                 errors.ECODE_INVAL)
    # fill the beparams dict
    utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
    self.op.beparams = self.beparams

  def _CheckExtraHvParams(self, instance):
    """Validate the optional one-off hypervisor parameters of the opcode.

    The passed values are layered over the cluster and instance
    parameters and the result is checked locally and on the instance's
    nodes.

    """
    self.hvparams = getattr(self.op, "hvparams", {})
    if not self.hvparams:
      return
    if not isinstance(self.hvparams, dict):
      raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                 " dict" % (type(self.hvparams), ),
                                 errors.ECODE_INVAL)

    # check hypervisor parameter syntax (locally)
    cluster = self.cfg.GetClusterInfo()
    utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
    effective = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                 instance.hvparams)
    effective.update(self.hvparams)
    hypervisor.GetHypervisor(instance.hypervisor).CheckParameterSyntax(
      effective)
    _CheckHVParams(self, instance.all_nodes, instance.hypervisor, effective)
    self.op.hvparams = self.hvparams

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates any extra
    parameters, and verifies the primary node is online, has the needed
    bridges and (if the instance is not already running) enough free
    memory.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self._CheckExtraBeParams()
    self._CheckExtraHvParams(instance)

    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    state = self.rpc.call_instance_info(pnode, instance.name,
                                        instance.hypervisor)
    state.Raise("Error checking node %s" % instance.primary_node,
                prereq=True, ecode=errors.ECODE_ENVIRON)
    already_running = bool(state.payload)
    if not already_running:
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           filled_be[constants.BE_MEMORY],
                           instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked up in the configuration first; if starting
    it fails, its disks are shut down again.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_err = self.rpc.call_instance_start(pnode, instance,
                                             self.hvparams,
                                             self.beparams).fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % start_err)
3763

    
3764

    
3765
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    A missing shutdown timeout falls back to the default one.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Validate the reboot type and lock the instance.

    """
    known_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  known_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance;
    it is used for both the pre and the post phase.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that the needed bridges exist.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are delegated to the node in one call; a full
    reboot is done as shutdown, disk restart and start.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    pnode = instance.primary_node

    handled_by_node = reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                      constants.INSTANCE_REBOOT_HARD)
    if handled_by_node:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_res = self.rpc.call_instance_reboot(pnode, instance,
                                                 reboot_type,
                                                 self.shutdown_timeout)
      reboot_res.Raise("Could not reboot instance")
    else:
      stop_res = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.shutdown_timeout)
      stop_res.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      start_err = self.rpc.call_instance_start(pnode, instance,
                                               None, None).fail_msg
      if start_err:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % start_err)

    self.cfg.MarkInstanceUp(instance.name)
3853

    
3854

    
3855
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    A missing timeout falls back to the default shutdown timeout.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Expand the instance name and request its lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance;
    it is used for both the pre and the post phase.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked down in the configuration first; a failed
    shutdown call is only logged, and the disks are shut down anyway.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.timeout

    self.cfg.MarkInstanceDown(instance.name)
    stop_err = self.rpc.call_instance_shutdown(pnode, instance,
                                               timeout).fail_msg
    if stop_err:
      self.proc.LogWarning("Could not shutdown instance: %s" % stop_err)

    _ShutdownInstanceDisks(self, instance)
3910

    
3911

    
3912
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  The instance must have disks, be marked down in the configuration and
  not be running on its primary node. Optionally the OS can be changed
  (opcode attribute "os_type") before the OS create scripts are run.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    If a new OS type was requested, it also verifies that the OS is
    available on the primary node (and, unless "force_variant" is set,
    that the OS variant is acceptable).

    @raise errors.OpPrereqError: if the instance is diskless, is marked
        up, is found running on its primary node, or if the primary node
        cannot be found in the configuration during the OS check

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name,
                                 errors.ECODE_STATE)
    # the configuration says "down"; double-check with the node itself
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node),
                                 errors.ECODE_STATE)

    # both attributes are optional on the opcode; normalise them so that
    # Exec can rely on their presence
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # report the node name we actually looked up; this opcode is not
        # required to carry a "pnode" attribute, so formatting self.op.pnode
        # here could fail with AttributeError instead of the intended error
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node, errors.ECODE_NOENT)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name),
                   prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(result.payload, self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was given, it is stored in the configuration first.
    The instance disks are then started, the OS create scripts are run on
    the primary node, and the disks are shut down again even if the
    installation fails.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True, 0)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
4002

    
4003

    
4004
class LURecreateInstanceDisks(LogicalUnit):
4005
  """Recreate an instance's missing disks.
4006

4007
  """
4008
  HPATH = "instance-recreate-disks"
4009
  HTYPE = constants.HTYPE_INSTANCE
4010
  _OP_REQP = ["instance_name", "disks"]
4011
  REQ_BGL = False
4012

    
4013
  def CheckArguments(self):
4014
    """Check the arguments.
4015

4016
    """
4017
    if not isinstance(self.op.disks, list):
4018
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4019
    for item in self.op.disks:
4020
      if (not isinstance(item, int) or
4021
          item < 0):
4022
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4023
                                   str(item), errors.ECODE_INVAL)
4024

    
4025
  def ExpandNames(self):
4026
    self._ExpandAndLockInstance()
4027

    
4028
  def BuildHooksEnv(self):
4029
    """Build hooks env.
4030