#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # "could be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


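# Illustrative sketch (not part of the original module): a minimal logical
# unit wired together as the docstrings above describe. The class name and
# behaviour are hypothetical; only the ExpandNames/CheckPrereq/Exec structure
# and the locking rules come from the base classes.
#
#   class LUExampleNoop(NoHooksLU):
#     """Example LU that acquires no locks and does nothing."""
#     _OP_REQP = []
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       # an empty dict (not None) means "no locks needed"
#       self.needed_locks = {}
#
#     def CheckPrereq(self):
#       # nothing to verify for this trivial example
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Doing nothing")
#       return True

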
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


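# Illustrative sketch (not part of the original module): how an LU could be
# built from tasklets. The names are hypothetical; the pattern of setting
# self.tasklets in ExpandNames and letting the base-class CheckPrereq/Exec
# loops above drive them is the documented one.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       # e.g. verify the instance exists; locking was already done by the LU
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Working on %s" % self.instance_name)
#
#   # inside some LU's ExpandNames, after declaring the needed locks:
#   #   self.tasklets = [_ExampleTasklet(self, "instance1.example.tld")]

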
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


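# Illustrative usage sketch (not part of the original module); the field
# names below are hypothetical. _CheckOutputFields raises OpPrereqError when
# the selected fields are not covered by the static and dynamic sets.
#
#   static_fields = utils.FieldSet("name", "pnode")
#   dynamic_fields = utils.FieldSet("oper_state")
#   _CheckOutputFields(static=static_fields, dynamic=dynamic_fields,
#                      selected=["name", "oper_state"])   # passes
#   _CheckOutputFields(static=static_fields, dynamic=dynamic_fields,
#                      selected=["name", "bogus"])        # raises OpPrereqError

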
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


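# Illustrative usage sketch (not part of the original module); the names are
# hypothetical. A short name given in an opcode is canonicalized before any
# locking or checks take place, and an unknown name aborts the operation.
#
#   # e.g. "node1" -> "node1.example.com", or OpPrereqError with ECODE_NOENT
#   full_node = _ExpandNodeName(self.cfg, self.op.node_name)
#   # typically driven from ExpandNames, e.g. via _ExpandAndLockInstance()
#   full_inst = _ExpandInstanceName(self.cfg, self.op.instance_name)

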
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


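# Illustrative sketch (not part of the original module): the kind of
# environment _BuildInstanceHookEnv produces for a hypothetical one-NIC,
# one-disk instance; all values shown are made up. Per the BuildHooksEnv
# docstring, the hooks runner later adds the GANETI_ prefix to each key.
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "node2.example.com",
#     "INSTANCE_OS_TYPE": "debootstrap",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_MEMORY": 128, "INSTANCE_VCPUS": 1,
#     "INSTANCE_DISK_TEMPLATE": "drbd", "INSTANCE_HYPERVISOR": "xen-pvm",
#     "INSTANCE_NIC0_IP": "", "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#     "INSTANCE_NIC0_MODE": "bridged", "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0", "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 1024, "INSTANCE_DISK0_MODE": "rw",
#     "INSTANCE_DISK_COUNT": 1,
#     # plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend and
#     # hypervisor parameter
#   }

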
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


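# Worked example (illustrative, not part of the original module): with a
# candidate_pool_size of 10 and GetMasterCandidateStats reporting mc_now = 3
# and mc_should = 3, adding this node bumps the target to min(3 + 1, 10) = 4,
# so 3 < 4 and _DecideSelfPromotion returns True.

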
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


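# Illustrative sketch (not part of the original module): for a hypothetical
# OS object declaring supported_variants = ["lenny", "squeeze"], the name
# "debootstrap+lenny" is accepted (variant "lenny" after the "+"), plain
# "debootstrap" fails with "OS name must include a variant", and
# "debootstrap+etch" fails with "Unsupported OS variant".

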
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

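  # Illustrative sketch (not part of the original module): the two output
  # formats produced by _Error for a hypothetical check, depending on the
  # opcode's error_codes flag.
  #
  #   self._Error(self.ENODELVM, "node1", "volume group %s missing", "xenvg")
  #   # error_codes=True :
  #   #   "  - ERROR:ENODELVM:node:node1:volume group xenvg missing"
  #   # error_codes=False:
  #   #   "  - ERROR: node node1: volume group xenvg missing"
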
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        the form of minor: (instance, must_exist), which correspond to
        instances and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if (not node == node_current):
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
1307
    """Verify integrity of cluster, performing various test on nodes.
1308

1309
    """
1310
    self.bad = False
1311
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1312
    verbose = self.op.verbose
1313
    self._feedback_fn = feedback_fn
1314
    feedback_fn("* Verifying global settings")
1315
    for msg in self.cfg.VerifyConfig():
1316
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1317

    
1318
    vg_name = self.cfg.GetVGName()
1319
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1320
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1321
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1322
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1323
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1324
                        for iname in instancelist)
1325
    i_non_redundant = [] # Non redundant instances
1326
    i_non_a_balanced = [] # Non auto-balanced instances
1327
    n_offline = [] # List of offline nodes
1328
    n_drained = [] # List of nodes being drained
1329
    node_volume = {}
1330
    node_instance = {}
1331
    node_info = {}
1332
    instance_cfg = {}
1333

    
1334
    # FIXME: verify OS list
1335
    # do local checksums
1336
    master_files = [constants.CLUSTER_CONF_FILE]
1337

    
1338
    file_names = ssconf.SimpleStore().GetFileList()
1339
    file_names.append(constants.SSL_CERT_FILE)
1340
    file_names.append(constants.RAPI_CERT_FILE)
1341
    file_names.extend(master_files)
1342

    
1343
    local_checksums = utils.FingerprintFiles(file_names)
1344

    
1345
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1346
    node_verify_param = {
1347
      constants.NV_FILELIST: file_names,
1348
      constants.NV_NODELIST: [node.name for node in nodeinfo
1349
                              if not node.offline],
1350
      constants.NV_HYPERVISOR: hypervisors,
1351
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1352
                                  node.secondary_ip) for node in nodeinfo
1353
                                 if not node.offline],
1354
      constants.NV_INSTANCELIST: hypervisors,
1355
      constants.NV_VERSION: None,
1356
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1357
      constants.NV_NODESETUP: None,
1358
      constants.NV_TIME: None,
1359
      }
1360

    
1361
    if vg_name is not None:
1362
      node_verify_param[constants.NV_VGLIST] = None
1363
      node_verify_param[constants.NV_LVLIST] = vg_name
1364
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1365
      node_verify_param[constants.NV_DRBDLIST] = None
1366

    
1367
    # Due to the way our RPC system works, exact response times cannot be
1368
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1369
    # time before and after executing the request, we can at least have a time
1370
    # window.
1371
    nvinfo_starttime = time.time()
1372
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1373
                                           self.cfg.GetClusterName())
1374
    nvinfo_endtime = time.time()
1375

    
1376
    cluster = self.cfg.GetClusterInfo()
1377
    master_node = self.cfg.GetMasterNode()
1378
    all_drbd_map = self.cfg.ComputeDRBDMap()
1379

    
1380
    feedback_fn("* Verifying node status")
1381
    for node_i in nodeinfo:
1382
      node = node_i.name
1383

    
1384
      if node_i.offline:
1385
        if verbose:
1386
          feedback_fn("* Skipping offline node %s" % (node,))
1387
        n_offline.append(node)
1388
        continue
1389

    
1390
      if node == master_node:
1391
        ntype = "master"
1392
      elif node_i.master_candidate:
1393
        ntype = "master candidate"
1394
      elif node_i.drained:
1395
        ntype = "drained"
1396
        n_drained.append(node)
1397
      else:
1398
        ntype = "regular"
1399
      if verbose:
1400
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1401

    
1402
      msg = all_nvinfo[node].fail_msg
1403
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1404
      if msg:
1405
        continue
1406

    
1407
      nresult = all_nvinfo[node].payload
1408
      node_drbd = {}
1409
      for minor, instance in all_drbd_map[node].items():
1410
        test = instance not in instanceinfo
1411
        _ErrorIf(test, self.ECLUSTERCFG, None,
1412
                 "ghost instance '%s' in temporary DRBD map", instance)
1413
          # ghost instance should not be running, but otherwise we
1414
          # don't give double warnings (both ghost instance and
1415
          # unallocated minor in use)
1416
        if test:
1417
          node_drbd[minor] = (instance, False)
1418
        else:
1419
          instance = instanceinfo[instance]
1420
          node_drbd[minor] = (instance.name, instance.admin_up)
1421

    
1422
      self._VerifyNode(node_i, file_names, local_checksums,
1423
                       nresult, master_files, node_drbd, vg_name)
1424

    
1425
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1426
      if vg_name is None:
1427
        node_volume[node] = {}
1428
      elif isinstance(lvdata, basestring):
1429
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1430
                 utils.SafeEncode(lvdata))
1431
        node_volume[node] = {}
1432
      elif not isinstance(lvdata, dict):
1433
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1434
        continue
1435
      else:
1436
        node_volume[node] = lvdata
1437

    
1438
      # node_instance
1439
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1440
      test = not isinstance(idata, list)
1441
      _ErrorIf(test, self.ENODEHV, node,
1442
               "rpc call to node failed (instancelist)")
1443
      if test:
1444
        continue
1445

    
1446
      node_instance[node] = idata
1447

    
1448
      # node_info
1449
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1450
      test = not isinstance(nodeinfo, dict)
1451
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1452
      if test:
1453
        continue
1454

    
      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

    
1462
      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1463
        ntime_diff = abs(nvinfo_starttime - ntime_merged)
1464
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1465
        ntime_diff = abs(ntime_merged - nvinfo_endtime)
1466
      else:
1467
        ntime_diff = None
1468

    
1469
      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1470
               "Node time diverges by at least %0.1fs from master node time",
1471
               ntime_diff)
1472

    
1473
      if ntime_diff is not None:
1474
        continue
1475

    
1476
      try:
1477
        node_info[node] = {
1478
          "mfree": int(nodeinfo['memory_free']),
1479
          "pinst": [],
1480
          "sinst": [],
1481
          # dictionary holding all instances this node is secondary for,
1482
          # grouped by their primary node. Each key is a cluster node, and each
1483
          # value is a list of instances which have the key as primary and the
1484
          # current node as secondary. This is handy to calculate N+1 memory
1485
          # availability if you can only failover from a primary to its
1486
          # secondary.
1487
          "sinst-by-pnode": {},
1488
        }
1489
        # FIXME: devise a free space model for file based instances as well
1490
        if vg_name is not None:
1491
          test = (constants.NV_VGLIST not in nresult or
1492
                  vg_name not in nresult[constants.NV_VGLIST])
1493
          _ErrorIf(test, self.ENODELVM, node,
1494
                   "node didn't return data for the volume group '%s'"
1495
                   " - it is either missing or broken", vg_name)
1496
          if test:
1497
            continue
1498
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1499
      except (ValueError, KeyError):
1500
        _ErrorIf(True, self.ENODERPC, node,
1501
                 "node returned invalid nodeinfo, check lvm/hypervisor")
1502
        continue
1503

    
1504
    node_vol_should = {}
1505

    
1506
    feedback_fn("* Verifying instance status")
1507
    for instance in instancelist:
1508
      if verbose:
1509
        feedback_fn("* Verifying instance %s" % instance)
1510
      inst_config = instanceinfo[instance]
1511
      self._VerifyInstance(instance, inst_config, node_volume,
1512
                           node_instance, n_offline)
1513
      inst_nodes_offline = []
1514

    
1515
      inst_config.MapLVsByNode(node_vol_should)
1516

    
1517
      instance_cfg[instance] = inst_config
1518

    
1519
      pnode = inst_config.primary_node
1520
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
1521
               self.ENODERPC, pnode, "instance %s, connection to"
1522
               " primary node failed", instance)
1523
      if pnode in node_info:
1524
        node_info[pnode]['pinst'].append(instance)
1525

    
1526
      if pnode in n_offline:
1527
        inst_nodes_offline.append(pnode)
1528

    
1529
      # If the instance is non-redundant we cannot survive losing its primary
1530
      # node, so we are not N+1 compliant. On the other hand we have no disk
1531
      # templates with more than one secondary so that situation is not well
1532
      # supported either.
1533
      # FIXME: does not support file-backed instances
1534
      if len(inst_config.secondary_nodes) == 0:
1535
        i_non_redundant.append(instance)
1536
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
1537
               self.EINSTANCELAYOUT, instance,
1538
               "instance has multiple secondary nodes", code="WARNING")
1539

    
1540
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1541
        i_non_a_balanced.append(instance)
1542

    
1543
      for snode in inst_config.secondary_nodes:
1544
        _ErrorIf(snode not in node_info and snode not in n_offline,
1545
                 self.ENODERPC, snode,
1546
                 "instance %s, connection to secondary node"
1547
                 "failed", instance)
1548

    
1549
        if snode in node_info:
1550
          node_info[snode]['sinst'].append(instance)
1551
          if pnode not in node_info[snode]['sinst-by-pnode']:
1552
            node_info[snode]['sinst-by-pnode'][pnode] = []
1553
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1554

    
1555
        if snode in n_offline:
1556
          inst_nodes_offline.append(snode)
1557

    
1558
      # warn that the instance lives on offline nodes
1559
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1560
               "instance lives on offline node(s) %s",
1561
               utils.CommaJoin(inst_nodes_offline))
1562

    
1563
    feedback_fn("* Verifying orphan volumes")
1564
    self._VerifyOrphanVolumes(node_vol_should, node_volume)
1565

    
1566
    feedback_fn("* Verifying remaining instances")
1567
    self._VerifyOrphanInstances(instancelist, node_instance)
1568

    
1569
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1570
      feedback_fn("* Verifying N+1 Memory redundancy")
1571
      self._VerifyNPlusOneMemory(node_info, instance_cfg)
1572

    
1573
    feedback_fn("* Other Notes")
1574
    if i_non_redundant:
1575
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1576
                  % len(i_non_redundant))
1577

    
1578
    if i_non_a_balanced:
1579
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1580
                  % len(i_non_a_balanced))
1581

    
1582
    if n_offline:
1583
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1584

    
1585
    if n_drained:
1586
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1587

    
1588
    return not self.bad
1589

    
1590
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1591
    """Analyze the post-hooks' result
1592

1593
    This method analyses the hook result, handles it, and sends some
1594
    nicely-formatted feedback back to the user.
1595

1596
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1597
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1598
    @param hooks_results: the results of the multi-node hooks rpc call
1599
    @param feedback_fn: function used to send feedback back to the caller
1600
    @param lu_result: previous Exec result
1601
    @return: the new Exec result, based on the previous result
1602
        and hook results
1603

1604
    """
1605
    # We only really run POST phase hooks, and are only interested in
1606
    # their results
1607
    if phase == constants.HOOKS_PHASE_POST:
1608
      # Used to change hooks' output to proper indentation
1609
      indent_re = re.compile('^', re.M)
1610
      feedback_fn("* Hooks Results")
1611
      assert hooks_results, "invalid result from hooks"
1612

    
1613
      for node_name in hooks_results:
1614
        res = hooks_results[node_name]
1615
        msg = res.fail_msg
1616
        test = msg and not res.offline
1617
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
1618
                      "Communication failure in hooks execution: %s", msg)
1619
        if res.offline or msg:
1620
          # No need to investigate payload if node is offline or gave an error.
1621
          # override manually lu_result here as _ErrorIf only
1622
          # overrides self.bad
1623
          lu_result = 1
1624
          continue
1625
        for script, hkr, output in res.payload:
1626
          test = hkr == constants.HKR_FAIL
1627
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
1628
                        "Script %s failed, output:", script)
1629
          if test:
1630
            output = indent_re.sub('      ', output)
1631
            feedback_fn("%s" % output)
1632
            lu_result = 1
1633

    
1634
      return lu_result
1635

    
1636

    
1637
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1650

    
1651
  def CheckPrereq(self):
1652
    """Check prerequisites.
1653

1654
    This has no prerequisites.
1655

1656
    """
1657
    pass
1658

    
1659
  def Exec(self, feedback_fn):
1660
    """Verify integrity of cluster disks.
1661

1662
    @rtype: tuple of three items
1663
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
1666

1667
    """
1668
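    # "result" is the tuple of the three accumulators below, so they can be
    # filled in as we go and returned together at the end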
    result = res_nodes, res_instances, res_missing = {}, [], {}
1669

    
1670
    vg_name = self.cfg.GetVGName()
1671
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1672
    instances = [self.cfg.GetInstanceInfo(name)
1673
                 for name in self.cfg.GetInstanceList()]
1674

    
1675
    nv_dict = {}
1676
    for inst in instances:
1677
      inst_lvs = {}
1678
      if (not inst.admin_up or
1679
          inst.disk_template not in constants.DTS_NET_MIRROR):
1680
        continue
1681
      inst.MapLVsByNode(inst_lvs)
1682
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1683
      for node, vol_list in inst_lvs.iteritems():
1684
        for vol in vol_list:
1685
          nv_dict[(node, vol)] = inst
1686

    
1687
    if not nv_dict:
1688
      return result
1689

    
1690
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1691

    
1692
    for node in nodes:
1693
      # node_volume
1694
      node_res = node_lvs[node]
1695
      if node_res.offline:
1696
        continue
1697
      msg = node_res.fail_msg
1698
      if msg:
1699
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1700
        res_nodes[node] = msg
1701
        continue
1702

    
1703
      lvs = node_res.payload
1704
      for lv_name, (_, _, lv_online) in lvs.items():
1705
        inst = nv_dict.pop((node, lv_name), None)
1706
        if (not lv_online and inst is not None
1707
            and inst.name not in res_instances):
1708
          res_instances.append(inst.name)
1709

    
1710
    # any leftover items in nv_dict are missing LVs, let's arrange the
1711
    # data better
1712
    for key, inst in nv_dict.iteritems():
1713
      if inst.name not in res_missing:
1714
        res_missing[inst.name] = []
1715
      res_missing[inst.name].append(key)
1716

    
1717
    return result
1718

    
1719

    
1720
class LURepairDiskSizes(NoHooksLU):
1721
  """Verifies the cluster disks sizes.
1722

1723
  """
1724
  _OP_REQP = ["instances"]
1725
  REQ_BGL = False
1726

    
1727
  def ExpandNames(self):
1728
    if not isinstance(self.op.instances, list):
1729
      raise errors.OpPrereqError("Invalid argument type 'instances'",
1730
                                 errors.ECODE_INVAL)
1731

    
1732
    if self.op.instances:
1733
      self.wanted_names = []
1734
      for name in self.op.instances:
1735
        full_name = _ExpandInstanceName(self.cfg, name)
1736
        self.wanted_names.append(full_name)
1737
      self.needed_locks = {
1738
        locking.LEVEL_NODE: [],
1739
        locking.LEVEL_INSTANCE: self.wanted_names,
1740
        }
1741
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1742
    else:
1743
      self.wanted_names = None
1744
      self.needed_locks = {
1745
        locking.LEVEL_NODE: locking.ALL_SET,
1746
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1747
        }
1748
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1749

    
1750
  def DeclareLocks(self, level):
1751
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1752
      self._LockInstancesNodes(primary_only=True)
1753

    
1754
  def CheckPrereq(self):
1755
    """Check prerequisites.
1756

1757
    This only checks the optional instance list against the existing names.
1758

1759
    """
1760
    if self.wanted_names is None:
1761
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1762

    
1763
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1764
                             in self.wanted_names]
1765

    
1766
  def _EnsureChildSizes(self, disk):
1767
    """Ensure children of the disk have the needed disk size.
1768

1769
    This is valid mainly for DRBD8 and fixes an issue where the
1770
    children have a smaller disk size.
1771

1772
    @param disk: an L{ganeti.objects.Disk} object
1773

1774
    """
1775
    if disk.dev_type == constants.LD_DRBD8:
1776
      assert disk.children, "Empty children for DRBD8?"
1777
      fchild = disk.children[0]
1778
      mismatch = fchild.size < disk.size
1779
      if mismatch:
1780
        self.LogInfo("Child disk has size %d, parent %d, fixing",
1781
                     fchild.size, disk.size)
1782
        fchild.size = disk.size
1783

    
1784
      # and we recurse on this child only, not on the metadev
1785
      return self._EnsureChildSizes(fchild) or mismatch
1786
    else:
1787
      return False
1788

    
1789
  def Exec(self, feedback_fn):
1790
    """Verify the size of cluster disks.
1791

1792
    """
1793
    # TODO: check child disks too
1794
    # TODO: check differences in size between primary/secondary nodes
1795
    per_node_disks = {}
1796
    for instance in self.wanted_instances:
1797
      pnode = instance.primary_node
1798
      if pnode not in per_node_disks:
1799
        per_node_disks[pnode] = []
1800
      for idx, disk in enumerate(instance.disks):
1801
        per_node_disks[pnode].append((instance, idx, disk))
1802

    
1803
    changed = []
1804
    for node, dskl in per_node_disks.items():
1805
      newl = [v[2].Copy() for v in dskl]
1806
      for dsk in newl:
1807
        self.cfg.SetDiskID(dsk, node)
1808
      result = self.rpc.call_blockdev_getsizes(node, newl)
1809
      if result.fail_msg:
1810
        self.LogWarning("Failure in blockdev_getsizes call to node"
1811
                        " %s, ignoring", node)
1812
        continue
1813
      if len(result.data) != len(dskl):
1814
        self.LogWarning("Invalid result from node %s, ignoring node results",
1815
                        node)
1816
        continue
1817
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1818
        if size is None:
1819
          self.LogWarning("Disk %d of instance %s did not return size"
1820
                          " information, ignoring", idx, instance.name)
1821
          continue
1822
        if not isinstance(size, (int, long)):
1823
          self.LogWarning("Disk %d of instance %s did not return valid"
1824
                          " size information, ignoring", idx, instance.name)
1825
          continue
1826
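        # the reported size is in bytes; convert to MiB, the unit used
        # for disk.size in the configuration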
        size = size >> 20
1827
        if size != disk.size:
1828
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1829
                       " correcting: recorded %d, actual %d", idx,
1830
                       instance.name, disk.size, size)
1831
          disk.size = size
1832
          self.cfg.Update(instance, feedback_fn)
1833
          changed.append((instance.name, idx, size))
1834
        if self._EnsureChildSizes(disk):
1835
          self.cfg.Update(instance, feedback_fn)
1836
          changed.append((instance.name, idx, disk.size))
1837
    return changed
1838

    
1839

    
1840
class LURenameCluster(LogicalUnit):
1841
  """Rename the cluster.
1842

1843
  """
1844
  HPATH = "cluster-rename"
1845
  HTYPE = constants.HTYPE_CLUSTER
1846
  _OP_REQP = ["name"]
1847

    
1848
  def BuildHooksEnv(self):
1849
    """Build hooks env.
1850

1851
    """
1852
    env = {
1853
      "OP_TARGET": self.cfg.GetClusterName(),
1854
      "NEW_NAME": self.op.name,
1855
      }
1856
    mn = self.cfg.GetMasterNode()
1857
    all_nodes = self.cfg.GetNodeList()
1858
    return env, [mn], all_nodes
1859

    
1860
  def CheckPrereq(self):
1861
    """Verify that the passed name is a valid one.
1862

1863
    """
1864
    hostname = utils.GetHostInfo(self.op.name)
1865

    
1866
    new_name = hostname.name
1867
    self.ip = new_ip = hostname.ip
1868
    old_name = self.cfg.GetClusterName()
1869
    old_ip = self.cfg.GetMasterIP()
1870
    if new_name == old_name and new_ip == old_ip:
1871
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1872
                                 " cluster has changed",
1873
                                 errors.ECODE_INVAL)
1874
    if new_ip != old_ip:
1875
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1876
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1877
                                   " reachable on the network. Aborting." %
1878
                                   new_ip, errors.ECODE_NOTUNIQUE)
1879

    
1880
    self.op.name = new_name
1881

    
1882
  def Exec(self, feedback_fn):
1883
    """Rename the cluster.
1884

1885
    """
1886
    clustername = self.op.name
1887
    ip = self.ip
1888

    
1889
    # shutdown the master IP
1890
    master = self.cfg.GetMasterNode()
1891
    result = self.rpc.call_node_stop_master(master, False)
1892
    result.Raise("Could not disable the master role")
1893

    
1894
    try:
1895
      cluster = self.cfg.GetClusterInfo()
1896
      cluster.cluster_name = clustername
1897
      cluster.master_ip = ip
1898
      self.cfg.Update(cluster, feedback_fn)
1899

    
1900
      # update the known hosts file
1901
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1902
      node_list = self.cfg.GetNodeList()
1903
      try:
1904
        node_list.remove(master)
1905
      except ValueError:
1906
        pass
1907
      result = self.rpc.call_upload_file(node_list,
1908
                                         constants.SSH_KNOWN_HOSTS_FILE)
1909
      for to_node, to_result in result.iteritems():
1910
        msg = to_result.fail_msg
1911
        if msg:
1912
          msg = ("Copy of file %s to node %s failed: %s" %
1913
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1914
          self.proc.LogWarning(msg)
1915

    
1916
    finally:
1917
      result = self.rpc.call_node_start_master(master, False, False)
1918
      msg = result.fail_msg
1919
      if msg:
1920
        self.LogWarning("Could not re-enable the master role on"
1921
                        " the master, please restart manually: %s", msg)
1922

    
1923

    
1924
def _RecursiveCheckIfLVMBased(disk):
1925
  """Check if the given disk or its children are lvm-based.
1926

1927
  @type disk: L{objects.Disk}
1928
  @param disk: the disk to check
1929
  @rtype: boolean
1930
  @return: boolean indicating whether a LD_LV dev_type was found or not
1931

1932
  """
1933
  if disk.children:
1934
    for chdisk in disk.children:
1935
      if _RecursiveCheckIfLVMBased(chdisk):
1936
        return True
1937
  return disk.dev_type == constants.LD_LV
1938

    
1939

    
1940
class LUSetClusterParams(LogicalUnit):
1941
  """Change the parameters of the cluster.
1942

1943
  """
1944
  HPATH = "cluster-modify"
1945
  HTYPE = constants.HTYPE_CLUSTER
1946
  _OP_REQP = []
1947
  REQ_BGL = False
1948

    
1949
  def CheckArguments(self):
1950
    """Check parameters
1951

1952
    """
1953
    if not hasattr(self.op, "candidate_pool_size"):
1954
      self.op.candidate_pool_size = None
1955
    if self.op.candidate_pool_size is not None:
1956
      try:
1957
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1958
      except (ValueError, TypeError), err:
1959
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1960
                                   str(err), errors.ECODE_INVAL)
1961
      if self.op.candidate_pool_size < 1:
1962
        raise errors.OpPrereqError("At least one master candidate needed",
1963
                                   errors.ECODE_INVAL)
1964

    
1965
  def ExpandNames(self):
1966
    # FIXME: in the future maybe other cluster params won't require checking on
1967
    # all nodes to be modified.
1968
    self.needed_locks = {
1969
      locking.LEVEL_NODE: locking.ALL_SET,
1970
    }
1971
    self.share_locks[locking.LEVEL_NODE] = 1
1972

    
1973
  def BuildHooksEnv(self):
1974
    """Build hooks env.
1975

1976
    """
1977
    env = {
1978
      "OP_TARGET": self.cfg.GetClusterName(),
1979
      "NEW_VG_NAME": self.op.vg_name,
1980
      }
1981
    mn = self.cfg.GetMasterNode()
1982
    return env, [mn], [mn]
1983

    
1984
  def CheckPrereq(self):
1985
    """Check prerequisites.
1986

1987
    This checks whether the given params don't conflict and
1988
    if the given volume group is valid.
1989

1990
    """
1991
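    # a vg_name of None means "no change"; an empty string requests disabling
    # LVM storage, which is only allowed if no lvm-based instances exist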
    if self.op.vg_name is not None and not self.op.vg_name:
1992
      instances = self.cfg.GetAllInstancesInfo().values()
1993
      for inst in instances:
1994
        for disk in inst.disks:
1995
          if _RecursiveCheckIfLVMBased(disk):
1996
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1997
                                       " lvm-based instances exist",
1998
                                       errors.ECODE_INVAL)
1999

    
2000
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2001

    
2002
    # if vg_name not None, checks given volume group on all nodes
2003
    if self.op.vg_name:
2004
      vglist = self.rpc.call_vg_list(node_list)
2005
      for node in node_list:
2006
        msg = vglist[node].fail_msg
2007
        if msg:
2008
          # ignoring down node
2009
          self.LogWarning("Error while gathering data on node %s"
2010
                          " (ignoring node): %s", node, msg)
2011
          continue
2012
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2013
                                              self.op.vg_name,
2014
                                              constants.MIN_VG_SIZE)
2015
        if vgstatus:
2016
          raise errors.OpPrereqError("Error on node '%s': %s" %
2017
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2018

    
2019
    self.cluster = cluster = self.cfg.GetClusterInfo()
2020
    # validate params changes
2021
    if self.op.beparams:
2022
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2023
      self.new_beparams = objects.FillDict(
2024
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2025

    
2026
    if self.op.nicparams:
2027
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2028
      self.new_nicparams = objects.FillDict(
2029
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2030
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2031
      nic_errors = []
2032

    
2033
      # check all instances for consistency
2034
      for instance in self.cfg.GetAllInstancesInfo().values():
2035
        for nic_idx, nic in enumerate(instance.nics):
2036
          params_copy = copy.deepcopy(nic.nicparams)
2037
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2038

    
2039
          # check parameter syntax
2040
          try:
2041
            objects.NIC.CheckParameterSyntax(params_filled)
2042
          except errors.ConfigurationError, err:
2043
            nic_errors.append("Instance %s, nic/%d: %s" %
2044
                              (instance.name, nic_idx, err))
2045

    
2046
          # if we're moving instances to routed, check that they have an ip
2047
          target_mode = params_filled[constants.NIC_MODE]
2048
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2049
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2050
                              (instance.name, nic_idx))
2051
      if nic_errors:
2052
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2053
                                   "\n".join(nic_errors))
2054

    
2055
    # hypervisor list/parameters
2056
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2057
    if self.op.hvparams:
2058
      if not isinstance(self.op.hvparams, dict):
2059
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2060
                                   errors.ECODE_INVAL)
2061
      for hv_name, hv_dict in self.op.hvparams.items():
2062
        if hv_name not in self.new_hvparams:
2063
          self.new_hvparams[hv_name] = hv_dict
2064
        else:
2065
          self.new_hvparams[hv_name].update(hv_dict)
2066

    
2067
    if self.op.enabled_hypervisors is not None:
2068
      self.hv_list = self.op.enabled_hypervisors
2069
      if not self.hv_list:
2070
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2071
                                   " least one member",
2072
                                   errors.ECODE_INVAL)
2073
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2074
      if invalid_hvs:
2075
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2076
                                   " entries: %s" %
2077
                                   utils.CommaJoin(invalid_hvs),
2078
                                   errors.ECODE_INVAL)
2079
    else:
2080
      self.hv_list = cluster.enabled_hypervisors
2081

    
2082
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2083
      # either the enabled list has changed, or the parameters have, validate
2084
      for hv_name, hv_params in self.new_hvparams.items():
2085
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2086
            (self.op.enabled_hypervisors and
2087
             hv_name in self.op.enabled_hypervisors)):
2088
          # either this is a new hypervisor, or its parameters have changed
2089
          hv_class = hypervisor.GetHypervisor(hv_name)
2090
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2091
          hv_class.CheckParameterSyntax(hv_params)
2092
          _CheckHVParams(self, node_list, hv_name, hv_params)
2093

    
2094
  def Exec(self, feedback_fn):
2095
    """Change the parameters of the cluster.
2096

2097
    """
2098
    if self.op.vg_name is not None:
2099
      new_volume = self.op.vg_name
2100
      if not new_volume:
2101
        new_volume = None
2102
      if new_volume != self.cfg.GetVGName():
2103
        self.cfg.SetVGName(new_volume)
2104
      else:
2105
        feedback_fn("Cluster LVM configuration already in desired"
2106
                    " state, not changing")
2107
    if self.op.hvparams:
2108
      self.cluster.hvparams = self.new_hvparams
2109
    if self.op.enabled_hypervisors is not None:
2110
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2111
    if self.op.beparams:
2112
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2113
    if self.op.nicparams:
2114
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2115

    
2116
    if self.op.candidate_pool_size is not None:
2117
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2118
      # we need to update the pool size here, otherwise the save will fail
2119
      _AdjustCandidatePool(self, [])
2120

    
2121
    self.cfg.Update(self.cluster, feedback_fn)
2122

    
2123

    
2124
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2125
  """Distribute additional files which are part of the cluster configuration.
2126

2127
  ConfigWriter takes care of distributing the config and ssconf files, but
2128
  there are more files which should be distributed to all nodes. This function
2129
  makes sure those are copied.
2130

2131
  @param lu: calling logical unit
2132
  @param additional_nodes: list of nodes not in the config to distribute to
2133

2134
  """
2135
  # 1. Gather target nodes
2136
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2137
  dist_nodes = lu.cfg.GetNodeList()
2138
  if additional_nodes is not None:
2139
    dist_nodes.extend(additional_nodes)
2140
  if myself.name in dist_nodes:
2141
    dist_nodes.remove(myself.name)
2142

    
2143
  # 2. Gather files to distribute
2144
  dist_files = set([constants.ETC_HOSTS,
2145
                    constants.SSH_KNOWN_HOSTS_FILE,
2146
                    constants.RAPI_CERT_FILE,
2147
                    constants.RAPI_USERS_FILE,
2148
                    constants.HMAC_CLUSTER_KEY,
2149
                   ])
2150

    
2151
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2152
  for hv_name in enabled_hypervisors:
2153
    hv_class = hypervisor.GetHypervisor(hv_name)
2154
    dist_files.update(hv_class.GetAncillaryFiles())
2155

    
2156
  # 3. Perform the files upload
2157
  for fname in dist_files:
2158
    if os.path.exists(fname):
2159
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2160
      for to_node, to_result in result.items():
2161
        msg = to_result.fail_msg
2162
        if msg:
2163
          msg = ("Copy of file %s to node %s failed: %s" %
2164
                 (fname, to_node, msg))
2165
          lu.proc.LogWarning(msg)
2166

    
2167

    
2168
class LURedistributeConfig(NoHooksLU):
2169
  """Force the redistribution of cluster configuration.
2170

2171
  This is a very simple LU.
2172

2173
  """
2174
  _OP_REQP = []
2175
  REQ_BGL = False
2176

    
2177
  def ExpandNames(self):
2178
    self.needed_locks = {
2179
      locking.LEVEL_NODE: locking.ALL_SET,
2180
    }
2181
    self.share_locks[locking.LEVEL_NODE] = 1
2182

    
2183
  def CheckPrereq(self):
2184
    """Check prerequisites.
2185

2186
    """
2187

    
2188
  def Exec(self, feedback_fn):
2189
    """Redistribute the configuration.
2190

2191
    """
2192
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2193
    _RedistributeAncillaryFiles(self)
2194

    
2195

    
2196
def _WaitForSync(lu, instance, oneshot=False):
2197
  """Sleep and poll for an instance's disk to sync.
2198

2199
  """
2200
  if not instance.disks:
2201
    return True
2202

    
2203
  if not oneshot:
2204
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2205

    
2206
  node = instance.primary_node
2207

    
2208
  for dev in instance.disks:
2209
    lu.cfg.SetDiskID(dev, node)
2210

    
2211
  # TODO: Convert to utils.Retry
2212

    
2213
  retries = 0
2214
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2215
  while True:
2216
    max_time = 0
2217
    done = True
2218
    cumul_degraded = False
2219
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2220
    msg = rstats.fail_msg
2221
    if msg:
2222
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2223
      retries += 1
2224
      if retries >= 10:
2225
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2226
                                 " aborting." % node)
2227
      time.sleep(6)
2228
      continue
2229
    rstats = rstats.payload
2230
    retries = 0
2231
    for i, mstat in enumerate(rstats):
2232
      if mstat is None:
2233
        lu.LogWarning("Can't compute data for node %s/%s",
2234
                           node, instance.disks[i].iv_name)
2235
        continue
2236

    
2237
      cumul_degraded = (cumul_degraded or
2238
                        (mstat.is_degraded and mstat.sync_percent is None))
2239
      if mstat.sync_percent is not None:
2240
        done = False
2241
        if mstat.estimated_time is not None:
2242
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2243
          max_time = mstat.estimated_time
2244
        else:
2245
          rem_time = "no time estimate"
2246
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2247
                        (instance.disks[i].iv_name, mstat.sync_percent,
2248
                         rem_time))
2249

    
2250
    # if we're done but degraded, let's do a few small retries, to
2251
    # make sure we see a stable and not transient situation; therefore
2252
    # we force restart of the loop
2253
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2254
      logging.info("Degraded disks found, %d retries left", degr_retries)
2255
      degr_retries -= 1
2256
      time.sleep(1)
2257
      continue
2258

    
2259
    if done or oneshot:
2260
      break
2261

    
2262
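    # poll again after at most a minute, or sooner if the estimated remaining
    # sync time is shorter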
    time.sleep(min(60, max_time))
2263

    
2264
  if done:
2265
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2266
  return not cumul_degraded
2267

    
2268

    
2269
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2270
  """Check that mirrors are not degraded.
2271

2272
  The ldisk parameter, if True, will change the test from the
2273
  is_degraded attribute (which represents overall non-ok status for
2274
  the device(s)) to the ldisk (representing the local storage status).
2275

2276
  """
2277
  lu.cfg.SetDiskID(dev, node)
2278

    
2279
  result = True
2280

    
2281
  if on_primary or dev.AssembleOnSecondary():
2282
    rstats = lu.rpc.call_blockdev_find(node, dev)
2283
    msg = rstats.fail_msg
2284
    if msg:
2285
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2286
      result = False
2287
    elif not rstats.payload:
2288
      lu.LogWarning("Can't find disk on node %s", node)
2289
      result = False
2290
    else:
2291
      if ldisk:
2292
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2293
      else:
2294
        result = result and not rstats.payload.is_degraded
2295

    
2296
  if dev.children:
2297
    for child in dev.children:
2298
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2299

    
2300
  return result
2301

    
2302

    
2303
class LUDiagnoseOS(NoHooksLU):
2304
  """Logical unit for OS diagnose/query.
2305

2306
  """
2307
  _OP_REQP = ["output_fields", "names"]
2308
  REQ_BGL = False
2309
  _FIELDS_STATIC = utils.FieldSet()
2310
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2311
  # Fields that need calculation of global os validity
2312
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2313

    
2314
  def ExpandNames(self):
2315
    if self.op.names:
2316
      raise errors.OpPrereqError("Selective OS query not supported",
2317
                                 errors.ECODE_INVAL)
2318

    
2319
    _CheckOutputFields(static=self._FIELDS_STATIC,
2320
                       dynamic=self._FIELDS_DYNAMIC,
2321
                       selected=self.op.output_fields)
2322

    
2323
    # Lock all nodes, in shared mode
2324
    # Temporary removal of locks, should be reverted later
2325
    # TODO: reintroduce locks when they are lighter-weight
2326
    self.needed_locks = {}
2327
    #self.share_locks[locking.LEVEL_NODE] = 1
2328
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2329

    
2330
  def CheckPrereq(self):
2331
    """Check prerequisites.
2332

2333
    """
2334

    
2335
  @staticmethod
2336
  def _DiagnoseByOS(rlist):
2337
    """Remaps a per-node return list into an a per-os per-node dictionary
2338

2339
    @param rlist: a map with node names as keys and OS objects as values
2340

2341
    @rtype: dict
2342
    @return: a dictionary with osnames as keys and as value another map, with
2343
        nodes as keys and tuples of (path, status, diagnose, variants) as
        values, eg::
2344

2345
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2346
                                     (/srv/..., False, "invalid api")],
2347
                           "node2": [(/srv/..., True, "")]}
2348
          }
2349

2350
    """
2351
    all_os = {}
2352
    # we build here the list of nodes that didn't fail the RPC (at RPC
2353
    # level), so that nodes with a non-responding node daemon don't
2354
    # make all OSes invalid
2355
    good_nodes = [node_name for node_name in rlist
2356
                  if not rlist[node_name].fail_msg]
2357
    for node_name, nr in rlist.items():
2358
      if nr.fail_msg or not nr.payload:
2359
        continue
2360
      for name, path, status, diagnose, variants in nr.payload:
2361
        if name not in all_os:
2362
          # build a list of nodes for this os containing empty lists
2363
          # for each node in node_list
2364
          all_os[name] = {}
2365
          for nname in good_nodes:
2366
            all_os[name][nname] = []
2367
        all_os[name][node_name].append((path, status, diagnose, variants))
2368
    return all_os
2369

    
2370
  def Exec(self, feedback_fn):
2371
    """Compute the list of OSes.
2372

2373
    """
2374
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2375
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2376
    pol = self._DiagnoseByOS(node_data)
2377
    output = []
2378
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2379
    calc_variants = "variants" in self.op.output_fields
2380

    
2381
    for os_name, os_data in pol.items():
2382
      row = []
2383
      if calc_valid:
2384
        valid = True
2385
        variants = None
2386
        for osl in os_data.values():
2387
          valid = valid and osl and osl[0][1]
2388
          if not valid:
2389
            variants = None
2390
            break
2391
          if calc_variants:
2392
            node_variants = osl[0][3]
2393
            if variants is None:
2394
              variants = node_variants
2395
            else:
2396
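              # keep only the variants that every node reports as available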
              variants = [v for v in variants if v in node_variants]
2397

    
2398
      for field in self.op.output_fields:
2399
        if field == "name":
2400
          val = os_name
2401
        elif field == "valid":
2402
          val = valid
2403
        elif field == "node_status":
2404
          # this is just a copy of the dict
2405
          val = {}
2406
          for node_name, nos_list in os_data.items():
2407
            val[node_name] = nos_list
2408
        elif field == "variants":
2409
          val = variants
2410
        else:
2411
          raise errors.ParameterError(field)
2412
        row.append(val)
2413
      output.append(row)
2414

    
2415
    return output
2416

    
2417

    
2418
class LURemoveNode(LogicalUnit):
2419
  """Logical unit for removing a node.
2420

2421
  """
2422
  HPATH = "node-remove"
2423
  HTYPE = constants.HTYPE_NODE
2424
  _OP_REQP = ["node_name"]
2425

    
2426
  def BuildHooksEnv(self):
2427
    """Build hooks env.
2428

2429
    This doesn't run on the target node in the pre phase as a failed
2430
    node would then be impossible to remove.
2431

2432
    """
2433
    env = {
2434
      "OP_TARGET": self.op.node_name,
2435
      "NODE_NAME": self.op.node_name,
2436
      }
2437
    all_nodes = self.cfg.GetNodeList()
2438
    try:
2439
      all_nodes.remove(self.op.node_name)
2440
    except ValueError:
2441
      logging.warning("Node %s which is about to be removed not found"
2442
                      " in the all nodes list", self.op.node_name)
2443
    return env, all_nodes, all_nodes
2444

    
2445
  def CheckPrereq(self):
2446
    """Check prerequisites.
2447

2448
    This checks:
2449
     - the node exists in the configuration
2450
     - it does not have primary or secondary instances
2451
     - it's not the master
2452

2453
    Any errors are signaled by raising errors.OpPrereqError.
2454

2455
    """
2456
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2457
    node = self.cfg.GetNodeInfo(self.op.node_name)
2458
    assert node is not None
2459

    
2460
    instance_list = self.cfg.GetInstanceList()
2461

    
2462
    masternode = self.cfg.GetMasterNode()
2463
    if node.name == masternode:
2464
      raise errors.OpPrereqError("Node is the master node,"
2465
                                 " you need to failover first.",
2466
                                 errors.ECODE_INVAL)
2467

    
2468
    for instance_name in instance_list:
2469
      instance = self.cfg.GetInstanceInfo(instance_name)
2470
      if node.name in instance.all_nodes:
2471
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2472
                                   " please remove first." % instance_name,
2473
                                   errors.ECODE_INVAL)
2474
    self.op.node_name = node.name
2475
    self.node = node
2476

    
2477
  def Exec(self, feedback_fn):
2478
    """Removes the node from the cluster.
2479

2480
    """
2481
    node = self.node
2482
    logging.info("Stopping the node daemon and removing configs from node %s",
2483
                 node.name)
2484

    
2485
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2486

    
2487
    # Promote nodes to master candidate as needed
2488
    _AdjustCandidatePool(self, exceptions=[node.name])
2489
    self.context.RemoveNode(node.name)
2490

    
2491
    # Run post hooks on the node before it's removed
2492
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2493
    try:
2494
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2495
    except:
2496
      # pylint: disable-msg=W0702
2497
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2498

    
2499
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2500
    msg = result.fail_msg
2501
    if msg:
2502
      self.LogWarning("Errors encountered on the remote node while leaving"
2503
                      " the cluster: %s", msg)
2504

    
2505

    
2506
class LUQueryNodes(NoHooksLU):
2507
  """Logical unit for querying nodes.
2508

2509
  """
2510
  # pylint: disable-msg=W0142
2511
  _OP_REQP = ["output_fields", "names", "use_locking"]
2512
  REQ_BGL = False
2513

    
2514
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2515
                    "master_candidate", "offline", "drained"]
2516

    
2517
  _FIELDS_DYNAMIC = utils.FieldSet(
2518
    "dtotal", "dfree",
2519
    "mtotal", "mnode", "mfree",
2520
    "bootid",
2521
    "ctotal", "cnodes", "csockets",
2522
    )
2523

    
2524
  _FIELDS_STATIC = utils.FieldSet(*[
2525
    "pinst_cnt", "sinst_cnt",
2526
    "pinst_list", "sinst_list",
2527
    "pip", "sip", "tags",
2528
    "master",
2529
    "role"] + _SIMPLE_FIELDS
2530
    )
2531

    
2532
  def ExpandNames(self):
2533
    _CheckOutputFields(static=self._FIELDS_STATIC,
2534
                       dynamic=self._FIELDS_DYNAMIC,
2535
                       selected=self.op.output_fields)
2536

    
2537
    self.needed_locks = {}
2538
    self.share_locks[locking.LEVEL_NODE] = 1
2539

    
2540
    if self.op.names:
2541
      self.wanted = _GetWantedNodes(self, self.op.names)
2542
    else:
2543
      self.wanted = locking.ALL_SET
2544

    
2545
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2546
    self.do_locking = self.do_node_query and self.op.use_locking
2547
    if self.do_locking:
2548
      # if we don't request only static fields, we need to lock the nodes
2549
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2550

    
2551
  def CheckPrereq(self):
2552
    """Check prerequisites.
2553

2554
    """
2555
    # The validation of the node list is done by _GetWantedNodes if the
    # list is non-empty; if it is empty, there is nothing to validate
2557
    pass
2558

    
2559
  def Exec(self, feedback_fn):
2560
    """Computes the list of nodes and their attributes.
2561

2562
    """
2563
    all_info = self.cfg.GetAllNodesInfo()
2564
    if self.do_locking:
2565
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2566
    elif self.wanted != locking.ALL_SET:
2567
      nodenames = self.wanted
2568
      missing = set(nodenames).difference(all_info.keys())
2569
      if missing:
2570
        raise errors.OpExecError(
2571
          "Some nodes were removed before retrieving their data: %s" % missing)
2572
    else:
2573
      nodenames = all_info.keys()
2574

    
2575
    nodenames = utils.NiceSort(nodenames)
2576
    nodelist = [all_info[name] for name in nodenames]
2577

    
2578
    # begin data gathering
2579

    
2580
    if self.do_node_query:
2581
      live_data = {}
2582
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2583
                                          self.cfg.GetHypervisorType())
2584
      for name in nodenames:
2585
        nodeinfo = node_data[name]
2586
        if not nodeinfo.fail_msg and nodeinfo.payload:
2587
          nodeinfo = nodeinfo.payload
2588
          fn = utils.TryConvert
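          # TryConvert returns the value unchanged if the conversion fails,
          # so missing fields simply pass through as None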
2589
          live_data[name] = {
2590
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2591
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2592
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2593
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2594
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2595
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2596
            "bootid": nodeinfo.get('bootid', None),
2597
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2598
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2599
            }
2600
        else:
2601
          live_data[name] = {}
2602
    else:
2603
      live_data = dict.fromkeys(nodenames, {})
2604

    
2605
    node_to_primary = dict([(name, set()) for name in nodenames])
2606
    node_to_secondary = dict([(name, set()) for name in nodenames])
2607

    
2608
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2609
                             "sinst_cnt", "sinst_list"))
2610
    if inst_fields & frozenset(self.op.output_fields):
2611
      inst_data = self.cfg.GetAllInstancesInfo()
2612

    
2613
      for inst in inst_data.values():
2614
        if inst.primary_node in node_to_primary:
2615
          node_to_primary[inst.primary_node].add(inst.name)
2616
        for secnode in inst.secondary_nodes:
2617
          if secnode in node_to_secondary:
2618
            node_to_secondary[secnode].add(inst.name)
2619

    
2620
    master_node = self.cfg.GetMasterNode()
2621

    
2622
    # end data gathering
2623

    
2624
    output = []
2625
    for node in nodelist:
2626
      node_output = []
2627
      for field in self.op.output_fields:
2628
        if field in self._SIMPLE_FIELDS:
2629
          val = getattr(node, field)
2630
        elif field == "pinst_list":
2631
          val = list(node_to_primary[node.name])
2632
        elif field == "sinst_list":
2633
          val = list(node_to_secondary[node.name])
2634
        elif field == "pinst_cnt":
2635
          val = len(node_to_primary[node.name])
2636
        elif field == "sinst_cnt":
2637
          val = len(node_to_secondary[node.name])
2638
        elif field == "pip":
2639
          val = node.primary_ip
2640
        elif field == "sip":
2641
          val = node.secondary_ip
2642
        elif field == "tags":
2643
          val = list(node.GetTags())
2644
        elif field == "master":
2645
          val = node.name == master_node
2646
        elif self._FIELDS_DYNAMIC.Matches(field):
2647
          val = live_data[node.name].get(field, None)
2648
        elif field == "role":
2649
          if node.name == master_node:
2650
            val = "M"
2651
          elif node.master_candidate:
2652
            val = "C"
2653
          elif node.drained:
2654
            val = "D"
2655
          elif node.offline:
2656
            val = "O"
2657
          else:
2658
            val = "R"
2659
        else:
2660
          raise errors.ParameterError(field)
2661
        node_output.append(val)
2662
      output.append(node_output)
2663

    
2664
    return output
2665

    
2666

    
2667
class LUQueryNodeVolumes(NoHooksLU):
2668
  """Logical unit for getting volumes on node(s).
2669

2670
  """
2671
  _OP_REQP = ["nodes", "output_fields"]
2672
  REQ_BGL = False
2673
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2674
  _FIELDS_STATIC = utils.FieldSet("node")
2675

    
2676
  def ExpandNames(self):
2677
    _CheckOutputFields(static=self._FIELDS_STATIC,
2678
                       dynamic=self._FIELDS_DYNAMIC,
2679
                       selected=self.op.output_fields)
2680

    
2681
    self.needed_locks = {}
2682
    self.share_locks[locking.LEVEL_NODE] = 1
2683
    if not self.op.nodes:
2684
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2685
    else:
2686
      self.needed_locks[locking.LEVEL_NODE] = \
2687
        _GetWantedNodes(self, self.op.nodes)
2688

    
2689
  def CheckPrereq(self):
2690
    """Check prerequisites.
2691

2692
    This checks that the fields required are valid output fields.
2693

2694
    """
2695
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2696

    
2697
  def Exec(self, feedback_fn):
2698
    """Computes the list of nodes and their attributes.
2699

2700
    """
2701
    nodenames = self.nodes
2702
    volumes = self.rpc.call_node_volumes(nodenames)
2703

    
2704
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2705
             in self.cfg.GetInstanceList()]
2706

    
2707
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2708

    
2709
    output = []
2710
    for node in nodenames:
2711
      nresult = volumes[node]
2712
      if nresult.offline:
2713
        continue
2714
      msg = nresult.fail_msg
2715
      if msg:
2716
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2717
        continue
2718

    
2719
      node_vols = nresult.payload[:]
2720
      node_vols.sort(key=lambda vol: vol['dev'])
2721

    
2722
      for vol in node_vols:
2723
        node_output = []
2724
        for field in self.op.output_fields:
2725
          if field == "node":
2726
            val = node
2727
          elif field == "phys":
2728
            val = vol['dev']
2729
          elif field == "vg":
2730
            val = vol['vg']
2731
          elif field == "name":
2732
            val = vol['name']
2733
          elif field == "size":
2734
            val = int(float(vol['size']))
2735
          elif field == "instance":
2736
            for inst in ilist:
2737
              if node not in lv_by_node[inst]:
2738
                continue
2739
              if vol['name'] in lv_by_node[inst][node]:
2740
                val = inst.name
2741
                break
2742
            else:
2743
              val = '-'
2744
          else:
2745
            raise errors.ParameterError(field)
2746
          node_output.append(str(val))
2747

    
2748
        output.append(node_output)
2749

    
2750
    return output
2751

    
2752

    
2753
class LUQueryNodeStorage(NoHooksLU):
2754
  """Logical unit for getting information on storage units on node(s).
2755

2756
  """
2757
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2758
  REQ_BGL = False
2759
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2760

    
2761
  def ExpandNames(self):
2762
    storage_type = self.op.storage_type
2763

    
2764
    if storage_type not in constants.VALID_STORAGE_TYPES:
2765
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2766
                                 errors.ECODE_INVAL)
2767

    
2768
    _CheckOutputFields(static=self._FIELDS_STATIC,
2769
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2770
                       selected=self.op.output_fields)
2771

    
2772
    self.needed_locks = {}
2773
    self.share_locks[locking.LEVEL_NODE] = 1
2774

    
2775
    if self.op.nodes:
2776
      self.needed_locks[locking.LEVEL_NODE] = \
2777
        _GetWantedNodes(self, self.op.nodes)
2778
    else:
2779
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2780

    
2781
  def CheckPrereq(self):
2782
    """Check prerequisites.
2783

2784
    This checks that the fields required are valid output fields.
2785

2786
    """
2787
    self.op.name = getattr(self.op, "name", None)
2788

    
2789
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2790

    
2791
  def Exec(self, feedback_fn):
2792
    """Computes the list of nodes and their attributes.
2793

2794
    """
2795
    # Always get name to sort by
2796
    if constants.SF_NAME in self.op.output_fields:
2797
      fields = self.op.output_fields[:]
2798
    else:
2799
      fields = [constants.SF_NAME] + self.op.output_fields
2800

    
2801
    # Never ask for node or type as it's only known to the LU
2802
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
2803
      while extra in fields:
2804
        fields.remove(extra)
2805

    
2806
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2807
    name_idx = field_idx[constants.SF_NAME]
2808

    
2809
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2810
    data = self.rpc.call_storage_list(self.nodes,
2811
                                      self.op.storage_type, st_args,
2812
                                      self.op.name, fields)
2813

    
2814
    result = []
2815

    
2816
    for node in utils.NiceSort(self.nodes):
2817
      nresult = data[node]
2818
      if nresult.offline:
2819
        continue
2820

    
2821
      msg = nresult.fail_msg
2822
      if msg:
2823
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2824
        continue
2825

    
2826
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2827

    
2828
      for name in utils.NiceSort(rows.keys()):
2829
        row = rows[name]
2830

    
2831
        out = []
2832

    
2833
        for field in self.op.output_fields:
2834
          if field == constants.SF_NODE:
2835
            val = node
2836
          elif field == constants.SF_TYPE:
2837
            val = self.op.storage_type
2838
          elif field in field_idx:
2839
            val = row[field_idx[field]]
2840
          else:
2841
            raise errors.ParameterError(field)
2842

    
2843
          out.append(val)
2844

    
2845
        result.append(out)
2846

    
2847
    return result
2848

    
2849

    
2850
class LUModifyNodeStorage(NoHooksLU):
2851
  """Logical unit for modifying a storage volume on a node.
2852

2853
  """
2854
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2855
  REQ_BGL = False
2856

    
2857
  def CheckArguments(self):
2858
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2859

    
2860
    storage_type = self.op.storage_type
2861
    if storage_type not in constants.VALID_STORAGE_TYPES:
2862
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2863
                                 errors.ECODE_INVAL)
2864

    
2865
  def ExpandNames(self):
2866
    self.needed_locks = {
2867
      locking.LEVEL_NODE: self.op.node_name,
2868
      }
2869

    
2870
  def CheckPrereq(self):
2871
    """Check prerequisites.
2872

2873
    """
2874
    storage_type = self.op.storage_type
2875

    
2876
    try:
2877
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2878
    except KeyError:
2879
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2880
                                 " modified" % storage_type,
2881
                                 errors.ECODE_INVAL)
2882

    
2883
    diff = set(self.op.changes.keys()) - modifiable
2884
    if diff:
2885
      raise errors.OpPrereqError("The following fields can not be modified for"
2886
                                 " storage units of type '%s': %r" %
2887
                                 (storage_type, list(diff)),
2888
                                 errors.ECODE_INVAL)
2889

    
2890
  def Exec(self, feedback_fn):
2891
    """Computes the list of nodes and their attributes.
2892

2893
    """
2894
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2895
    result = self.rpc.call_storage_modify(self.op.node_name,
2896
                                          self.op.storage_type, st_args,
2897
                                          self.op.name, self.op.changes)
2898
    result.Raise("Failed to modify storage unit '%s' on %s" %
2899
                 (self.op.name, self.op.node_name))
2900

    
2901

    
2902
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


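# Illustrative summary of the argument checks done below: only the flags
# passed in the opcode (offline/drained/master_candidate) are applied, at
# least one of them must be given, and at most one may be set to True in a
# single operation; e.g. offline=True together with drained=True is rejected
# by CheckArguments, while offline=True with the others left at None is fine.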
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the requested flags against the current node state.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    offline_or_drain = self.op.offline == True or self.op.drained == True
    deoffline_or_drain = self.op.offline == False or self.op.drained == False

    if (node.master_candidate and
        (self.op.master_candidate == False or offline_or_drain)):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
      if mc_now <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, mc_now-1))
        # Only allow forcing the operation if it's an offline/drain operation,
        # and we could not possibly promote more nodes.
        # FIXME: this can still lead to issues if in any way another node which
        # could be promoted appears in the meantime.
        if self.op.force and offline_or_drain and mc_should == mc_max:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (deoffline_or_drain and not offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }

    return result


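# Illustrative example for the query LU below: with
#   output_fields = ["cluster_name", "master_node"]
# Exec() returns the matching values in the same order, e.g.
#   ["cluster.example.com", "node1.example.com"]
# (the host names shown here are made up).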
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Return the values of the requested configuration fields.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


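# Illustrative shape of the value returned by the helper below:
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0"), ...])
# i.e. a success flag plus one (node, iv_name, device_path) tuple per disk;
# the node name and device path above are made-up examples.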
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  otherwise they cause the function to report failure.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


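# Illustrative call of the helper below (values are made up): checking that
# a node has 4096 MiB free before starting an instance on it would look like
#   _CheckNodeFreeMemory(self, "node1.example.com",
#                        "starting instance inst1", 4096, "xen-pvm")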
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


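# Illustrative example for the LU below: a start request may carry one-off
# backend parameter overrides, e.g.
#   beparams = {constants.BE_MEMORY: 512}
# which are validated against constants.BES_PARAMETER_TYPES in CheckPrereq
# before being passed to the node RPC (the value 512 is just an example).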
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


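# Summary of the reboot types handled by the LU below: soft and hard reboots
# are delegated to a single call_instance_reboot RPC on the primary node,
# while a full reboot is implemented as shutdown + disk deactivation + disk
# activation + start (see Exec).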
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


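# Note on the LU below: reinstallation keeps the existing disks and re-runs
# the OS create scripts on them; the instance must be marked down, and an
# optional os_type argument switches the instance to a different (verified)
# OS beforehand.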
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name,
                                 errors.ECODE_STATE)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node),
                                 errors.ECODE_STATE)

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      result = self.rpc.call_os_get(pnode, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode),
                   prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(result.payload, self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]