#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf

class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_' as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If no nodes are to be returned, an empty list (and not None) should be
    used.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]

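# Illustrative sketch, not part of the original module: a minimal logical
# unit following the rules documented in LogicalUnit above. The class name
# and opcode field are hypothetical; the point is how _OP_REQP, ExpandNames,
# CheckPrereq and Exec fit together for a concurrent (REQ_BGL = False) LU.
#
#   class LUExampleCheckInstance(LogicalUnit):
#     """Example LU that only reports on a single instance."""
#     HPATH = None
#     HTYPE = None
#     _OP_REQP = ["instance_name"]  # verified by LogicalUnit.__init__
#     REQ_BGL = False               # declare our own locks instead of the BGL
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Instance %s uses %s" %
#                   (self.instance.name, self.instance.disk_template))
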
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError

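# Illustrative sketch, not part of the original module: how an LU can be
# built from tasklets instead of its own CheckPrereq/Exec. The names here are
# hypothetical; ExpandNames assigns self.tasklets and the base class methods
# above then iterate over them.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Processing %s" % self.instance.name)
#
#   # ...and inside some LU's ExpandNames, after declaring the needed locks:
#   #   self.tasklets = [_ExampleTasklet(self, name) for name in names]
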
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names; must not be empty
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type
  @raise errors.ProgrammerError: if the nodes parameter is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = _ExpandNodeName(lu.cfg, name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)

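# Illustrative sketch, not part of the original module: how a query-style LU
# typically calls _CheckOutputFields (the helper above) from CheckArguments
# or ExpandNames. The field names here are hypothetical.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("mfree", "dfree"),
#                      selected=self.op.output_fields)
#
# Any selected field outside the two sets raises errors.OpPrereqError listing
# the unknown fields.
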
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")

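# Illustrative sketch, not part of the original module: typical use of the
# expansion wrappers above inside an LU's ExpandNames, with hypothetical
# opcode fields. A short name such as "node1" is resolved to its full cluster
# name, or errors.OpPrereqError (ECODE_NOENT) is raised if it is unknown.
#
#   self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#   self.op.instance_name = _ExpandInstanceName(self.cfg, self.op.instance_name)
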
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

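# Illustrative sketch, not part of the original module: for a hypothetical
# single-NIC, single-disk bridged instance, _BuildInstanceHookEnv returns a
# dict along these lines (the hooks runner later adds the GANETI_ prefix and
# its own keys):
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "node2.example.com",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     "INSTANCE_DISK0_MODE": "rw",
#     ...
#   }
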
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)

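# Illustrative sketch, not part of the original module: OS variants are
# encoded in the name after a "+", so a hypothetical user-supplied name of
# "debootstrap+lenny" selects variant "lenny" of the "debootstrap" OS. The
# call below raises errors.OpPrereqError if that variant is not listed in
# os_obj.supported_variants, or if no variant is given at all.
#
#   _CheckOSVariant(os_obj, "debootstrap+lenny")
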
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

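  # Illustrative sketch, not part of the original module: with hypothetical
  # arguments, self._Error(self.ENODELVM, "node1.example.com", "LVM problem")
  # is reported through feedback_fn as
  #
  #   - ERROR: node node1.example.com: LVM problem
  #
  # in the default mode, or in the machine-parseable form
  #
  #   - ERROR:ENODELVM:node:node1.example.com:LVM problem
  #
  # when the opcode sets error_codes.
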
  def _ErrorIf(self, cond, *args, **kwargs):
1019
    """Log an error message if the passed condition is True.
1020

1021
    """
1022
    cond = bool(cond) or self.op.debug_simulate_errors
1023
    if cond:
1024
      self._Error(*args, **kwargs)
1025
    # do not mark the operation as failed for WARN cases only
1026
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1027
      self.bad = self.bad or cond
1028

    
1029
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
1030
                  node_result, master_files, drbd_map, vg_name):
1031
    """Run multiple tests against a node.
1032

1033
    Test list:
1034

1035
      - compares ganeti version
1036
      - checks vg existence and size > 20G
1037
      - checks config file checksum
1038
      - checks ssh to other nodes
1039

1040
    @type nodeinfo: L{objects.Node}
1041
    @param nodeinfo: the node to check
1042
    @param file_list: required list of files
1043
    @param local_cksum: dictionary of local files and their checksums
1044
    @param node_result: the results from the node
1045
    @param master_files: list of files that only masters should have
1046
    @param drbd_map: the useddrbd minors for this node, in
1047
        form of minor: (instance, must_exist) which correspond to instances
1048
        and their running status
1049
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
1050

1051
    """
1052
    node = nodeinfo.name
1053
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1054

    
1055
    # main result, node_result should be a non-empty dict
1056
    test = not node_result or not isinstance(node_result, dict)
1057
    _ErrorIf(test, self.ENODERPC, node,
1058
                  "unable to verify node: no data returned")
1059
    if test:
1060
      return
1061

    
1062
    # compares ganeti version
1063
    local_version = constants.PROTOCOL_VERSION
1064
    remote_version = node_result.get('version', None)
1065
    test = not (remote_version and
1066
                isinstance(remote_version, (list, tuple)) and
1067
                len(remote_version) == 2)
1068
    _ErrorIf(test, self.ENODERPC, node,
1069
             "connection to node returned invalid data")
1070
    if test:
1071
      return
1072

    
1073
    test = local_version != remote_version[0]
1074
    _ErrorIf(test, self.ENODEVERSION, node,
1075
             "incompatible protocol versions: master %s,"
1076
             " node %s", local_version, remote_version[0])
1077
    if test:
1078
      return
1079

    
1080
    # node seems compatible, we can actually try to look into its results
1081

    
1082
    # full package version
1083
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1084
                  self.ENODEVERSION, node,
1085
                  "software version mismatch: master %s, node %s",
1086
                  constants.RELEASE_VERSION, remote_version[1],
1087
                  code=self.ETYPE_WARNING)
1088

    
1089
    # checks vg existence and size > 20G
1090
    if vg_name is not None:
1091
      vglist = node_result.get(constants.NV_VGLIST, None)
1092
      test = not vglist
1093
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1094
      if not test:
1095
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1096
                                              constants.MIN_VG_SIZE)
1097
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1098

    
1099
    # checks config file checksum
1100

    
1101
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
1102
    test = not isinstance(remote_cksum, dict)
1103
    _ErrorIf(test, self.ENODEFILECHECK, node,
1104
             "node hasn't returned file checksum data")
1105
    if not test:
1106
      for file_name in file_list:
1107
        node_is_mc = nodeinfo.master_candidate
1108
        must_have = (file_name not in master_files) or node_is_mc
1109
        # missing
1110
        test1 = file_name not in remote_cksum
1111
        # invalid checksum
1112
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1113
        # existing and good
1114
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1115
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1116
                 "file '%s' missing", file_name)
1117
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1118
                 "file '%s' has wrong checksum", file_name)
1119
        # not candidate and this is not a must-have file
1120
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1121
                 "file '%s' should not exist on non master"
1122
                 " candidates (and the file is outdated)", file_name)
1123
        # all good, except non-master/non-must have combination
1124
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1125
                 "file '%s' should not exist"
1126
                 " on non master candidates", file_name)
1127

    
1128
    # checks ssh to any
1129

    
1130
    test = constants.NV_NODELIST not in node_result
1131
    _ErrorIf(test, self.ENODESSH, node,
1132
             "node hasn't returned node ssh connectivity data")
1133
    if not test:
1134
      if node_result[constants.NV_NODELIST]:
1135
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1136
          _ErrorIf(True, self.ENODESSH, node,
1137
                   "ssh communication with node '%s': %s", a_node, a_msg)
1138

    
1139
    test = constants.NV_NODENETTEST not in node_result
1140
    _ErrorIf(test, self.ENODENET, node,
1141
             "node hasn't returned node tcp connectivity data")
1142
    if not test:
1143
      if node_result[constants.NV_NODENETTEST]:
1144
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1145
        for anode in nlist:
1146
          _ErrorIf(True, self.ENODENET, node,
1147
                   "tcp communication with node '%s': %s",
1148
                   anode, node_result[constants.NV_NODENETTEST][anode])
1149

    
1150
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1151
    if isinstance(hyp_result, dict):
1152
      for hv_name, hv_result in hyp_result.iteritems():
1153
        test = hv_result is not None
1154
        _ErrorIf(test, self.ENODEHV, node,
1155
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1156

    
1157
    # check used drbd list
1158
    if vg_name is not None:
1159
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
1160
      test = not isinstance(used_minors, (tuple, list))
1161
      _ErrorIf(test, self.ENODEDRBD, node,
1162
               "cannot parse drbd status file: %s", str(used_minors))
1163
      if not test:
1164
        for minor, (iname, must_exist) in drbd_map.items():
1165
          test = minor not in used_minors and must_exist
1166
          _ErrorIf(test, self.ENODEDRBD, node,
1167
                   "drbd minor %d of instance %s is not active",
1168
                   minor, iname)
1169
        for minor in used_minors:
1170
          test = minor not in drbd_map
1171
          _ErrorIf(test, self.ENODEDRBD, node,
1172
                   "unallocated drbd minor %d is in use", minor)
1173
    test = node_result.get(constants.NV_NODESETUP,
1174
                           ["Missing NODESETUP results"])
1175
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1176
             "; ".join(test))
1177

    
1178
    # check pv names
1179
    if vg_name is not None:
1180
      pvlist = node_result.get(constants.NV_PVLIST, None)
1181
      test = pvlist is None
1182
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1183
      if not test:
1184
        # check that ':' is not present in PV names, since it's a
1185
        # special character for lvcreate (denotes the range of PEs to
1186
        # use on the PV)
1187
        for _, pvname, owner_vg in pvlist:
1188
          test = ":" in pvname
1189
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1190
                   " '%s' of VG '%s'", pvname, owner_vg)
1191

    
1192
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1193
                      node_instance, n_offline):
1194
    """Verify an instance.
1195

1196
    This function checks to see if the required block devices are
1197
    available on the instance's node.
1198

1199
    """
1200
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1201
    node_current = instanceconfig.primary_node
1202

    
1203
    node_vol_should = {}
1204
    instanceconfig.MapLVsByNode(node_vol_should)
1205

    
1206
    for node in node_vol_should:
1207
      if node in n_offline:
1208
        # ignore missing volumes on offline nodes
1209
        continue
1210
      for volume in node_vol_should[node]:
1211
        test = node not in node_vol_is or volume not in node_vol_is[node]
1212
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1213
                 "volume %s missing on node %s", volume, node)
1214

    
1215
    if instanceconfig.admin_up:
1216
      test = ((node_current not in node_instance or
1217
               not instance in node_instance[node_current]) and
1218
              node_current not in n_offline)
1219
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1220
               "instance not running on its primary node %s",
1221
               node_current)
1222

    
1223
    for node in node_instance:
1224
      if (not node == node_current):
1225
        test = instance in node_instance[node]
1226
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1227
                 "instance should not run on node %s", node)
1228

    
1229
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1230
    """Verify if there are any unknown volumes in the cluster.
1231

1232
    The .os, .swap and backup volumes are ignored. All other volumes are
1233
    reported as unknown.
1234

1235
    """
1236
    for node in node_vol_is:
1237
      for volume in node_vol_is[node]:
1238
        test = (node not in node_vol_should or
1239
                volume not in node_vol_should[node])
1240
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1241
                      "volume %s is unknown", volume)
1242

    
1243
  def _VerifyOrphanInstances(self, instancelist, node_instance):
1244
    """Verify the list of running instances.
1245

1246
    This checks what instances are running but unknown to the cluster.
1247

1248
    """
1249
    for node in node_instance:
1250
      for o_inst in node_instance[node]:
1251
        test = o_inst not in instancelist
1252
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1253
                      "instance %s on node %s should not exist", o_inst, node)
1254

    
1255
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1256
    """Verify N+1 Memory Resilience.
1257

1258
    Check that if one single node dies we can still start all the instances it
1259
    was primary for.
1260

1261
    """
1262
    for node, nodeinfo in node_info.iteritems():
1263
      # This code checks that every node which is now listed as secondary has
1264
      # enough memory to host all instances it is supposed to should a single
1265
      # other node in the cluster fail.
1266
      # FIXME: not ready for failover to an arbitrary node
1267
      # FIXME: does not support file-backed instances
1268
      # WARNING: we currently take into account down instances as well as up
1269
      # ones, considering that even if they're down someone might want to start
1270
      # them even in the event of a node failure.
1271
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1272
        needed_mem = 0
1273
        for instance in instances:
1274
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1275
          if bep[constants.BE_AUTO_BALANCE]:
1276
            needed_mem += bep[constants.BE_MEMORY]
1277
        test = nodeinfo['mfree'] < needed_mem
1278
        self._ErrorIf(test, self.ENODEN1, node,
1279
                      "not enough memory on to accommodate"
1280
                      " failovers should peer node %s fail", prinode)
1281

    
1282
  def CheckPrereq(self):
1283
    """Check prerequisites.
1284

1285
    Transform the list of checks we're going to skip into a set and check that
1286
    all its members are valid.
1287

1288
    """
1289
    self.skip_set = frozenset(self.op.skip_checks)
1290
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1291
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
1292
                                 errors.ECODE_INVAL)
1293

    
1294
  def BuildHooksEnv(self):
1295
    """Build hooks env.
1296

1297
    Cluster-Verify hooks just ran in the post phase and their failure makes
1298
    the output be logged in the verify output and the verification to fail.
1299

1300
    """
1301
    all_nodes = self.cfg.GetNodeList()
1302
    env = {
1303
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1304
      }
1305
    for node in self.cfg.GetAllNodesInfo().values():
1306
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1307

    
1308
    return env, [], all_nodes
1309

    
1310
  def Exec(self, feedback_fn):
1311
    """Verify integrity of cluster, performing various test on nodes.
1312

1313
    """
1314
    self.bad = False
1315
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1316
    verbose = self.op.verbose
1317
    self._feedback_fn = feedback_fn
1318
    feedback_fn("* Verifying global settings")
1319
    for msg in self.cfg.VerifyConfig():
1320
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1321

    
1322
    vg_name = self.cfg.GetVGName()
1323
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1324
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1325
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1326
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1327
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1328
                        for iname in instancelist)
1329
    i_non_redundant = [] # Non redundant instances
1330
    i_non_a_balanced = [] # Non auto-balanced instances
1331
    n_offline = [] # List of offline nodes
1332
    n_drained = [] # List of nodes being drained
1333
    node_volume = {}
1334
    node_instance = {}
1335
    node_info = {}
1336
    instance_cfg = {}
1337

    
1338
    # FIXME: verify OS list
1339
    # do local checksums
1340
    master_files = [constants.CLUSTER_CONF_FILE]
1341

    
1342
    file_names = ssconf.SimpleStore().GetFileList()
1343
    file_names.append(constants.SSL_CERT_FILE)
1344
    file_names.append(constants.RAPI_CERT_FILE)
1345
    file_names.extend(master_files)
1346

    
1347
    local_checksums = utils.FingerprintFiles(file_names)
1348

    
1349
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1350
    node_verify_param = {
1351
      constants.NV_FILELIST: file_names,
1352
      constants.NV_NODELIST: [node.name for node in nodeinfo
1353
                              if not node.offline],
1354
      constants.NV_HYPERVISOR: hypervisors,
1355
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1356
                                  node.secondary_ip) for node in nodeinfo
1357
                                 if not node.offline],
1358
      constants.NV_INSTANCELIST: hypervisors,
1359
      constants.NV_VERSION: None,
1360
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1361
      constants.NV_NODESETUP: None,
1362
      constants.NV_TIME: None,
1363
      }
1364

    
1365
    if vg_name is not None:
1366
      node_verify_param[constants.NV_VGLIST] = None
1367
      node_verify_param[constants.NV_LVLIST] = vg_name
1368
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1369
      node_verify_param[constants.NV_DRBDLIST] = None
1370

    
1371
    # Due to the way our RPC system works, exact response times cannot be
1372
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1373
    # time before and after executing the request, we can at least have a time
1374
    # window.
1375
    nvinfo_starttime = time.time()
1376
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1377
                                           self.cfg.GetClusterName())
1378
    nvinfo_endtime = time.time()
1379

    
1380
    cluster = self.cfg.GetClusterInfo()
1381
    master_node = self.cfg.GetMasterNode()
1382
    all_drbd_map = self.cfg.ComputeDRBDMap()
1383

    
1384
    feedback_fn("* Verifying node status")
1385
    for node_i in nodeinfo:
1386
      node = node_i.name
1387

    
1388
      if node_i.offline:
1389
        if verbose:
1390
          feedback_fn("* Skipping offline node %s" % (node,))
1391
        n_offline.append(node)
1392
        continue
1393

    
1394
      if node == master_node:
1395
        ntype = "master"
1396
      elif node_i.master_candidate:
1397
        ntype = "master candidate"
1398
      elif node_i.drained:
1399
        ntype = "drained"
1400
        n_drained.append(node)
1401
      else:
1402
        ntype = "regular"
1403
      if verbose:
1404
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1405

    
1406
      msg = all_nvinfo[node].fail_msg
1407
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1408
      if msg:
1409
        continue
1410

    
1411
      nresult = all_nvinfo[node].payload
1412
      node_drbd = {}
1413
      for minor, instance in all_drbd_map[node].items():
1414
        test = instance not in instanceinfo
1415
        _ErrorIf(test, self.ECLUSTERCFG, None,
1416
                 "ghost instance '%s' in temporary DRBD map", instance)
1417
          # ghost instance should not be running, but otherwise we
1418
          # don't give double warnings (both ghost instance and
1419
          # unallocated minor in use)
1420
        if test:
1421
          node_drbd[minor] = (instance, False)
1422
        else:
1423
          instance = instanceinfo[instance]
1424
          node_drbd[minor] = (instance.name, instance.admin_up)
1425

    
1426
      self._VerifyNode(node_i, file_names, local_checksums,
1427
                       nresult, master_files, node_drbd, vg_name)
1428

    
1429
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1430
      if vg_name is None:
1431
        node_volume[node] = {}
1432
      elif isinstance(lvdata, basestring):
1433
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1434
                 utils.SafeEncode(lvdata))
1435
        node_volume[node] = {}
1436
      elif not isinstance(lvdata, dict):
1437
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1438
        continue
1439
      else:
1440
        node_volume[node] = lvdata
1441

    
1442
      # node_instance
1443
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1444
      test = not isinstance(idata, list)
1445
      _ErrorIf(test, self.ENODEHV, node,
1446
               "rpc call to node failed (instancelist)")
1447
      if test:
1448
        continue
1449

    
1450
      node_instance[node] = idata
1451

    
1452
      # node_info
1453
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1454
      test = not isinstance(nodeinfo, dict)
1455
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1456
      if test:
1457
        continue
1458

    
1459
      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %0.1fs from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
1481
        node_info[node] = {
1482
          "mfree": int(nodeinfo['memory_free']),
1483
          "pinst": [],
1484
          "sinst": [],
1485
          # dictionary holding all instances this node is secondary for,
1486
          # grouped by their primary node. Each key is a cluster node, and each
1487
          # value is a list of instances which have the key as primary and the
1488
          # current node as secondary.  this is handy to calculate N+1 memory
1489
          # availability if you can only failover from a primary to its
1490
          # secondary.
1491
          "sinst-by-pnode": {},
1492
        }
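        # Illustrative content (hypothetical names): if this node is
        # secondary for "inst1" and "inst2" whose primary is "nodeA", the
        # instance loop below ends up with
        #   node_info[node]["sinst-by-pnode"] == {"nodeA": ["inst1", "inst2"]}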
1493
        # FIXME: devise a free space model for file based instances as well
1494
        if vg_name is not None:
1495
          test = (constants.NV_VGLIST not in nresult or
1496
                  vg_name not in nresult[constants.NV_VGLIST])
1497
          _ErrorIf(test, self.ENODELVM, node,
1498
                   "node didn't return data for the volume group '%s'"
1499
                   " - it is either missing or broken", vg_name)
1500
          if test:
1501
            continue
1502
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1503
      except (ValueError, KeyError):
1504
        _ErrorIf(True, self.ENODERPC, node,
1505
                 "node returned invalid nodeinfo, check lvm/hypervisor")
1506
        continue
1507

    
1508
    node_vol_should = {}
1509

    
1510
    feedback_fn("* Verifying instance status")
1511
    for instance in instancelist:
1512
      if verbose:
1513
        feedback_fn("* Verifying instance %s" % instance)
1514
      inst_config = instanceinfo[instance]
1515
      self._VerifyInstance(instance, inst_config, node_volume,
1516
                           node_instance, n_offline)
1517
      inst_nodes_offline = []
1518

    
1519
      inst_config.MapLVsByNode(node_vol_should)
1520

    
1521
      instance_cfg[instance] = inst_config
1522

    
1523
      pnode = inst_config.primary_node
1524
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
1525
               self.ENODERPC, pnode, "instance %s, connection to"
1526
               " primary node failed", instance)
1527
      if pnode in node_info:
1528
        node_info[pnode]['pinst'].append(instance)
1529

    
1530
      if pnode in n_offline:
1531
        inst_nodes_offline.append(pnode)
1532

    
1533
      # If the instance is non-redundant we cannot survive losing its primary
1534
      # node, so we are not N+1 compliant. On the other hand we have no disk
1535
      # templates with more than one secondary so that situation is not well
1536
      # supported either.
1537
      # FIXME: does not support file-backed instances
1538
      if len(inst_config.secondary_nodes) == 0:
1539
        i_non_redundant.append(instance)
1540
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
1541
               self.EINSTANCELAYOUT, instance,
1542
               "instance has multiple secondary nodes", code="WARNING")
1543

    
1544
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1545
        i_non_a_balanced.append(instance)
1546

    
1547
      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)
1552

    
1553
        if snode in node_info:
1554
          node_info[snode]['sinst'].append(instance)
1555
          if pnode not in node_info[snode]['sinst-by-pnode']:
1556
            node_info[snode]['sinst-by-pnode'][pnode] = []
1557
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1558

    
1559
        if snode in n_offline:
1560
          inst_nodes_offline.append(snode)
1561

    
1562
      # warn that the instance lives on offline nodes
1563
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1564
               "instance lives on offline node(s) %s",
1565
               utils.CommaJoin(inst_nodes_offline))
1566

    
1567
    feedback_fn("* Verifying orphan volumes")
1568
    self._VerifyOrphanVolumes(node_vol_should, node_volume)
1569

    
1570
    feedback_fn("* Verifying remaining instances")
1571
    self._VerifyOrphanInstances(instancelist, node_instance)
1572

    
1573
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1574
      feedback_fn("* Verifying N+1 Memory redundancy")
1575
      self._VerifyNPlusOneMemory(node_info, instance_cfg)
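      # Rough idea of the check (illustrative numbers): if the instances that
      # have node "nodeB" as secondary and "nodeA" as primary need 4096 MB in
      # total, "nodeB" must report at least 4096 MB free, otherwise a
      # failover from "nodeA" to "nodeB" could not be accommodated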
1576

    
1577
    feedback_fn("* Other Notes")
1578
    if i_non_redundant:
1579
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1580
                  % len(i_non_redundant))
1581

    
1582
    if i_non_a_balanced:
1583
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1584
                  % len(i_non_a_balanced))
1585

    
1586
    if n_offline:
1587
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1588

    
1589
    if n_drained:
1590
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1591

    
1592
    return not self.bad
1593

    
1594
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1595
    """Analyze the post-hooks' result
1596

1597
    This method analyses the hook result, handles it, and sends some
1598
    nicely-formatted feedback back to the user.
1599

1600
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1601
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1602
    @param hooks_results: the results of the multi-node hooks rpc call
1603
    @param feedback_fn: function used send feedback back to the caller
1604
    @param lu_result: previous Exec result
1605
    @return: the new Exec result, based on the previous result
1606
        and hook results
1607

1608
    """
1609
    # We only really run POST phase hooks, and are only interested in
1610
    # their results
1611
    if phase == constants.HOOKS_PHASE_POST:
1612
      # Used to change hooks' output to proper indentation
1613
      indent_re = re.compile('^', re.M)
1614
      feedback_fn("* Hooks Results")
1615
      assert hooks_results, "invalid result from hooks"
1616

    
1617
      for node_name in hooks_results:
1618
        res = hooks_results[node_name]
1619
        msg = res.fail_msg
1620
        test = msg and not res.offline
1621
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
1622
                      "Communication failure in hooks execution: %s", msg)
1623
        if res.offline or msg:
1624
          # No need to investigate payload if node is offline or gave an error.
1625
          # override manually lu_result here as _ErrorIf only
1626
          # overrides self.bad
1627
          lu_result = 1
1628
          continue
1629
        for script, hkr, output in res.payload:
1630
          test = hkr == constants.HKR_FAIL
1631
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
1632
                        "Script %s failed, output:", script)
1633
          if test:
1634
            output = indent_re.sub('      ', output)
1635
            feedback_fn("%s" % output)
1636
            lu_result = 1
1637

    
1638
      return lu_result
1639

    
1640

    
1641
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
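    # both levels are taken in shared mode (the value 1 above) since this LU
    # only reads cluster state; other query-style LUs in this module follow
    # the same pattern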
1654

    
1655
  def CheckPrereq(self):
1656
    """Check prerequisites.
1657

1658
    This has no prerequisites.
1659

1660
    """
1661
    pass
1662

    
1663
  def Exec(self, feedback_fn):
1664
    """Verify integrity of cluster disks.
1665

1666
    @rtype: tuple of three items
1667
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
1670

1671
    """
1672
    result = res_nodes, res_instances, res_missing = {}, [], {}
1673

    
1674
    vg_name = self.cfg.GetVGName()
1675
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1676
    instances = [self.cfg.GetInstanceInfo(name)
1677
                 for name in self.cfg.GetInstanceList()]
1678

    
1679
    nv_dict = {}
1680
    for inst in instances:
1681
      inst_lvs = {}
1682
      if (not inst.admin_up or
1683
          inst.disk_template not in constants.DTS_NET_MIRROR):
1684
        continue
1685
      inst.MapLVsByNode(inst_lvs)
1686
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1687
      for node, vol_list in inst_lvs.iteritems():
1688
        for vol in vol_list:
1689
          nv_dict[(node, vol)] = inst
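      # e.g. (hypothetical names) an instance "inst1" with LVs "lv1" and
      # "lv2" on node "nodeA" contributes the entries
      #   {("nodeA", "lv1"): <inst1>, ("nodeA", "lv2"): <inst1>}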
1690

    
1691
    if not nv_dict:
1692
      return result
1693

    
1694
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1695

    
1696
    for node in nodes:
1697
      # node_volume
1698
      node_res = node_lvs[node]
1699
      if node_res.offline:
1700
        continue
1701
      msg = node_res.fail_msg
1702
      if msg:
1703
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1704
        res_nodes[node] = msg
1705
        continue
1706

    
1707
      lvs = node_res.payload
1708
      for lv_name, (_, _, lv_online) in lvs.items():
1709
        inst = nv_dict.pop((node, lv_name), None)
1710
        if (not lv_online and inst is not None
1711
            and inst.name not in res_instances):
1712
          res_instances.append(inst.name)
1713

    
1714
    # any leftover items in nv_dict are missing LVs, let's arrange the
1715
    # data better
1716
    for key, inst in nv_dict.iteritems():
1717
      if inst.name not in res_missing:
1718
        res_missing[inst.name] = []
1719
      res_missing[inst.name].append(key)
1720

    
1721
    return result
1722

    
1723

    
1724
class LURepairDiskSizes(NoHooksLU):
1725
  """Verifies the cluster disks sizes.
1726

1727
  """
1728
  _OP_REQP = ["instances"]
1729
  REQ_BGL = False
1730

    
1731
  def ExpandNames(self):
1732
    if not isinstance(self.op.instances, list):
1733
      raise errors.OpPrereqError("Invalid argument type 'instances'",
1734
                                 errors.ECODE_INVAL)
1735

    
1736
    if self.op.instances:
1737
      self.wanted_names = []
1738
      for name in self.op.instances:
1739
        full_name = _ExpandInstanceName(self.cfg, name)
1740
        self.wanted_names.append(full_name)
1741
      self.needed_locks = {
1742
        locking.LEVEL_NODE: [],
1743
        locking.LEVEL_INSTANCE: self.wanted_names,
1744
        }
1745
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1746
    else:
1747
      self.wanted_names = None
1748
      self.needed_locks = {
1749
        locking.LEVEL_NODE: locking.ALL_SET,
1750
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1751
        }
1752
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1753

    
1754
  def DeclareLocks(self, level):
1755
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1756
      self._LockInstancesNodes(primary_only=True)
1757

    
1758
  def CheckPrereq(self):
1759
    """Check prerequisites.
1760

1761
    This only checks the optional instance list against the existing names.
1762

1763
    """
1764
    if self.wanted_names is None:
1765
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1766

    
1767
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1768
                             in self.wanted_names]
1769

    
1770
  def _EnsureChildSizes(self, disk):
1771
    """Ensure children of the disk have the needed disk size.
1772

1773
    This is valid mainly for DRBD8 and fixes an issue where the
1774
    children have smaller disk size.
1775

1776
    @param disk: an L{ganeti.objects.Disk} object
1777

1778
    """
1779
    if disk.dev_type == constants.LD_DRBD8:
1780
      assert disk.children, "Empty children for DRBD8?"
1781
      fchild = disk.children[0]
1782
      mismatch = fchild.size < disk.size
1783
      if mismatch:
1784
        self.LogInfo("Child disk has size %d, parent %d, fixing",
1785
                     fchild.size, disk.size)
1786
        fchild.size = disk.size
1787

    
1788
      # and we recurse on this child only, not on the metadev
1789
      return self._EnsureChildSizes(fchild) or mismatch
1790
    else:
1791
      return False
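  # Illustrative example (hypothetical sizes): for a DRBD8 disk recorded with
  # 10240 MiB whose data child was recorded with 10112 MiB, the child is
  # bumped to 10240 MiB and True is returned, telling Exec() below to write
  # the updated configuration.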
1792

    
1793
  def Exec(self, feedback_fn):
1794
    """Verify the size of cluster disks.
1795

1796
    """
1797
    # TODO: check child disks too
1798
    # TODO: check differences in size between primary/secondary nodes
1799
    per_node_disks = {}
1800
    for instance in self.wanted_instances:
1801
      pnode = instance.primary_node
1802
      if pnode not in per_node_disks:
1803
        per_node_disks[pnode] = []
1804
      for idx, disk in enumerate(instance.disks):
1805
        per_node_disks[pnode].append((instance, idx, disk))
1806

    
1807
    changed = []
1808
    for node, dskl in per_node_disks.items():
1809
      newl = [v[2].Copy() for v in dskl]
1810
      for dsk in newl:
1811
        self.cfg.SetDiskID(dsk, node)
1812
      result = self.rpc.call_blockdev_getsizes(node, newl)
1813
      if result.fail_msg:
1814
        self.LogWarning("Failure in blockdev_getsizes call to node"
1815
                        " %s, ignoring", node)
1816
        continue
1817
      if len(result.data) != len(dskl):
1818
        self.LogWarning("Invalid result from node %s, ignoring node results",
1819
                        node)
1820
        continue
1821
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1822
        if size is None:
1823
          self.LogWarning("Disk %d of instance %s did not return size"
1824
                          " information, ignoring", idx, instance.name)
1825
          continue
1826
        if not isinstance(size, (int, long)):
1827
          self.LogWarning("Disk %d of instance %s did not return valid"
1828
                          " size information, ignoring", idx, instance.name)
1829
          continue
1830
        size = size >> 20  # sizes are reported in bytes, config stores MiB
1831
        if size != disk.size:
1832
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1833
                       " correcting: recorded %d, actual %d", idx,
1834
                       instance.name, disk.size, size)
1835
          disk.size = size
1836
          self.cfg.Update(instance, feedback_fn)
1837
          changed.append((instance.name, idx, size))
1838
        if self._EnsureChildSizes(disk):
1839
          self.cfg.Update(instance, feedback_fn)
1840
          changed.append((instance.name, idx, disk.size))
1841
    return changed
1842

    
1843

    
1844
class LURenameCluster(LogicalUnit):
1845
  """Rename the cluster.
1846

1847
  """
1848
  HPATH = "cluster-rename"
1849
  HTYPE = constants.HTYPE_CLUSTER
1850
  _OP_REQP = ["name"]
1851

    
1852
  def BuildHooksEnv(self):
1853
    """Build hooks env.
1854

1855
    """
1856
    env = {
1857
      "OP_TARGET": self.cfg.GetClusterName(),
1858
      "NEW_NAME": self.op.name,
1859
      }
1860
    mn = self.cfg.GetMasterNode()
1861
    all_nodes = self.cfg.GetNodeList()
1862
    return env, [mn], all_nodes
1863

    
1864
  def CheckPrereq(self):
1865
    """Verify that the passed name is a valid one.
1866

1867
    """
1868
    hostname = utils.GetHostInfo(self.op.name)
1869

    
1870
    new_name = hostname.name
1871
    self.ip = new_ip = hostname.ip
1872
    old_name = self.cfg.GetClusterName()
1873
    old_ip = self.cfg.GetMasterIP()
1874
    if new_name == old_name and new_ip == old_ip:
1875
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1876
                                 " cluster has changed",
1877
                                 errors.ECODE_INVAL)
1878
    if new_ip != old_ip:
1879
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1880
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1881
                                   " reachable on the network. Aborting." %
1882
                                   new_ip, errors.ECODE_NOTUNIQUE)
1883

    
1884
    self.op.name = new_name
1885

    
1886
  def Exec(self, feedback_fn):
1887
    """Rename the cluster.
1888

1889
    """
1890
    clustername = self.op.name
1891
    ip = self.ip
1892

    
1893
    # shutdown the master IP
1894
    master = self.cfg.GetMasterNode()
1895
    result = self.rpc.call_node_stop_master(master, False)
1896
    result.Raise("Could not disable the master role")
1897

    
1898
    try:
1899
      cluster = self.cfg.GetClusterInfo()
1900
      cluster.cluster_name = clustername
1901
      cluster.master_ip = ip
1902
      self.cfg.Update(cluster, feedback_fn)
1903

    
1904
      # update the known hosts file
1905
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1906
      node_list = self.cfg.GetNodeList()
1907
      try:
1908
        node_list.remove(master)
1909
      except ValueError:
1910
        pass
1911
      result = self.rpc.call_upload_file(node_list,
1912
                                         constants.SSH_KNOWN_HOSTS_FILE)
1913
      for to_node, to_result in result.iteritems():
1914
        msg = to_result.fail_msg
1915
        if msg:
1916
          msg = ("Copy of file %s to node %s failed: %s" %
1917
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1918
          self.proc.LogWarning(msg)
1919

    
1920
    finally:
1921
      result = self.rpc.call_node_start_master(master, False, False)
1922
      msg = result.fail_msg
1923
      if msg:
1924
        self.LogWarning("Could not re-enable the master role on"
1925
                        " the master, please restart manually: %s", msg)
1926

    
1927

    
1928
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
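# Sketch of the recursion above (hypothetical disk objects): a DRBD8 disk
# whose children are two LD_LV devices yields True, while a plain file-based
# disk with no LVM children yields False.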
1942

    
1943

    
1944
class LUSetClusterParams(LogicalUnit):
1945
  """Change the parameters of the cluster.
1946

1947
  """
1948
  HPATH = "cluster-modify"
1949
  HTYPE = constants.HTYPE_CLUSTER
1950
  _OP_REQP = []
1951
  REQ_BGL = False
1952

    
1953
  def CheckArguments(self):
1954
    """Check parameters
1955

1956
    """
1957
    if not hasattr(self.op, "candidate_pool_size"):
1958
      self.op.candidate_pool_size = None
1959
    if self.op.candidate_pool_size is not None:
1960
      try:
1961
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1962
      except (ValueError, TypeError), err:
1963
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1964
                                   str(err), errors.ECODE_INVAL)
1965
      if self.op.candidate_pool_size < 1:
1966
        raise errors.OpPrereqError("At least one master candidate needed",
1967
                                   errors.ECODE_INVAL)
1968

    
1969
  def ExpandNames(self):
1970
    # FIXME: in the future maybe other cluster params won't require checking on
1971
    # all nodes to be modified.
1972
    self.needed_locks = {
1973
      locking.LEVEL_NODE: locking.ALL_SET,
1974
    }
1975
    self.share_locks[locking.LEVEL_NODE] = 1
1976

    
1977
  def BuildHooksEnv(self):
1978
    """Build hooks env.
1979

1980
    """
1981
    env = {
1982
      "OP_TARGET": self.cfg.GetClusterName(),
1983
      "NEW_VG_NAME": self.op.vg_name,
1984
      }
1985
    mn = self.cfg.GetMasterNode()
1986
    return env, [mn], [mn]
1987

    
1988
  def CheckPrereq(self):
1989
    """Check prerequisites.
1990

1991
    This checks whether the given params don't conflict and
1992
    if the given volume group is valid.
1993

1994
    """
1995
    if self.op.vg_name is not None and not self.op.vg_name:
1996
      instances = self.cfg.GetAllInstancesInfo().values()
1997
      for inst in instances:
1998
        for disk in inst.disks:
1999
          if _RecursiveCheckIfLVMBased(disk):
2000
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2001
                                       " lvm-based instances exist",
2002
                                       errors.ECODE_INVAL)
2003

    
2004
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2005

    
2006
    # if vg_name not None, checks given volume group on all nodes
2007
    if self.op.vg_name:
2008
      vglist = self.rpc.call_vg_list(node_list)
2009
      for node in node_list:
2010
        msg = vglist[node].fail_msg
2011
        if msg:
2012
          # ignoring down node
2013
          self.LogWarning("Error while gathering data on node %s"
2014
                          " (ignoring node): %s", node, msg)
2015
          continue
2016
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2017
                                              self.op.vg_name,
2018
                                              constants.MIN_VG_SIZE)
2019
        if vgstatus:
2020
          raise errors.OpPrereqError("Error on node '%s': %s" %
2021
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2022

    
2023
    self.cluster = cluster = self.cfg.GetClusterInfo()
2024
    # validate params changes
2025
    if self.op.beparams:
2026
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2027
      self.new_beparams = objects.FillDict(
2028
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
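      # sketch of the merge semantics relied upon here (hypothetical values):
      #   objects.FillDict({"memory": 128, "vcpus": 1}, {"memory": 256})
      #     -> {"memory": 256, "vcpus": 1}
      # i.e. the submitted values override the current cluster defaults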
2029

    
2030
    if self.op.nicparams:
2031
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2032
      self.new_nicparams = objects.FillDict(
2033
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2034
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2035
      nic_errors = []
2036

    
2037
      # check all instances for consistency
2038
      for instance in self.cfg.GetAllInstancesInfo().values():
2039
        for nic_idx, nic in enumerate(instance.nics):
2040
          params_copy = copy.deepcopy(nic.nicparams)
2041
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2042

    
2043
          # check parameter syntax
2044
          try:
2045
            objects.NIC.CheckParameterSyntax(params_filled)
2046
          except errors.ConfigurationError, err:
2047
            nic_errors.append("Instance %s, nic/%d: %s" %
2048
                              (instance.name, nic_idx, err))
2049

    
2050
          # if we're moving instances to routed, check that they have an IP
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
2055
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors), errors.ECODE_INVAL)
2058

    
2059
    # hypervisor list/parameters
2060
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2061
    if self.op.hvparams:
2062
      if not isinstance(self.op.hvparams, dict):
2063
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2064
                                   errors.ECODE_INVAL)
2065
      for hv_name, hv_dict in self.op.hvparams.items():
2066
        if hv_name not in self.new_hvparams:
2067
          self.new_hvparams[hv_name] = hv_dict
2068
        else:
2069
          self.new_hvparams[hv_name].update(hv_dict)
2070

    
2071
    if self.op.enabled_hypervisors is not None:
2072
      self.hv_list = self.op.enabled_hypervisors
2073
      if not self.hv_list:
2074
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2075
                                   " least one member",
2076
                                   errors.ECODE_INVAL)
2077
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2078
      if invalid_hvs:
2079
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2080
                                   " entries: %s" %
2081
                                   utils.CommaJoin(invalid_hvs),
2082
                                   errors.ECODE_INVAL)
2083
    else:
2084
      self.hv_list = cluster.enabled_hypervisors
2085

    
2086
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2087
      # either the enabled list has changed, or the parameters have, validate
2088
      for hv_name, hv_params in self.new_hvparams.items():
2089
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2090
            (self.op.enabled_hypervisors and
2091
             hv_name in self.op.enabled_hypervisors)):
2092
          # either this is a new hypervisor, or its parameters have changed
2093
          hv_class = hypervisor.GetHypervisor(hv_name)
2094
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2095
          hv_class.CheckParameterSyntax(hv_params)
2096
          _CheckHVParams(self, node_list, hv_name, hv_params)
2097

    
2098
  def Exec(self, feedback_fn):
2099
    """Change the parameters of the cluster.
2100

2101
    """
2102
    if self.op.vg_name is not None:
2103
      new_volume = self.op.vg_name
2104
      if not new_volume:
2105
        new_volume = None
2106
      if new_volume != self.cfg.GetVGName():
2107
        self.cfg.SetVGName(new_volume)
2108
      else:
2109
        feedback_fn("Cluster LVM configuration already in desired"
2110
                    " state, not changing")
2111
    if self.op.hvparams:
2112
      self.cluster.hvparams = self.new_hvparams
2113
    if self.op.enabled_hypervisors is not None:
2114
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2115
    if self.op.beparams:
2116
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2117
    if self.op.nicparams:
2118
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2119

    
2120
    if self.op.candidate_pool_size is not None:
2121
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2122
      # we need to update the pool size here, otherwise the save will fail
2123
      _AdjustCandidatePool(self, [])
2124

    
2125
    self.cfg.Update(self.cluster, feedback_fn)
2126

    
2127

    
2128
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2129
  """Distribute additional files which are part of the cluster configuration.
2130

2131
  ConfigWriter takes care of distributing the config and ssconf files, but
2132
  there are more files which should be distributed to all nodes. This function
2133
  makes sure those are copied.
2134

2135
  @param lu: calling logical unit
2136
  @param additional_nodes: list of nodes not in the config to distribute to
2137

2138
  """
2139
  # 1. Gather target nodes
2140
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2141
  dist_nodes = lu.cfg.GetNodeList()
2142
  if additional_nodes is not None:
2143
    dist_nodes.extend(additional_nodes)
2144
  if myself.name in dist_nodes:
2145
    dist_nodes.remove(myself.name)
2146

    
2147
  # 2. Gather files to distribute
2148
  dist_files = set([constants.ETC_HOSTS,
2149
                    constants.SSH_KNOWN_HOSTS_FILE,
2150
                    constants.RAPI_CERT_FILE,
2151
                    constants.RAPI_USERS_FILE,
2152
                    constants.HMAC_CLUSTER_KEY,
2153
                   ])
2154

    
2155
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2156
  for hv_name in enabled_hypervisors:
2157
    hv_class = hypervisor.GetHypervisor(hv_name)
2158
    dist_files.update(hv_class.GetAncillaryFiles())
2159

    
2160
  # 3. Perform the files upload
2161
  for fname in dist_files:
2162
    if os.path.exists(fname):
2163
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2164
      for to_node, to_result in result.items():
2165
        msg = to_result.fail_msg
2166
        if msg:
2167
          msg = ("Copy of file %s to node %s failed: %s" %
2168
                 (fname, to_node, msg))
2169
          lu.proc.LogWarning(msg)
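# Sketched call shapes (see the actual callers in this module for details):
#   _RedistributeAncillaryFiles(lu)                            # known nodes
#   _RedistributeAncillaryFiles(lu, additional_nodes=[name])   # plus new one
# the second form fits a node-addition path, where the new node is not yet
# part of the configuration.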
2170

    
2171

    
2172
class LURedistributeConfig(NoHooksLU):
2173
  """Force the redistribution of cluster configuration.
2174

2175
  This is a very simple LU.
2176

2177
  """
2178
  _OP_REQP = []
2179
  REQ_BGL = False
2180

    
2181
  def ExpandNames(self):
2182
    self.needed_locks = {
2183
      locking.LEVEL_NODE: locking.ALL_SET,
2184
    }
2185
    self.share_locks[locking.LEVEL_NODE] = 1
2186

    
2187
  def CheckPrereq(self):
2188
    """Check prerequisites.
2189

2190
    """
2191

    
2192
  def Exec(self, feedback_fn):
2193
    """Redistribute the configuration.
2194

2195
    """
2196
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2197
    _RedistributeAncillaryFiles(self)
2198

    
2199

    
2200
def _WaitForSync(lu, instance, oneshot=False):
2201
  """Sleep and poll for an instance's disk to sync.
2202

2203
  """
2204
  if not instance.disks:
2205
    return True
2206

    
2207
  if not oneshot:
2208
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2209

    
2210
  node = instance.primary_node
2211

    
2212
  for dev in instance.disks:
2213
    lu.cfg.SetDiskID(dev, node)
2214

    
2215
  # TODO: Convert to utils.Retry
2216

    
2217
  retries = 0
2218
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2219
  while True:
2220
    max_time = 0
2221
    done = True
2222
    cumul_degraded = False
2223
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2224
    msg = rstats.fail_msg
2225
    if msg:
2226
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2227
      retries += 1
2228
      if retries >= 10:
2229
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2230
                                 " aborting." % node)
2231
      time.sleep(6)
2232
      continue
2233
    rstats = rstats.payload
2234
    retries = 0
2235
    for i, mstat in enumerate(rstats):
2236
      if mstat is None:
2237
        lu.LogWarning("Can't compute data for node %s/%s",
2238
                           node, instance.disks[i].iv_name)
2239
        continue
2240

    
2241
      cumul_degraded = (cumul_degraded or
2242
                        (mstat.is_degraded and mstat.sync_percent is None))
2243
      if mstat.sync_percent is not None:
2244
        done = False
2245
        if mstat.estimated_time is not None:
2246
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2247
          max_time = mstat.estimated_time
2248
        else:
2249
          rem_time = "no time estimate"
2250
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2251
                        (instance.disks[i].iv_name, mstat.sync_percent,
2252
                         rem_time))
2253

    
2254
    # if we're done but degraded, let's do a few small retries, to
2255
    # make sure we see a stable and not transient situation; therefore
2256
    # we force restart of the loop
2257
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2258
      logging.info("Degraded disks found, %d retries left", degr_retries)
2259
      degr_retries -= 1
2260
      time.sleep(1)
2261
      continue
2262

    
2263
    if done or oneshot:
2264
      break
2265

    
2266
    time.sleep(min(60, max_time))
2267

    
2268
  if done:
2269
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2270
  return not cumul_degraded
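# Sketched usage: after creating or activating mirrored disks a LU calls
# something like "_WaitForSync(lu, instance)" and treats a False return value
# as "disks still degraded"; the exact call sites are elsewhere in this
# module.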
2271

    
2272

    
2273
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2274
  """Check that mirrors are not degraded.
2275

2276
  The ldisk parameter, if True, will change the test from the
2277
  is_degraded attribute (which represents overall non-ok status for
2278
  the device(s)) to the ldisk (representing the local storage status).
2279

2280
  """
2281
  lu.cfg.SetDiskID(dev, node)
2282

    
2283
  result = True
2284

    
2285
  if on_primary or dev.AssembleOnSecondary():
2286
    rstats = lu.rpc.call_blockdev_find(node, dev)
2287
    msg = rstats.fail_msg
2288
    if msg:
2289
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2290
      result = False
2291
    elif not rstats.payload:
2292
      lu.LogWarning("Can't find disk on node %s", node)
2293
      result = False
2294
    else:
2295
      if ldisk:
2296
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2297
      else:
2298
        result = result and not rstats.payload.is_degraded
2299

    
2300
  if dev.children:
2301
    for child in dev.children:
2302
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2303

    
2304
  return result
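# Note on the ldisk flag above: with ldisk=False the check looks at the
# overall is_degraded status of the mirror, while ldisk=True only asks
# whether the local storage is in LDS_OKAY state, i.e. a stricter per-node
# view of the same device.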
2305

    
2306

    
2307
class LUDiagnoseOS(NoHooksLU):
2308
  """Logical unit for OS diagnose/query.
2309

2310
  """
2311
  _OP_REQP = ["output_fields", "names"]
2312
  REQ_BGL = False
2313
  _FIELDS_STATIC = utils.FieldSet()
2314
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2315
  # Fields that need calculation of global os validity
2316
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2317

    
2318
  def ExpandNames(self):
2319
    if self.op.names:
2320
      raise errors.OpPrereqError("Selective OS query not supported",
2321
                                 errors.ECODE_INVAL)
2322

    
2323
    _CheckOutputFields(static=self._FIELDS_STATIC,
2324
                       dynamic=self._FIELDS_DYNAMIC,
2325
                       selected=self.op.output_fields)
2326

    
2327
    # Lock all nodes, in shared mode
2328
    # Temporary removal of locks, should be reverted later
2329
    # TODO: reintroduce locks when they are lighter-weight
2330
    self.needed_locks = {}
2331
    #self.share_locks[locking.LEVEL_NODE] = 1
2332
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2333

    
2334
  def CheckPrereq(self):
2335
    """Check prerequisites.
2336

2337
    """
2338

    
2339
  @staticmethod
2340
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose, variants)
        as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
2355
    all_os = {}
2356
    # we build here the list of nodes that didn't fail the RPC (at RPC
2357
    # level), so that nodes with a non-responding node daemon don't
2358
    # make all OSes invalid
2359
    good_nodes = [node_name for node_name in rlist
2360
                  if not rlist[node_name].fail_msg]
2361
    for node_name, nr in rlist.items():
2362
      if nr.fail_msg or not nr.payload:
2363
        continue
2364
      for name, path, status, diagnose, variants in nr.payload:
2365
        if name not in all_os:
2366
          # build a list of nodes for this os containing empty lists
2367
          # for each node in node_list
2368
          all_os[name] = {}
2369
          for nname in good_nodes:
2370
            all_os[name][nname] = []
2371
        all_os[name][node_name].append((path, status, diagnose, variants))
2372
    return all_os
2373

    
2374
  def Exec(self, feedback_fn):
2375
    """Compute the list of OSes.
2376

2377
    """
2378
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2379
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2380
    pol = self._DiagnoseByOS(node_data)
2381
    output = []
2382
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2383
    calc_variants = "variants" in self.op.output_fields
2384

    
2385
    for os_name, os_data in pol.items():
2386
      row = []
2387
      if calc_valid:
2388
        valid = True
2389
        variants = None
2390
        for osl in os_data.values():
2391
          valid = valid and osl and osl[0][1]
2392
          if not valid:
2393
            variants = None
2394
            break
2395
          if calc_variants:
2396
            node_variants = osl[0][3]
2397
            if variants is None:
2398
              variants = node_variants
2399
            else:
2400
              variants = [v for v in variants if v in node_variants]
2401

    
2402
      for field in self.op.output_fields:
2403
        if field == "name":
2404
          val = os_name
2405
        elif field == "valid":
2406
          val = valid
2407
        elif field == "node_status":
2408
          # this is just a copy of the dict
2409
          val = {}
2410
          for node_name, nos_list in os_data.items():
2411
            val[node_name] = nos_list
2412
        elif field == "variants":
2413
          val =  variants
2414
        else:
2415
          raise errors.ParameterError(field)
2416
        row.append(val)
2417
      output.append(row)
2418

    
2419
    return output
2420

    
2421

    
2422
class LURemoveNode(LogicalUnit):
2423
  """Logical unit for removing a node.
2424

2425
  """
2426
  HPATH = "node-remove"
2427
  HTYPE = constants.HTYPE_NODE
2428
  _OP_REQP = ["node_name"]
2429

    
2430
  def BuildHooksEnv(self):
2431
    """Build hooks env.
2432

2433
    This doesn't run on the target node in the pre phase as a failed
2434
    node would then be impossible to remove.
2435

2436
    """
2437
    env = {
2438
      "OP_TARGET": self.op.node_name,
2439
      "NODE_NAME": self.op.node_name,
2440
      }
2441
    all_nodes = self.cfg.GetNodeList()
2442
    try:
2443
      all_nodes.remove(self.op.node_name)
2444
    except ValueError:
2445
      logging.warning("Node %s which is about to be removed not found"
2446
                      " in the all nodes list", self.op.node_name)
2447
    return env, all_nodes, all_nodes
2448

    
2449
  def CheckPrereq(self):
2450
    """Check prerequisites.
2451

2452
    This checks:
2453
     - the node exists in the configuration
2454
     - it does not have primary or secondary instances
2455
     - it's not the master
2456

2457
    Any errors are signaled by raising errors.OpPrereqError.
2458

2459
    """
2460
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2461
    node = self.cfg.GetNodeInfo(self.op.node_name)
2462
    assert node is not None
2463

    
2464
    instance_list = self.cfg.GetInstanceList()
2465

    
2466
    masternode = self.cfg.GetMasterNode()
2467
    if node.name == masternode:
2468
      raise errors.OpPrereqError("Node is the master node,"
2469
                                 " you need to failover first.",
2470
                                 errors.ECODE_INVAL)
2471

    
2472
    for instance_name in instance_list:
2473
      instance = self.cfg.GetInstanceInfo(instance_name)
2474
      if node.name in instance.all_nodes:
2475
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2476
                                   " please remove first." % instance_name,
2477
                                   errors.ECODE_INVAL)
2478
    self.op.node_name = node.name
2479
    self.node = node
2480

    
2481
  def Exec(self, feedback_fn):
2482
    """Removes the node from the cluster.
2483

2484
    """
2485
    node = self.node
2486
    logging.info("Stopping the node daemon and removing configs from node %s",
2487
                 node.name)
2488

    
2489
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2490

    
2491
    # Promote nodes to master candidate as needed
2492
    _AdjustCandidatePool(self, exceptions=[node.name])
2493
    self.context.RemoveNode(node.name)
2494

    
2495
    # Run post hooks on the node before it's removed
2496
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2497
    try:
2498
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2499
    except:
2500
      # pylint: disable-msg=W0702
2501
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2502

    
2503
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2504
    msg = result.fail_msg
2505
    if msg:
2506
      self.LogWarning("Errors encountered on the remote node while leaving"
2507
                      " the cluster: %s", msg)
2508

    
2509

    
2510
class LUQueryNodes(NoHooksLU):
2511
  """Logical unit for querying nodes.
2512

2513
  """
2514
  # pylint: disable-msg=W0142
2515
  _OP_REQP = ["output_fields", "names", "use_locking"]
2516
  REQ_BGL = False
2517

    
2518
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2519
                    "master_candidate", "offline", "drained"]
2520

    
2521
  _FIELDS_DYNAMIC = utils.FieldSet(
2522
    "dtotal", "dfree",
2523
    "mtotal", "mnode", "mfree",
2524
    "bootid",
2525
    "ctotal", "cnodes", "csockets",
2526
    )
2527

    
2528
  _FIELDS_STATIC = utils.FieldSet(*[
2529
    "pinst_cnt", "sinst_cnt",
2530
    "pinst_list", "sinst_list",
2531
    "pip", "sip", "tags",
2532
    "master",
2533
    "role"] + _SIMPLE_FIELDS
2534
    )
2535

    
2536
  def ExpandNames(self):
2537
    _CheckOutputFields(static=self._FIELDS_STATIC,
2538
                       dynamic=self._FIELDS_DYNAMIC,
2539
                       selected=self.op.output_fields)
2540

    
2541
    self.needed_locks = {}
2542
    self.share_locks[locking.LEVEL_NODE] = 1
2543

    
2544
    if self.op.names:
2545
      self.wanted = _GetWantedNodes(self, self.op.names)
2546
    else:
2547
      self.wanted = locking.ALL_SET
2548

    
2549
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2550
    self.do_locking = self.do_node_query and self.op.use_locking
2551
    if self.do_locking:
2552
      # if we don't request only static fields, we need to lock the nodes
2553
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2554

    
2555
  def CheckPrereq(self):
2556
    """Check prerequisites.
2557

2558
    """
2559
    # The validation of the node list is done in the _GetWantedNodes,
2560
    # if non empty, and if empty, there's no validation to do
2561
    pass
2562

    
2563
  def Exec(self, feedback_fn):
2564
    """Computes the list of nodes and their attributes.
2565

2566
    """
2567
    all_info = self.cfg.GetAllNodesInfo()
2568
    if self.do_locking:
2569
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2570
    elif self.wanted != locking.ALL_SET:
2571
      nodenames = self.wanted
2572
      missing = set(nodenames).difference(all_info.keys())
2573
      if missing:
2574
        raise errors.OpExecError(
2575
          "Some nodes were removed before retrieving their data: %s" % missing)
2576
    else:
2577
      nodenames = all_info.keys()
2578

    
2579
    nodenames = utils.NiceSort(nodenames)
2580
    nodelist = [all_info[name] for name in nodenames]
2581

    
2582
    # begin data gathering
2583

    
2584
    if self.do_node_query:
2585
      live_data = {}
2586
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2587
                                          self.cfg.GetHypervisorType())
2588
      for name in nodenames:
2589
        nodeinfo = node_data[name]
2590
        if not nodeinfo.fail_msg and nodeinfo.payload:
2591
          nodeinfo = nodeinfo.payload
2592
          fn = utils.TryConvert
2593
          live_data[name] = {
2594
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2595
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2596
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2597
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2598
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2599
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2600
            "bootid": nodeinfo.get('bootid', None),
2601
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2602
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2603
            }
2604
        else:
2605
          live_data[name] = {}
2606
    else:
2607
      live_data = dict.fromkeys(nodenames, {})
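      # note: dict.fromkeys() reuses the same (empty) dict object for every
      # key; that is fine here because these per-node dicts are never mutated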
2608

    
2609
    node_to_primary = dict([(name, set()) for name in nodenames])
2610
    node_to_secondary = dict([(name, set()) for name in nodenames])
2611

    
2612
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2613
                             "sinst_cnt", "sinst_list"))
2614
    if inst_fields & frozenset(self.op.output_fields):
2615
      inst_data = self.cfg.GetAllInstancesInfo()
2616

    
2617
      for inst in inst_data.values():
2618
        if inst.primary_node in node_to_primary:
2619
          node_to_primary[inst.primary_node].add(inst.name)
2620
        for secnode in inst.secondary_nodes:
2621
          if secnode in node_to_secondary:
2622
            node_to_secondary[secnode].add(inst.name)
2623

    
2624
    master_node = self.cfg.GetMasterNode()
2625

    
2626
    # end data gathering
2627

    
2628
    output = []
2629
    for node in nodelist:
2630
      node_output = []
2631
      for field in self.op.output_fields:
2632
        if field in self._SIMPLE_FIELDS:
2633
          val = getattr(node, field)
2634
        elif field == "pinst_list":
2635
          val = list(node_to_primary[node.name])
2636
        elif field == "sinst_list":
2637
          val = list(node_to_secondary[node.name])
2638
        elif field == "pinst_cnt":
2639
          val = len(node_to_primary[node.name])
2640
        elif field == "sinst_cnt":
2641
          val = len(node_to_secondary[node.name])
2642
        elif field == "pip":
2643
          val = node.primary_ip
2644
        elif field == "sip":
2645
          val = node.secondary_ip
2646
        elif field == "tags":
2647
          val = list(node.GetTags())
2648
        elif field == "master":
2649
          val = node.name == master_node
2650
        elif self._FIELDS_DYNAMIC.Matches(field):
2651
          val = live_data[node.name].get(field, None)
2652
        elif field == "role":
2653
          if node.name == master_node:
2654
            val = "M"
2655
          elif node.master_candidate:
2656
            val = "C"
2657
          elif node.drained:
2658
            val = "D"
2659
          elif node.offline:
2660
            val = "O"
2661
          else:
2662
            val = "R"
2663
        else:
2664
          raise errors.ParameterError(field)
2665
        node_output.append(val)
2666
      output.append(node_output)
2667

    
2668
    return output
2669

    
2670

    
2671
class LUQueryNodeVolumes(NoHooksLU):
2672
  """Logical unit for getting volumes on node(s).
2673

2674
  """
2675
  _OP_REQP = ["nodes", "output_fields"]
2676
  REQ_BGL = False
2677
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2678
  _FIELDS_STATIC = utils.FieldSet("node")
2679

    
2680
  def ExpandNames(self):
2681
    _CheckOutputFields(static=self._FIELDS_STATIC,
2682
                       dynamic=self._FIELDS_DYNAMIC,
2683
                       selected=self.op.output_fields)
2684

    
2685
    self.needed_locks = {}
2686
    self.share_locks[locking.LEVEL_NODE] = 1
2687
    if not self.op.nodes:
2688
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2689
    else:
2690
      self.needed_locks[locking.LEVEL_NODE] = \
2691
        _GetWantedNodes(self, self.op.nodes)
2692

    
2693
  def CheckPrereq(self):
2694
    """Check prerequisites.
2695

2696
    This checks that the fields required are valid output fields.
2697

2698
    """
2699
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2700

    
2701
  def Exec(self, feedback_fn):
2702
    """Computes the list of nodes and their attributes.
2703

2704
    """
2705
    nodenames = self.nodes
2706
    volumes = self.rpc.call_node_volumes(nodenames)
2707

    
2708
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2709
             in self.cfg.GetInstanceList()]
2710

    
2711
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2712

    
2713
    output = []
2714
    for node in nodenames:
2715
      nresult = volumes[node]
2716
      if nresult.offline:
2717
        continue
2718
      msg = nresult.fail_msg
2719
      if msg:
2720
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2721
        continue
2722

    
2723
      node_vols = nresult.payload[:]
2724
      node_vols.sort(key=lambda vol: vol['dev'])
2725

    
2726
      for vol in node_vols:
2727
        node_output = []
2728
        for field in self.op.output_fields:
2729
          if field == "node":
2730
            val = node
2731
          elif field == "phys":
2732
            val = vol['dev']
2733
          elif field == "vg":
2734
            val = vol['vg']
2735
          elif field == "name":
2736
            val = vol['name']
2737
          elif field == "size":
2738
            val = int(float(vol['size']))
2739
          elif field == "instance":
2740
            for inst in ilist:
2741
              if node not in lv_by_node[inst]:
2742
                continue
2743
              if vol['name'] in lv_by_node[inst][node]:
2744
                val = inst.name
2745
                break
2746
            else:
2747
              val = '-'
2748
          else:
2749
            raise errors.ParameterError(field)
2750
          node_output.append(str(val))
2751

    
2752
        output.append(node_output)
2753

    
2754
    return output
2755

    
2756

    
2757
class LUQueryNodeStorage(NoHooksLU):
2758
  """Logical unit for getting information on storage units on node(s).
2759

2760
  """
2761
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2762
  REQ_BGL = False
2763
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2764

    
2765
  def ExpandNames(self):
2766
    storage_type = self.op.storage_type
2767

    
2768
    if storage_type not in constants.VALID_STORAGE_TYPES:
2769
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2770
                                 errors.ECODE_INVAL)
2771

    
2772
    _CheckOutputFields(static=self._FIELDS_STATIC,
2773
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2774
                       selected=self.op.output_fields)
2775

    
2776
    self.needed_locks = {}
2777
    self.share_locks[locking.LEVEL_NODE] = 1
2778

    
2779
    if self.op.nodes:
2780
      self.needed_locks[locking.LEVEL_NODE] = \
2781
        _GetWantedNodes(self, self.op.nodes)
2782
    else:
2783
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2784

    
2785
  def CheckPrereq(self):
2786
    """Check prerequisites.
2787

2788
    This checks that the fields required are valid output fields.
2789

2790
    """
2791
    self.op.name = getattr(self.op, "name", None)
2792

    
2793
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2794

    
2795
  def Exec(self, feedback_fn):
2796
    """Computes the list of nodes and their attributes.
2797

2798
    """
2799
    # Always get name to sort by
2800
    if constants.SF_NAME in self.op.output_fields:
2801
      fields = self.op.output_fields[:]
2802
    else:
2803
      fields = [constants.SF_NAME] + self.op.output_fields
2804

    
2805
    # Never ask for node or type as it's only known to the LU
2806
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
2807
      while extra in fields:
2808
        fields.remove(extra)
2809

    
2810
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2811
    name_idx = field_idx[constants.SF_NAME]
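    # e.g. for fields == ["name", "size"] (illustrative field names) this
    # yields field_idx == {"name": 0, "size": 1} and name_idx == 0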
2812

    
2813
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2814
    data = self.rpc.call_storage_list(self.nodes,
2815
                                      self.op.storage_type, st_args,
2816
                                      self.op.name, fields)
2817

    
2818
    result = []
2819

    
2820
    for node in utils.NiceSort(self.nodes):
2821
      nresult = data[node]
2822
      if nresult.offline:
2823
        continue
2824

    
2825
      msg = nresult.fail_msg
2826
      if msg:
2827
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2828
        continue
2829

    
2830
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2831

    
2832
      for name in utils.NiceSort(rows.keys()):
2833
        row = rows[name]
2834

    
2835
        out = []
2836

    
2837
        for field in self.op.output_fields:
2838
          if field == constants.SF_NODE:
2839
            val = node
2840
          elif field == constants.SF_TYPE:
2841
            val = self.op.storage_type
2842
          elif field in field_idx:
2843
            val = row[field_idx[field]]
2844
          else:
2845
            raise errors.ParameterError(field)
2846

    
2847
          out.append(val)
2848

    
2849
        result.append(out)
2850

    
2851
    return result
2852

    
2853

    
2854
class LUModifyNodeStorage(NoHooksLU):
2855
  """Logical unit for modifying a storage volume on a node.
2856

2857
  """
2858
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2859
  REQ_BGL = False
2860

    
2861
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2863

    
2864
    storage_type = self.op.storage_type
2865
    if storage_type not in constants.VALID_STORAGE_TYPES:
2866
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2867
                                 errors.ECODE_INVAL)
2868

    
2869
  def ExpandNames(self):
2870
    self.needed_locks = {
2871
      locking.LEVEL_NODE: self.op.node_name,
2872
      }
2873

    
2874
  def CheckPrereq(self):
2875
    """Check prerequisites.
2876

2877
    """
2878
    storage_type = self.op.storage_type
2879

    
2880
    try:
2881
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2882
    except KeyError:
2883
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2884
                                 " modified" % storage_type,
2885
                                 errors.ECODE_INVAL)
2886

    
2887
    diff = set(self.op.changes.keys()) - modifiable
2888
    if diff:
2889
      raise errors.OpPrereqError("The following fields can not be modified for"
2890
                                 " storage units of type '%s': %r" %
2891
                                 (storage_type, list(diff)),
2892
                                 errors.ECODE_INVAL)
2893

    
2894
  def Exec(self, feedback_fn):
2895
    """Computes the list of nodes and their attributes.
2896

2897
    """
2898
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2899
    result = self.rpc.call_storage_modify(self.op.node_name,
2900
                                          self.op.storage_type, st_args,
2901
                                          self.op.name, self.op.changes)
2902
    result.Raise("Failed to modify storage unit '%s' on %s" %
2903
                 (self.op.name, self.op.node_name))
2904

    
2905

    
2906
class LUAddNode(LogicalUnit):
2907
  """Logical unit for adding node to the cluster.
2908

2909
  """
2910
  HPATH = "node-add"
2911
  HTYPE = constants.HTYPE_NODE
2912
  _OP_REQP = ["node_name"]
2913

    
2914
  def BuildHooksEnv(self):
2915
    """Build hooks env.
2916

2917
    This will run on all nodes before, and on all nodes + the new node after.
2918

2919
    """
2920
    env = {
2921
      "OP_TARGET": self.op.node_name,
2922
      "NODE_NAME": self.op.node_name,
2923
      "NODE_PIP": self.op.primary_ip,
2924
      "NODE_SIP": self.op.secondary_ip,
2925
      }
2926
    nodes_0 = self.cfg.GetNodeList()
2927
    nodes_1 = nodes_0 + [self.op.node_name, ]
2928
    return env, nodes_0, nodes_1
2929

    
2930
  def CheckPrereq(self):
2931
    """Check prerequisites.
2932

2933
    This checks:
2934
     - the new node is not already in the config
2935
     - it is resolvable
2936
     - its parameters (single/dual homed) matches the cluster
2937

2938
    Any errors are signaled by raising errors.OpPrereqError.
2939

2940
    """
2941
    node_name = self.op.node_name
2942
    cfg = self.cfg
2943

    
2944
    dns_data = utils.GetHostInfo(node_name)
2945

    
2946
    node = dns_data.name
2947
    primary_ip = self.op.primary_ip = dns_data.ip
2948
    secondary_ip = getattr(self.op, "secondary_ip", None)
2949
    if secondary_ip is None:
2950
      secondary_ip = primary_ip
2951
    if not utils.IsValidIP(secondary_ip):
2952
      raise errors.OpPrereqError("Invalid secondary IP given",
2953
                                 errors.ECODE_INVAL)
2954
    self.op.secondary_ip = secondary_ip
2955

    
2956
    node_list = cfg.GetNodeList()
2957
    if not self.op.readd and node in node_list:
2958
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2959
                                 node, errors.ECODE_EXISTS)
2960
    elif self.op.readd and node not in node_list:
2961
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
2962
                                 errors.ECODE_NOENT)
2963

    
2964
    for existing_node_name in node_list:
2965
      existing_node = cfg.GetNodeInfo(existing_node_name)
2966

    
2967
      if self.op.readd and node == existing_node_name:
2968
        if (existing_node.primary_ip != primary_ip or
2969
            existing_node.secondary_ip != secondary_ip):
2970
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2971
                                     " address configuration as before",
2972
                                     errors.ECODE_INVAL)
2973
        continue
2974

    
2975
      if (existing_node.primary_ip == primary_ip or
2976
          existing_node.secondary_ip == primary_ip or
2977
          existing_node.primary_ip == secondary_ip or
2978
          existing_node.secondary_ip == secondary_ip):
2979
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2980
                                   " existing node %s" % existing_node.name,
2981
                                   errors.ECODE_NOTUNIQUE)
2982

    
2983
    # check that the type of the node (single versus dual homed) is the
2984
    # same as for the master
2985
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2986
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2987
    newbie_singlehomed = secondary_ip == primary_ip
2988
    if master_singlehomed != newbie_singlehomed:
2989
      if master_singlehomed:
2990
        raise errors.OpPrereqError("The master has no private ip but the"
2991
                                   " new node has one",
2992
                                   errors.ECODE_INVAL)
2993
      else:
2994
        raise errors.OpPrereqError("The master has a private ip but the"
2995
                                   " new node doesn't have one",
2996
                                   errors.ECODE_INVAL)
2997

    
2998
    # checks reachability
2999
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3000
      raise errors.OpPrereqError("Node not reachable by ping",
3001
                                 errors.ECODE_ENVIRON)
3002

    
3003
    if not newbie_singlehomed:
3004
      # check reachability from my secondary ip to newbie's secondary ip
3005
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3006
                           source=myself.secondary_ip):
3007
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3008
                                   " based ping to noded port",
3009
                                   errors.ECODE_ENVIRON)
3010

    
3011
    if self.op.readd:
3012
      exceptions = [node]
3013
    else:
3014
      exceptions = []
3015

    
3016
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3017

    
3018
    if self.op.readd:
3019
      self.new_node = self.cfg.GetNodeInfo(node)
3020
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3021
    else:
3022
      self.new_node = objects.Node(name=node,
3023
                                   primary_ip=primary_ip,
3024
                                   secondary_ip=secondary_ip,
3025
                                   master_candidate=self.master_candidate,
3026
                                   offline=False, drained=False)
3027

    
3028
  def Exec(self, feedback_fn):
3029
    """Adds the new node to the cluster.
3030

3031
    """
3032
    new_node = self.new_node
3033
    node = new_node.name
3034

    
3035
    # for re-adds, reset the offline/drained/master-candidate flags;
3036
    # we need to reset here, otherwise offline would prevent RPC calls
3037
    # later in the procedure; this also means that if the re-add
3038
    # fails, we are left with a non-offlined, broken node
3039
    if self.op.readd:
3040
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3041
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3042
      # if we demote the node, we do cleanup later in the procedure
3043
      new_node.master_candidate = self.master_candidate
3044

    
3045
    # notify the user about any possible mc promotion
3046
    if new_node.master_candidate:
3047
      self.LogInfo("Node will be a master candidate")
3048

    
3049
    # check connectivity
3050
    result = self.rpc.call_version([node])[node]
3051
    result.Raise("Can't get version information from node %s" % node)
3052
    if constants.PROTOCOL_VERSION == result.payload:
3053
      logging.info("Communication to node %s fine, sw version %s match",
3054
                   node, result.payload)
3055
    else:
3056
      raise errors.OpExecError("Version mismatch master version %s,"
3057
                               " node version %s" %
3058
                               (constants.PROTOCOL_VERSION, result.payload))
3059

    
3060
    # setup ssh on node
3061
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3062
      logging.info("Copy ssh key to node %s", node)
3063
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3064
      keyarray = []
3065
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3066
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3067
                  priv_key, pub_key]
3068

    
3069
      for i in keyfiles:
3070
        keyarray.append(utils.ReadFile(i))
3071

    
3072
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3073
                                      keyarray[2], keyarray[3], keyarray[4],
3074
                                      keyarray[5])
3075
      result.Raise("Cannot transfer ssh keys to the new node")
3076

    
3077
    # Add node to our /etc/hosts, and add key to known_hosts
3078
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3079
      utils.AddHostToEtcHosts(new_node.name)
3080

    
3081
    if new_node.secondary_ip != new_node.primary_ip:
3082
      result = self.rpc.call_node_has_ip_address(new_node.name,
3083
                                                 new_node.secondary_ip)
3084
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3085
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3086
      if not result.payload:
3087
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3088
                                 " you gave (%s). Please fix and re-run this"
3089
                                 " command." % new_node.secondary_ip)
3090

    
3091
    node_verify_list = [self.cfg.GetMasterNode()]
3092
    node_verify_param = {
3093
      constants.NV_NODELIST: [node],
3094
      # TODO: do a node-net-test as well?
3095
    }
3096

    
3097
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3098
                                       self.cfg.GetClusterName())
3099
    for verifier in node_verify_list:
3100
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3101
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3102
      if nl_payload:
3103
        for failed in nl_payload:
3104
          feedback_fn("ssh/hostname verification failed"
3105
                      " (checking from %s): %s" %
3106
                      (verifier, nl_payload[failed]))
3107
        raise errors.OpExecError("ssh/hostname verification failed.")
3108

    
3109
    if self.op.readd:
3110
      _RedistributeAncillaryFiles(self)
3111
      self.context.ReaddNode(new_node)
3112
      # make sure we redistribute the config
3113
      self.cfg.Update(new_node, feedback_fn)
3114
      # and make sure the new node will not have old files around
3115
      if not new_node.master_candidate:
3116
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3117
        msg = result.fail_msg
3118
        if msg:
3119
          self.LogWarning("Node failed to demote itself from master"
3120
                          " candidate status: %s" % msg)
3121
    else:
3122
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3123
      self.context.AddNode(new_node, self.proc.GetECId())
3124

    
3125

    
3126
class LUSetNodeParams(LogicalUnit):
3127
  """Modifies the parameters of a node.
3128

3129
  """
3130
  HPATH = "node-modify"
3131
  HTYPE = constants.HTYPE_NODE
3132
  _OP_REQP = ["node_name"]
3133
  REQ_BGL = False
3134

    
3135
  def CheckArguments(self):
3136
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3137
    _CheckBooleanOpField(self.op, 'master_candidate')
3138
    _CheckBooleanOpField(self.op, 'offline')
3139
    _CheckBooleanOpField(self.op, 'drained')
3140
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3141
    if all_mods.count(None) == 3:
3142
      raise errors.OpPrereqError("Please pass at least one modification",
3143
                                 errors.ECODE_INVAL)
3144
    if all_mods.count(True) > 1:
3145
      raise errors.OpPrereqError("Can't set the node into more than one"
3146
                                 " state at the same time",
3147
                                 errors.ECODE_INVAL)
3148

    
3149
  def ExpandNames(self):
3150
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3151

    
3152
  def BuildHooksEnv(self):
3153
    """Build hooks env.
3154

3155
    This runs on the master node.
3156

3157
    """
3158
    env = {
3159
      "OP_TARGET": self.op.node_name,
3160
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3161
      "OFFLINE": str(self.op.offline),
3162
      "DRAINED": str(self.op.drained),
3163
      }
3164
    nl = [self.cfg.GetMasterNode(),
3165
          self.op.node_name]
3166
    return env, nl, nl
3167

    
3168
  def CheckPrereq(self):
3169
    """Check prerequisites.
3170

3171
    This only checks the instance list against the existing names.
3172

3173
    """
3174
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3175

    
3176
    if (self.op.master_candidate is not None or
3177
        self.op.drained is not None or
3178
        self.op.offline is not None):
3179
      # we can't change the master's node flags
3180
      if self.op.node_name == self.cfg.GetMasterNode():
3181
        raise errors.OpPrereqError("The master role can be changed"
3182
                                   " only via masterfailover",
3183
                                   errors.ECODE_INVAL)
3184

    
3185
    # Boolean value that tells us whether we're offlining or draining the node
3186
    offline_or_drain = self.op.offline == True or self.op.drained == True
3187
    deoffline_or_drain = self.op.offline == False or self.op.drained == False
3188

    
3189
    if (node.master_candidate and
3190
        (self.op.master_candidate == False or offline_or_drain)):
3191
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
3192
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
3193
      if mc_now <= cp_size:
3194
        msg = ("Not enough master candidates (desired"
3195
               " %d, new value will be %d)" % (cp_size, mc_now-1))
3196
        # Only allow forcing the operation if it's an offline/drain operation,
3197
        # and we could not possibly promote more nodes.
3198
        # FIXME: this can still lead to issues if in any way another node which
3199
        # could be promoted appears in the meantime.
3200
        if self.op.force and offline_or_drain and mc_should == mc_max:
3201
          self.LogWarning(msg)
3202
        else:
3203
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
3204

    
3205
    if (self.op.master_candidate == True and
3206
        ((node.offline and not self.op.offline == False) or
3207
         (node.drained and not self.op.drained == False))):
3208
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3209
                                 " to master_candidate" % node.name,
3210
                                 errors.ECODE_INVAL)
3211

    
3212
    # If we're being deofflined/drained, we'll MC ourself if needed
3213
    if (deoffline_or_drain and not offline_or_drain and not
3214
        self.op.master_candidate == True and not node.master_candidate):
3215
      self.op.master_candidate = _DecideSelfPromotion(self)
3216
      if self.op.master_candidate:
3217
        self.LogInfo("Autopromoting node to master candidate")
3218

    
3219
    return
3220

    
3221
  def Exec(self, feedback_fn):
3222
    """Modifies a node.
3223

3224
    """
3225
    node = self.node
3226

    
3227
    result = []
3228
    changed_mc = False
3229

    
3230
    if self.op.offline is not None:
3231
      node.offline = self.op.offline
3232
      result.append(("offline", str(self.op.offline)))
3233
      if self.op.offline == True:
3234
        if node.master_candidate:
3235
          node.master_candidate = False
3236
          changed_mc = True
3237
          result.append(("master_candidate", "auto-demotion due to offline"))
3238
        if node.drained:
3239
          node.drained = False
3240
          result.append(("drained", "clear drained status due to offline"))
3241

    
3242
    if self.op.master_candidate is not None:
3243
      node.master_candidate = self.op.master_candidate
3244
      changed_mc = True
3245
      result.append(("master_candidate", str(self.op.master_candidate)))
3246
      if self.op.master_candidate == False:
3247
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3248
        msg = rrc.fail_msg
3249
        if msg:
3250
          self.LogWarning("Node failed to demote itself: %s" % msg)
3251

    
3252
    if self.op.drained is not None:
3253
      node.drained = self.op.drained
3254
      result.append(("drained", str(self.op.drained)))
3255
      if self.op.drained == True:
3256
        if node.master_candidate:
3257
          node.master_candidate = False
3258
          changed_mc = True
3259
          result.append(("master_candidate", "auto-demotion due to drain"))
3260
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3261
          msg = rrc.fail_msg
3262
          if msg:
3263
            self.LogWarning("Node failed to demote itself: %s" % msg)
3264
        if node.offline:
3265
          node.offline = False
3266
          result.append(("offline", "clear offline status due to drain"))
3267

    
3268
    # this will trigger configuration file update, if needed
3269
    self.cfg.Update(node, feedback_fn)
3270
    # this will trigger job queue propagation or cleanup
3271
    if changed_mc:
3272
      self.context.ReaddNode(node)
3273

    
3274
    return result
3275

    
3276

    
3277
class LUPowercycleNode(NoHooksLU):
3278
  """Powercycles a node.
3279

3280
  """
3281
  _OP_REQP = ["node_name", "force"]
3282
  REQ_BGL = False
3283

    
3284
  def CheckArguments(self):
3285
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3286
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3287
      raise errors.OpPrereqError("The node is the master and the force"
3288
                                 " parameter was not set",
3289
                                 errors.ECODE_INVAL)
3290

    
3291
  def ExpandNames(self):
3292
    """Locking for PowercycleNode.
3293

3294
    This is a last-resort option and shouldn't block on other
3295
    jobs. Therefore, we grab no locks.
3296

3297
    """
3298
    self.needed_locks = {}
3299

    
3300
  def CheckPrereq(self):
3301
    """Check prerequisites.
3302

3303
    This LU has no prereqs.
3304

3305
    """
3306
    pass
3307

    
3308
  def Exec(self, feedback_fn):
3309
    """Reboots a node.
3310

3311
    """
3312
    result = self.rpc.call_node_powercycle(self.op.node_name,
3313
                                           self.cfg.GetHypervisorType())
3314
    result.Raise("Failed to schedule the reboot")
3315
    return result.payload
3316

    
3317

    
3318
class LUQueryClusterInfo(NoHooksLU):
3319
  """Query cluster configuration.
3320

3321
  """
3322
  _OP_REQP = []
3323
  REQ_BGL = False
3324

    
3325
  def ExpandNames(self):
3326
    self.needed_locks = {}
3327

    
3328
  def CheckPrereq(self):
3329
    """No prerequsites needed for this LU.
3330

3331
    """
3332
    pass
3333

    
3334
  def Exec(self, feedback_fn):
3335
    """Return cluster config.
3336

3337
    """
3338
    cluster = self.cfg.GetClusterInfo()
3339
    result = {
3340
      "software_version": constants.RELEASE_VERSION,
3341
      "protocol_version": constants.PROTOCOL_VERSION,
3342
      "config_version": constants.CONFIG_VERSION,
3343
      "os_api_version": max(constants.OS_API_VERSIONS),
3344
      "export_version": constants.EXPORT_VERSION,
3345
      "architecture": (platform.architecture()[0], platform.machine()),
3346
      "name": cluster.cluster_name,
3347
      "master": cluster.master_node,
3348
      "default_hypervisor": cluster.enabled_hypervisors[0],
3349
      "enabled_hypervisors": cluster.enabled_hypervisors,
3350
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3351
                        for hypervisor_name in cluster.enabled_hypervisors]),
3352
      "beparams": cluster.beparams,
3353
      "nicparams": cluster.nicparams,
3354
      "candidate_pool_size": cluster.candidate_pool_size,
3355
      "master_netdev": cluster.master_netdev,
3356
      "volume_group_name": cluster.volume_group_name,
3357
      "file_storage_dir": cluster.file_storage_dir,
3358
      "ctime": cluster.ctime,
3359
      "mtime": cluster.mtime,
3360
      "uuid": cluster.uuid,
3361
      "tags": list(cluster.GetTags()),
3362
      }
3363

    
3364
    return result
3365

    
3366

    
3367
class LUQueryConfigValues(NoHooksLU):
3368
  """Return configuration values.
3369

3370
  """
3371
  _OP_REQP = []
3372
  REQ_BGL = False
3373
  _FIELDS_DYNAMIC = utils.FieldSet()
3374
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3375
                                  "watcher_pause")
3376

    
3377
  def ExpandNames(self):
3378
    self.needed_locks = {}
3379

    
3380
    _CheckOutputFields(static=self._FIELDS_STATIC,
3381
                       dynamic=self._FIELDS_DYNAMIC,
3382
                       selected=self.op.output_fields)
3383

    
3384
  def CheckPrereq(self):
3385
    """No prerequisites.
3386

3387
    """
3388
    pass
3389

    
3390
  def Exec(self, feedback_fn):
3391
    """Dump a representation of the cluster config to the standard output.
3392

3393
    """
3394
    values = []
3395
    for field in self.op.output_fields:
3396
      if field == "cluster_name":
3397
        entry = self.cfg.GetClusterName()
3398
      elif field == "master_node":
3399
        entry = self.cfg.GetMasterNode()
3400
      elif field == "drain_flag":
3401
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3402
      elif field == "watcher_pause":
3403
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3404
      else:
3405
        raise errors.ParameterError(field)
3406
      values.append(entry)
3407
    return values
3408

    
3409

    
3410
class LUActivateInstanceDisks(NoHooksLU):
3411
  """Bring up an instance's disks.
3412

3413
  """
3414
  _OP_REQP = ["instance_name"]
3415
  REQ_BGL = False
3416

    
3417
  def ExpandNames(self):
3418
    self._ExpandAndLockInstance()
3419
    self.needed_locks[locking.LEVEL_NODE] = []
3420
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3421

    
3422
  def DeclareLocks(self, level):
3423
    if level == locking.LEVEL_NODE:
3424
      self._LockInstancesNodes()
3425

    
3426
  def CheckPrereq(self):
3427
    """Check prerequisites.
3428

3429
    This checks that the instance is in the cluster.
3430

3431
    """
3432
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3433
    assert self.instance is not None, \
3434
      "Cannot retrieve locked instance %s" % self.op.instance_name
3435
    _CheckNodeOnline(self, self.instance.primary_node)
3436
    if not hasattr(self.op, "ignore_size"):
3437
      self.op.ignore_size = False
3438

    
3439
  def Exec(self, feedback_fn):
3440
    """Activate the disks.
3441

3442
    """
3443
    disks_ok, disks_info = \
3444
              _AssembleInstanceDisks(self, self.instance,
3445
                                     ignore_size=self.op.ignore_size)
3446
    if not disks_ok:
3447
      raise errors.OpExecError("Cannot activate block devices")
3448

    
3449
    return disks_info
3450

    
3451

    
3452
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3453
                           ignore_size=False):
3454
  """Prepare the block devices for an instance.
3455

3456
  This sets up the block devices on all nodes.
3457

3458
  @type lu: L{LogicalUnit}
3459
  @param lu: the logical unit on whose behalf we execute
3460
  @type instance: L{objects.Instance}
3461
  @param instance: the instance for whose disks we assemble
3462
  @type ignore_secondaries: boolean
3463
  @param ignore_secondaries: if true, errors on secondary nodes
3464
      won't result in an error return from the function
3465
  @type ignore_size: boolean
3466
  @param ignore_size: if true, the current known size of the disk
3467
      will not be used during the disk activation, useful for cases
3468
      when the size is wrong
3469
  @return: False if the operation failed, otherwise a list of
3470
      (host, instance_visible_name, node_visible_name)
3471
      with the mapping from node devices to instance devices
3472

3473
  """
3474
  device_info = []
3475
  disks_ok = True
3476
  iname = instance.name
3477
  # With the two passes mechanism we try to reduce the window of
3478
  # opportunity for the race condition of switching DRBD to primary
3479
  # before handshaking occured, but we do not eliminate it
3480

    
3481
  # The proper fix would be to wait (with some limits) until the
3482
  # connection has been made and drbd transitions from WFConnection
3483
  # into any other network-connected state (Connected, SyncTarget,
3484
  # SyncSource, etc.)
3485

    
3486
  # 1st pass, assemble on all nodes in secondary mode
3487
  for inst_disk in instance.disks:
3488
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3489
      if ignore_size:
3490
        node_disk = node_disk.Copy()
3491
        node_disk.UnsetSize()
3492
      lu.cfg.SetDiskID(node_disk, node)
3493
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3494
      msg = result.fail_msg
3495
      if msg:
3496
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3497
                           " (is_primary=False, pass=1): %s",
3498
                           inst_disk.iv_name, node, msg)
3499
        if not ignore_secondaries:
3500
          disks_ok = False
3501

    
3502
  # FIXME: race condition on drbd migration to primary
3503

    
3504
  # 2nd pass, do only the primary node
3505
  for inst_disk in instance.disks:
3506
    dev_path = None
3507

    
3508
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3509
      if node != instance.primary_node:
3510
        continue
3511
      if ignore_size:
3512
        node_disk = node_disk.Copy()
3513
        node_disk.UnsetSize()
3514
      lu.cfg.SetDiskID(node_disk, node)
3515
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3516
      msg = result.fail_msg
3517
      if msg:
3518
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3519
                           " (is_primary=True, pass=2): %s",
3520
                           inst_disk.iv_name, node, msg)
3521
        disks_ok = False
3522
      else:
3523
        dev_path = result.payload
3524

    
3525
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3526

    
3527
  # leave the disks configured for the primary node
3528
  # this is a workaround that would be fixed better by
3529
  # improving the logical/physical id handling
3530
  for disk in instance.disks:
3531
    lu.cfg.SetDiskID(disk, instance.primary_node)
3532

    
3533
  return disks_ok, device_info
3534

    
3535

    
3536
def _StartInstanceDisks(lu, instance, force):
3537
  """Start the disks of an instance.
3538

3539
  """
3540
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3541
                                           ignore_secondaries=force)
3542
  if not disks_ok:
3543
    _ShutdownInstanceDisks(lu, instance)
3544
    if force is not None and not force:
3545
      lu.proc.LogWarning("", hint="If the message above refers to a"
3546
                         " secondary node,"
3547
                         " you can retry the operation using '--force'.")
3548
    raise errors.OpExecError("Disk consistency error")
3549

    
3550

    
3551
class LUDeactivateInstanceDisks(NoHooksLU):
3552
  """Shutdown an instance's disks.
3553

3554
  """
3555
  _OP_REQP = ["instance_name"]
3556
  REQ_BGL = False
3557

    
3558
  def ExpandNames(self):
3559
    self._ExpandAndLockInstance()
3560
    self.needed_locks[locking.LEVEL_NODE] = []
3561
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3562

    
3563
  def DeclareLocks(self, level):
3564
    if level == locking.LEVEL_NODE:
3565
      self._LockInstancesNodes()
3566

    
3567
  def CheckPrereq(self):
3568
    """Check prerequisites.
3569

3570
    This checks that the instance is in the cluster.
3571

3572
    """
3573
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3574
    assert self.instance is not None, \
3575
      "Cannot retrieve locked instance %s" % self.op.instance_name
3576

    
3577
  def Exec(self, feedback_fn):
3578
    """Deactivate the disks
3579

3580
    """
3581
    instance = self.instance
3582
    _SafeShutdownInstanceDisks(self, instance)
3583

    
3584

    
3585
def _SafeShutdownInstanceDisks(lu, instance):
3586
  """Shutdown block devices of an instance.
3587

3588
  This function checks if an instance is running, before calling
3589
  _ShutdownInstanceDisks.
3590

3591
  """
3592
  pnode = instance.primary_node
3593
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3594
  ins_l.Raise("Can't contact node %s" % pnode)
3595

    
3596
  if instance.name in ins_l.payload:
3597
    raise errors.OpExecError("Instance is running, can't shutdown"
3598
                             " block devices.")
3599

    
3600
  _ShutdownInstanceDisks(lu, instance)
3601

    
3602

    
3603
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3604
  """Shutdown block devices of an instance.
3605

3606
  This does the shutdown on all nodes of the instance.
3607

3608
  If the ignore_primary is false, errors on the primary node are
3609
  ignored.
3610

3611
  """
3612
  all_result = True
3613
  for disk in instance.disks:
3614
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3615
      lu.cfg.SetDiskID(top_disk, node)
3616
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3617
      msg = result.fail_msg
3618
      if msg:
3619
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3620
                      disk.iv_name, node, msg)
3621
        if not ignore_primary or node != instance.primary_node:
3622
          all_result = False
3623
  return all_result
3624

    
3625

    
3626
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3627
  """Checks if a node has enough free memory.
3628

3629
  This function check if a given node has the needed amount of free
3630
  memory. In case the node has less memory or we cannot get the
3631
  information from the node, this function raise an OpPrereqError
3632
  exception.
3633

3634
  @type lu: C{LogicalUnit}
3635
  @param lu: a logical unit from which we get configuration data
3636
  @type node: C{str}
3637
  @param node: the node to check
3638
  @type reason: C{str}
3639
  @param reason: string to use in the error message
3640
  @type requested: C{int}
3641
  @param requested: the amount of memory in MiB to check for
3642
  @type hypervisor_name: C{str}
3643
  @param hypervisor_name: the hypervisor to ask for memory stats
3644
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3645
      we cannot check the node
3646

3647
  """
3648
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3649
  nodeinfo[node].Raise("Can't get data from node %s" % node,
3650
                       prereq=True, ecode=errors.ECODE_ENVIRON)
3651
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3652
  if not isinstance(free_mem, int):
3653
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3654
                               " was '%s'" % (node, free_mem),
3655
                               errors.ECODE_ENVIRON)
3656
  if requested > free_mem:
3657
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3658
                               " needed %s MiB, available %s MiB" %
3659
                               (node, reason, requested, free_mem),
3660
                               errors.ECODE_NORES)
3661

    
3662

    
3663
class LUStartupInstance(LogicalUnit):
3664
  """Starts an instance.
3665

3666
  """
3667
  HPATH = "instance-start"
3668
  HTYPE = constants.HTYPE_INSTANCE
3669
  _OP_REQP = ["instance_name", "force"]
3670
  REQ_BGL = False
3671

    
3672
  def ExpandNames(self):
3673
    self._ExpandAndLockInstance()
3674

    
3675
  def BuildHooksEnv(self):
3676
    """Build hooks env.
3677

3678
    This runs on master, primary and secondary nodes of the instance.
3679

3680
    """
3681
    env = {
3682
      "FORCE": self.op.force,
3683
      }
3684
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3685
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3686
    return env, nl, nl
3687

    
3688
  def CheckPrereq(self):
3689
    """Check prerequisites.
3690

3691
    This checks that the instance is in the cluster.
3692

3693
    """
3694
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3695
    assert self.instance is not None, \
3696
      "Cannot retrieve locked instance %s" % self.op.instance_name
3697

    
3698
    # extra beparams
3699
    self.beparams = getattr(self.op, "beparams", {})
3700
    if self.beparams:
3701
      if not isinstance(self.beparams, dict):
3702
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3703
                                   " dict" % (type(self.beparams), ),
3704
                                   errors.ECODE_INVAL)
3705
      # fill the beparams dict
3706
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3707
      self.op.beparams = self.beparams
3708

    
3709
    # extra hvparams
3710
    self.hvparams = getattr(self.op, "hvparams", {})
3711
    if self.hvparams:
3712
      if not isinstance(self.hvparams, dict):
3713
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3714
                                   " dict" % (type(self.hvparams), ),
3715
                                   errors.ECODE_INVAL)
3716

    
3717
      # check hypervisor parameter syntax (locally)
3718
      cluster = self.cfg.GetClusterInfo()
3719
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3720
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3721
                                    instance.hvparams)
3722
      filled_hvp.update(self.hvparams)
3723
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3724
      hv_type.CheckParameterSyntax(filled_hvp)
3725
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3726
      self.op.hvparams = self.hvparams
3727

    
3728
    _CheckNodeOnline(self, instance.primary_node)
3729

    
3730
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3731
    # check bridges existence
3732
    _CheckInstanceBridgesExist(self, instance)
3733

    
3734
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3735
                                              instance.name,
3736
                                              instance.hypervisor)
3737
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3738
                      prereq=True, ecode=errors.ECODE_ENVIRON)
3739
    if not remote_info.payload: # not running already
3740
      _CheckNodeFreeMemory(self, instance.primary_node,
3741
                           "starting instance %s" % instance.name,
3742
                           bep[constants.BE_MEMORY], instance.hypervisor)
3743

    
3744
  def Exec(self, feedback_fn):
3745
    """Start the instance.
3746

3747
    """
3748
    instance = self.instance
3749
    force = self.op.force
3750

    
3751
    self.cfg.MarkInstanceUp(instance.name)
3752

    
3753
    node_current = instance.primary_node
3754

    
3755
    _StartInstanceDisks(self, instance, force)
3756

    
3757
    result = self.rpc.call_instance_start(node_current, instance,
3758
                                          self.hvparams, self.beparams)
3759
    msg = result.fail_msg
3760
    if msg:
3761
      _ShutdownInstanceDisks(self, instance)
3762
      raise errors.OpExecError("Could not start instance: %s" % msg)
3763

    
3764

    
3765
class LURebootInstance(LogicalUnit):
3766
  """Reboot an instance.
3767

3768
  """
3769
  HPATH = "instance-reboot"
3770
  HTYPE = constants.HTYPE_INSTANCE
3771
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3772
  REQ_BGL = False
3773

    
3774
  def CheckArguments(self):
3775
    """Check the arguments.
3776

3777
    """
3778
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3779
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
3780

    
3781
  def ExpandNames(self):
3782
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3783
                                   constants.INSTANCE_REBOOT_HARD,
3784
                                   constants.INSTANCE_REBOOT_FULL]:
3785
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3786
                                  (constants.INSTANCE_REBOOT_SOFT,
3787
                                   constants.INSTANCE_REBOOT_HARD,
3788
                                   constants.INSTANCE_REBOOT_FULL))
3789
    self._ExpandAndLockInstance()
3790

    
3791
  def BuildHooksEnv(self):
3792
    """Build hooks env.
3793

3794
    This runs on master, primary and secondary nodes of the instance.
3795

3796
    """
3797
    env = {
3798
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3799
      "REBOOT_TYPE": self.op.reboot_type,
3800
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3801
      }
3802
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3803
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3804
    return env, nl, nl
3805

    
3806
  def CheckPrereq(self):
3807
    """Check prerequisites.
3808

3809
    This checks that the instance is in the cluster.
3810

3811
    """
3812
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3813
    assert self.instance is not None, \
3814
      "Cannot retrieve locked instance %s" % self.op.instance_name
3815

    
3816
    _CheckNodeOnline(self, instance.primary_node)
3817

    
3818
    # check bridges existence
3819
    _CheckInstanceBridgesExist(self, instance)
3820

    
3821
  def Exec(self, feedback_fn):
3822
    """Reboot the instance.
3823

3824
    """
3825
    instance = self.instance
3826
    ignore_secondaries = self.op.ignore_secondaries
3827
    reboot_type = self.op.reboot_type
3828

    
3829
    node_current = instance.primary_node
3830

    
3831
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3832
                       constants.INSTANCE_REBOOT_HARD]:
3833
      for disk in instance.disks:
3834
        self.cfg.SetDiskID(disk, node_current)
3835
      result = self.rpc.call_instance_reboot(node_current, instance,
3836
                                             reboot_type,
3837
                                             self.shutdown_timeout)
3838
      result.Raise("Could not reboot instance")
3839
    else:
3840
      result = self.rpc.call_instance_shutdown(node_current, instance,
3841
                                               self.shutdown_timeout)
3842
      result.Raise("Could not shutdown instance for full reboot")
3843
      _ShutdownInstanceDisks(self, instance)
3844
      _StartInstanceDisks(self, instance, ignore_secondaries)
3845
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3846
      msg = result.fail_msg
3847
      if msg:
3848
        _ShutdownInstanceDisks(self, instance)
3849
        raise errors.OpExecError("Could not start instance for"
3850
                                 " full reboot: %s" % msg)
3851

    
3852
    self.cfg.MarkInstanceUp(instance.name)
3853

    
3854

    
3855
class LUShutdownInstance(LogicalUnit):
3856
  """Shutdown an instance.
3857

3858
  """
3859
  HPATH = "instance-stop"
3860
  HTYPE = constants.HTYPE_INSTANCE
3861
  _OP_REQP = ["instance_name"]
3862
  REQ_BGL = False
3863

    
3864
  def CheckArguments(self):
3865
    """Check the arguments.
3866

3867
    """
3868
    self.timeout = getattr(self.op, "timeout",
3869
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
3870

    
3871
  def ExpandNames(self):
3872
    self._ExpandAndLockInstance()
3873

    
3874
  def BuildHooksEnv(self):
3875
    """Build hooks env.
3876

3877
    This runs on master, primary and secondary nodes of the instance.
3878

3879
    """
3880
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3881
    env["TIMEOUT"] = self.timeout
3882
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3883
    return env, nl, nl
3884

    
3885
  def CheckPrereq(self):
3886
    """Check prerequisites.
3887

3888
    This checks that the instance is in the cluster.
3889

3890
    """
3891
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3892
    assert self.instance is not None, \
3893
      "Cannot retrieve locked instance %s" % self.op.instance_name
3894
    _CheckNodeOnline(self, self.instance.primary_node)
3895

    
3896
  def Exec(self, feedback_fn):
3897
    """Shutdown the instance.
3898

3899
    """
3900
    instance = self.instance
3901
    node_current = instance.primary_node
3902
    timeout = self.timeout
3903
    self.cfg.MarkInstanceDown(instance.name)
3904
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
3905
    msg = result.fail_msg
3906
    if msg:
3907
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3908

    
3909
    _ShutdownInstanceDisks(self, instance)
3910

    
3911

    
3912
class LUReinstallInstance(LogicalUnit):
3913
  """Reinstall an instance.
3914

3915
  """
3916
  HPATH = "instance-reinstall"
3917
  HTYPE = constants.HTYPE_INSTANCE
3918
  _OP_REQP = ["instance_name"]
3919
  REQ_BGL = False
3920

    
3921
  def ExpandNames(self):
3922
    self._ExpandAndLockInstance()
3923

    
3924
  def BuildHooksEnv(self):
3925
    """Build hooks env.
3926

3927
    This runs on master, primary and secondary nodes of the instance.
3928

3929
    """
3930
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3931
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3932
    return env, nl, nl
3933

    
3934
  def CheckPrereq(self):
3935
    """Check prerequisites.
3936

3937
    This checks that the instance is in the cluster and is not running.
3938

3939
    """
3940
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3941
    assert instance is not None, \
3942
      "Cannot retrieve locked instance %s" % self.op.instance_name
3943
    _CheckNodeOnline(self, instance.primary_node)
3944

    
3945
    if instance.disk_template == constants.DT_DISKLESS:
3946
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3947
                                 self.op.instance_name,
3948
                                 errors.ECODE_INVAL)
3949
    if instance.admin_up:
3950
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3951
                                 self.op.instance_name,
3952
                                 errors.ECODE_STATE)
3953
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3954
                                              instance.name,
3955
                                              instance.hypervisor)
3956
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3957
                      prereq=True, ecode=errors.ECODE_ENVIRON)
3958
    if remote_info.payload:
3959
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3960
                                 (self.op.instance_name,
3961
                                  instance.primary_node),
3962
                                 errors.ECODE_STATE)
3963

    
3964
    self.op.os_type = getattr(self.op, "os_type", None)
3965
    self.op.force_variant = getattr(self.op, "force_variant", False)
3966
    if self.op.os_type is not None:
3967
      # OS verification
3968
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
3969
      result = self.rpc.call_os_get(pnode, self.op.os_type)
3970
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3971
                   (self.op.os_type, pnode),
3972
                   prereq=True, ecode=errors.ECODE_INVAL)
3973
      if not self.op.force_variant:
3974
        _CheckOSVariant(result.payload, self.op.os_type)
3975

    
3976
    self.instance = instance
3977

    
3978
  def Exec(self, feedback_fn):
3979
    """Reinstall the instance.
3980

3981
    """
3982
    inst = self.instance
3983

    
3984
    if self.op.os_type is not None:
3985
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3986
      inst.os = self.op.os_type
3987
      self.cfg.Update(inst, feedback_fn)
3988

    
3989
    _StartInstanceDisks(self, inst, None)
3990
    try:
3991
      feedback_fn("Running the instance OS create scripts...")
3992
      # FIXME: pass debug option from opcode to backend
3993
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
3994
                                             self.op.debug_level)
3995
      result.Raise("Could not install OS for instance %s on node %s" %
3996
                   (inst.name, inst.primary_node))
3997
    finally:
3998
      _ShutdownInstanceDisks(self, inst)
3999

    
4000

    
4001
class LURecreateInstanceDisks(LogicalUnit):
4002
  """Recreate an instance's missing disks.
4003

4004
  """
4005
  HPATH = "instance-recreate-disks"
4006
  HTYPE = constants.HTYPE_INSTANCE
4007
  _OP_REQP = ["instance_name", "disks"]
4008
  REQ_BGL = False
4009

    
4010
  def CheckArguments(self):
4011
    """Check the arguments.
4012

4013
    """
4014
    if not isinstance(self.op.disks, list):
4015
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4016
    for item in self.op.disks:
4017
      if (not isinstance(item, int) or
4018
          item < 0):
4019
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4020
                                   str(item), errors.ECODE_INVAL)
4021

    
4022
  def ExpandNames(self):
4023
    self._ExpandAndLockInstance()
4024

    
4025
  def BuildHooksEnv(self):
4026
    """Build hooks env.
4027

4028
    This runs on master, primary and secondary nodes of the instance.
4029

4030
    """
4031
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4032
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4033
    return env, nl, nl
4034

    
4035
  def CheckPrereq(self):
4036
    """Check prerequisites.
4037

4038
    This checks that the instance is in the cluster and is not running.
4039

4040
    """
4041
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4042
    assert instance is not None, \
4043
      "Cannot retrieve locked instance %s" % self.op.instance_name
4044
    _CheckNodeOnline(self, instance.primary_node)
4045

    
4046
    if instance.disk_template == constants.DT_DISKLESS:
4047
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4048
                                 self.op.instance_name, errors.ECODE_INVAL)
4049
    if instance.admin_up:
4050
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4051
                                 self.op.instance_name, errors.ECODE_STATE)
4052
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4053
                                              instance.name,
4054
                                              instance.hypervisor)
4055
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4056
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4057
    if remote_info.payload:
4058
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4059
                                 (self.op.instance_name,
4060
                                  instance.primary_node), errors.ECODE_STATE)
4061

    
4062
    if not self.op.disks:
4063
      self.op.disks = range(len(instance.disks))
4064
    else:
4065
      for idx in self.op.disks:
4066
        if idx >= len(instance.disks):
4067
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4068
                                     errors.ECODE_INVAL)
4069

    
4070
    self.instance = instance
4071

    
4072
  def Exec(self, feedback_fn):
4073
    """Recreate the disks.
4074

4075
    """
4076
    to_skip = []
4077
    for idx, _ in enumerate(self.instance.disks):
4078
      if idx not in self.op.disks: # disk idx has not been passed in
4079
        to_skip.append(idx)
4080
        continue
4081

    
4082
    _CreateDisks(self, self.instance, to_skip=to_skip)
4083

    
4084

    
4085
class LURenameInstance(LogicalUnit):
4086
  """Rename an instance.
4087

4088
  """
4089
  HPATH = "instance-rename"
4090
  HTYPE = constants.HTYPE_INSTANCE
4091
  _OP_REQP = ["instance_name", "new_name"]
4092

    
4093
  def BuildHooksEnv(self):
4094
    """Build hooks env.
4095

4096
    This runs on master, primary and secondary nodes of the instance.
4097

4098
    """
4099
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4100
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4101
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4102
    return env, nl, nl
4103

    
4104
  def CheckPrereq(self):
4105
    """Check prerequisites.
4106

4107
    This checks that the instance is in the cluster and is not running.
4108

4109
    """
4110
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4111
                                                self.op.instance_name)
4112
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4113
    assert instance is not None
4114
    _CheckNodeOnline(self, instance.primary_node)
4115

    
4116
    if instance.admin_up:
4117
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4118
                                 self.op.instance_name, errors.ECODE_STATE)
4119
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4120
                                              instance.name,
4121
                                              instance.hypervisor)
4122
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4123
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4124
    if remote_info.payload:
4125
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4126
                                 (self.op.instance_name,
4127
                                  instance.primary_node), errors.ECODE_STATE)
4128
    self.instance = instance
4129

    
4130
    # new name verification
4131
    name_info = utils.GetHostInfo(self.op.new_name)
4132

    
4133
    self.op.new_name = new_name = name_info.name
4134
    instance_list = self.cfg.GetInstanceList()
4135
    if new_name in instance_list:
4136
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4137
                                 new_name, errors.ECODE_EXISTS)
4138

    
4139
    if not getattr(self.op, "ignore_ip", False):
4140
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4141
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4142
                                   (name_info.ip, new_name),
4143
                                   errors.ECODE_NOTUNIQUE)
4144

    
4145

    
4146
  def Exec(self, feedback_fn):
4147
    """Reinstall the instance.
4148

4149
    """
4150
    inst = self.instance
4151
    old_name = inst.name
4152

    
4153
    if inst.disk_template == constants.DT_FILE:
4154
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4155

    
4156
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4157
    # Change the instance lock. This is definitely safe while we hold the BGL
4158
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4159
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4160

    
4161
    # re-read the instance from the configuration after rename
4162
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4163

    
4164
    if inst.disk_template == constants.DT_FILE:
4165
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4166
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4167
                                                     old_file_storage_dir,
4168
                                                     new_file_storage_dir)
4169
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4170
                   " (but the instance has been renamed in Ganeti)" %
4171
                   (inst.primary_node, old_file_storage_dir,
4172
                    new_file_storage_dir))
4173

    
4174
    _StartInstanceDisks(self, inst, None)
4175
    try:
4176
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4177
                                                 old_name, self.op.debug_level)
4178
      msg = result.fail_msg
4179
      if msg:
4180
        msg = ("Could not run OS rename script for instance %s on node %s"
4181
               " (but the instance has been renamed in Ganeti): %s" %
4182
               (inst.name, inst.primary_node, msg))
4183
        self.proc.LogWarning(msg)
4184
    finally:
4185
      _ShutdownInstanceDisks(self, inst)
4186

    
4187

    
4188
class LURemoveInstance(LogicalUnit):
4189
  """Remove an instance.
4190

4191
  """
4192
  HPATH = "instance-remove"
4193
  HTYPE = constants.HTYPE_INSTANCE
4194
  _OP_REQP = ["instance_name", "ignore_failures"]
4195
  REQ_BGL = False
4196

    
4197
  def CheckArguments(self):
4198
    """Check the arguments.
4199

4200
    """
4201
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4202
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4203

    
4204
  def ExpandNames(self):
4205
    self._ExpandAndLockInstance()
4206
    self.needed_locks[locking.LEVEL_NODE] = []
4207
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4208

    
4209
  def DeclareLocks(self, level):
4210
    if level == locking.LEVEL_NODE:
4211
      self._LockInstancesNodes()
4212

    
4213
  def BuildHooksEnv(self):
4214
    """Build hooks env.
4215

4216
    This runs on master, primary and secondary nodes of the instance.
4217

4218
    """
4219
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4220
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4221
    nl = [self.cfg.GetMasterNode()]
4222
    nl_post = list(self.instance.all_nodes) + nl
4223
    return env, nl, nl_post
4224

    
4225
  def CheckPrereq(self):
4226
    """Check prerequisites.
4227

4228
    This checks that the instance is in the cluster.
4229

4230
    """
4231
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4232
    assert self.instance is not None, \
4233
      "Cannot retrieve locked instance %s" % self.op.instance_name
4234

    
4235
  def Exec(self, feedback_fn):
4236
    """Remove the instance.
4237

4238
    """
4239
    instance = self.instance
4240
    logging.info("Shutting down instance %s on node %s",
4241
                 instance.name, instance.primary_node)
4242

    
4243
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4244
                                             self.shutdown_timeout)
4245
    msg = result.fail_msg
4246
    if msg:
4247
      if self.op.ignore_failures:
4248
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4249
      else:
4250
        raise errors.OpExecError("Could not shutdown instance %s on"
4251
                                 " node %s: %s" %
4252
                                 (instance.name, instance.primary_node, msg))
4253

    
4254
    logging.info("Removing block devices for instance %s", instance.name)
4255

    
4256
    if not _RemoveDisks(self, instance):
4257
      if self.op.ignore_failures:
4258
        feedback_fn("Warning: can't remove instance's disks")
4259
      else:
4260
        raise errors.OpExecError("Can't remove instance's disks")
4261

    
4262
    logging.info("Removing instance %s out of cluster config", instance.name)
4263

    
4264
    self.cfg.RemoveInstance(instance.name)
4265
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4266

    
4267

    
4268
class LUQueryInstances(NoHooksLU):
4269
  """Logical unit for querying instances.
4270

4271
  """
4272
  # pylint: disable-msg=W0142
4273
  _OP_REQP = ["output_fields", "names", "use_locking"]
4274
  REQ_BGL = False
4275
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4276
                    "serial_no", "ctime", "mtime", "uuid"]
4277
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4278
                                    "admin_state",
4279
                                    "disk_template", "ip", "mac", "bridge",
4280
                                    "nic_mode", "nic_link",
4281
                                    "sda_size", "sdb_size", "vcpus", "tags",
4282
                                    "network_port", "beparams",
4283
                                    r"(disk)\.(size)/([0-9]+)",
4284
                                    r"(disk)\.(sizes)", "disk_usage",
4285
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4286
                                    r"(nic)\.(bridge)/([0-9]+)",
4287
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4288
                                    r"(disk|nic)\.(count)",
4289
                                    "hvparams",
4290
                                    ] + _SIMPLE_FIELDS +
4291
                                  ["hv/%s" % name
4292
                                   for name in constants.HVS_PARAMETERS
4293
                                   if name not in constants.HVC_GLOBALS] +
4294
                                  ["be/%s" % name
4295
                                   for name in constants.BES_PARAMETERS])
4296
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4297

    
4298

    
4299
  def ExpandNames(self):
4300
    _CheckOutputFields(static=self._FIELDS_STATIC,
4301
                       dynamic=self._FIELDS_DYNAMIC,
4302
                       selected=self.op.output_fields)
4303

    
4304
    self.needed_locks = {}
4305
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4306
    self.share_locks[locking.LEVEL_NODE] = 1
4307

    
4308
    if self.op.names:
4309
      self.wanted = _GetWantedInstances(self, self.op.names)
4310
    else:
4311
      self.wanted = locking.ALL_SET
4312

    
4313
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4314
    self.do_locking = self.do_node_query and self.op.use_locking
4315
    if self.do_locking:
4316
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4317
      self.needed_locks[locking.LEVEL_NODE] = []
4318
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4319

    
4320
  def DeclareLocks(self, level):
4321
    if level == locking.LEVEL_NODE and self.do_locking:
4322
      self._LockInstancesNodes()
4323

    
4324
  def CheckPrereq(self):
4325
    """Check prerequisites.
4326

4327
    """
4328
    pass
4329

    
4330
  def Exec(self, feedback_fn):
4331
    """Computes the list of nodes and their attributes.
4332

4333
    """
4334
    # pylint: disable-msg=R0912
4335
    # way too many branches here
4336
    all_info = self.cfg.GetAllInstancesInfo()
4337
    if self.wanted == locking.ALL_SET:
4338
      # caller didn't specify instance names, so ordering is not important
4339
      if self.do_locking:
4340
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4341
      else:
4342
        instance_names = all_info.keys()
4343
      instance_names = utils.NiceSort(instance_names)
4344
    else:
4345
      # caller did specify names, so we must keep the ordering
4346
      if self.do_locking:
4347
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4348
      else:
4349
        tgt_set = all_info.keys()
4350
      missing = set(self.wanted).difference(tgt_set)
4351
      if missing:
4352
        raise errors.OpExecError("Some instances were removed before"
4353
                                 " retrieving their data: %s" % missing)
4354
      instance_names = self.wanted
4355

    
4356
    instance_list = [all_info[iname] for iname in instance_names]
4357

    
4358
    # begin data gathering
4359

    
4360
    nodes = frozenset([inst.primary_node for inst in instance_list])
4361
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4362

    
4363
    bad_nodes = []
4364
    off_nodes = []
4365
    if self.do_node_query:
4366
      live_data = {}
4367
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4368
      for name in nodes:
4369
        result = node_data[name]
4370
        if result.offline:
4371
          # offline nodes will be in both lists
4372
          off_nodes.append(name)
4373
        if result.fail_msg:
4374
          bad_nodes.append(name)
4375
        else:
4376
          if result.payload:
4377
            live_data.update(result.payload)
4378
          # else no instance is alive
4379
    else:
4380
      live_data = dict([(name, {}) for name in instance_names])
4381

    
4382
    # end data gathering
4383

    
4384
    HVPREFIX = "hv/"
4385
    BEPREFIX = "be/"
4386
    output = []
4387
    cluster = self.cfg.GetClusterInfo()
4388
    for instance in instance_list:
4389
      iout = []
4390
      i_hv = cluster.FillHV(instance, skip_globals=True)
4391
      i_be = cluster.FillBE(instance)
4392
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4393
                                 nic.nicparams) for nic in instance.nics]
4394
      for field in self.op.output_fields:
4395
        st_match = self._FIELDS_STATIC.Matches(field)
4396
        if field in self._SIMPLE_FIELDS:
4397
          val = getattr(instance, field)
4398
        elif field == "pnode":
4399
          val = instance.primary_node
4400
        elif field == "snodes":
4401
          val = list(instance.secondary_nodes)
4402
        elif field == "admin_state":
4403
          val = instance.admin_up
4404
        elif field == "oper_state":
4405
          if instance.primary_node in bad_nodes:
4406
            val = None
4407
          else:
4408
            val = bool(live_data.get(instance.name))
4409
        elif field == "status":
4410
          if instance.primary_node in off_nodes:
4411
            val = "ERROR_nodeoffline"
4412
          elif instance.primary_node in bad_nodes:
4413
            val = "ERROR_nodedown"
4414
          else:
4415
            running = bool(live_data.get(instance.name))
4416
            if running:
4417
              if instance.admin_up:
4418
                val = "running"
4419
              else:
4420
                val = "ERROR_up"
4421
            else:
4422
              if instance.admin_up:
4423
                val = "ERROR_down"
4424
              else:
4425
                val = "ADMIN_down"
4426
        elif field == "oper_ram":
4427
          if instance.primary_node in bad_nodes:
4428
            val = None
4429
          elif instance.name in live_data:
4430
            val = live_data[instance.name].get("memory", "?")
4431
          else:
4432
            val = "-"
4433
        elif field == "vcpus":
4434
          val = i_be[constants.BE_VCPUS]
4435
        elif field == "disk_template":
4436
          val = instance.disk_template
4437
        elif field == "ip":
4438
          if instance.nics:
4439
            val = instance.nics[0].ip
4440
          else:
4441
            val = None
4442
        elif field == "nic_mode":
4443
          if instance.nics:
4444
            val = i_nicp[0][constants.NIC_MODE]
4445
          else:
4446
            val = None
4447
        elif field == "nic_link":
4448
          if instance.nics:
4449
            val = i_nicp[0][constants.NIC_LINK]
4450
          else:
4451
            val = None
4452
        elif field == "bridge":
4453
          if (instance.nics and
4454
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4455
            val = i_nicp[0][constants.NIC_LINK]
4456
          else:
4457
            val = None
4458
        elif field == "mac":
4459
          if instance.nics:
4460
            val = instance.nics[0].mac
4461
          else:
4462
            val = None
4463
        elif field == "sda_size" or field == "sdb_size":
4464
          idx = ord(field[2]) - ord('a')
4465
          try:
4466
            val = instance.FindDisk(idx).size
4467
          except errors.OpPrereqError:
4468
            val = None
4469
        elif field == "disk_usage": # total disk usage per node
4470
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4471
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4472
        elif field == "tags":
4473
          val = list(instance.GetTags())
4474
        elif field == "hvparams":
4475
          val = i_hv
4476
        elif (field.startswith(HVPREFIX) and
4477
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4478
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4479
          val = i_hv.get(field[len(HVPREFIX):], None)
4480
        elif field == "beparams":
4481
          val = i_be
4482
        elif (field.startswith(BEPREFIX) and
4483
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4484
          val = i_be.get(field[len(BEPREFIX):], None)
4485
        elif st_match and st_match.groups():
4486
          # matches a variable list
4487
          st_groups = st_match.groups()
4488
          if st_groups and st_groups[0] == "disk":
4489
            if st_groups[1] == "count":
4490
              val = len(instance.disks)
4491
            elif st_groups[1] == "sizes":
4492
              val = [disk.size for disk in instance.disks]
4493
            elif st_groups[1] == "size":
4494
              try:
4495
                val = instance.FindDisk(st_groups[2]).size
4496
              except errors.OpPrereqError:
4497
                val = None
4498
            else:
4499
              assert False, "Unhandled disk parameter"
4500
          elif st_groups[0] == "nic":
4501
            if st_groups[1] == "count":
4502
              val = len(instance.nics)
4503
            elif st_groups[1] == "macs":
4504
              val = [nic.mac for nic in instance.nics]
4505
            elif st_groups[1] == "ips":
4506
              val = [nic.ip for nic in instance.nics]
4507
            elif st_groups[1] == "modes":
4508
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4509
            elif st_groups[1] == "links":
4510
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4511
            elif st_groups[1] == "bridges":
4512
              val = []
4513
              for nicp in i_nicp:
4514
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4515
                  val.append(nicp[constants.NIC_LINK])
4516
                else:
4517
                  val.append(None)
4518
            else:
4519
              # index-based item
4520
              nic_idx = int(st_groups[2])
4521
              if nic_idx >= len(instance.nics):
4522
                val = None
4523
              else:
4524
                if st_groups[1] == "mac":
4525
                  val = instance.nics[nic_idx].mac
4526
                elif st_groups[1] == "ip":
4527
                  val = instance.nics[nic_idx].ip
4528
                elif st_groups[1] == "mode":
4529
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4530
                elif st_groups[1] == "link":
4531
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4532
                elif st_groups[1] == "bridge":
4533
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4534
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4535
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4536
                  else:
4537
                    val = None
4538
                else:
4539
                  assert False, "Unhandled NIC parameter"
4540
          else:
4541
            assert False, ("Declared but unhandled variable parameter '%s'" %
4542
                           field)
4543
        else:
4544
          assert False, "Declared but unhandled parameter '%s'" % field
4545
        iout.append(val)
4546
      output.append(iout)
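    # Descriptive note: "output" holds one row per queried instance, with the
    # values in the same order as self.op.output_fields; fields that cannot
    # be computed (e.g. "oper_state" or "oper_ram" when the primary node did
    # not answer) are set to None, while "status" reports such problems as
    # explicit ERROR_* strings.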

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post
4598

    
4599
  def CheckPrereq(self):
4600
    """Check prerequisites.
4601

4602
    This checks that the instance is in the cluster.
4603

4604
    """
4605
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4606
    assert self.instance is not None, \
4607
      "Cannot retrieve locked instance %s" % self.op.instance_name
4608

    
4609
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4610
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4611
      raise errors.OpPrereqError("Instance's disk layout is not"
4612
                                 " network mirrored, cannot failover.",
4613
                                 errors.ECODE_STATE)
4614

    
4615
    secondary_nodes = instance.secondary_nodes
4616
    if not secondary_nodes:
4617
      raise errors.ProgrammerError("no secondary node but using "
4618
                                   "a mirrored disk template")
4619

    
4620
    target_node = secondary_nodes[0]
4621
    _CheckNodeOnline(self, target_node)
4622
    _CheckNodeNotDrained(self, target_node)
4623
    if instance.admin_up:
4624
      # check memory requirements on the secondary node
4625
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4626
                           instance.name, bep[constants.BE_MEMORY],
4627
                           instance.hypervisor)
4628
    else:
4629
      self.LogInfo("Not checking memory on the secondary node as"
4630
                   " instance will not be started")
4631

    
4632
    # check bridge existence
4633
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4634

    
4635
  def Exec(self, feedback_fn):
4636
    """Failover an instance.
4637

4638
    The failover is done by shutting it down on its present node and
4639
    starting it on the secondary.
4640

4641
    """
4642
    instance = self.instance
4643

    
4644
    source_node = instance.primary_node
4645
    target_node = instance.secondary_nodes[0]
4646

    
4647
    if instance.admin_up:
4648
      feedback_fn("* checking disk consistency between source and target")
4649
      for dev in instance.disks:
4650
        # for drbd, these are drbd over lvm
4651
        if not _CheckDiskConsistency(self, dev, target_node, False):
4652
          if not self.op.ignore_consistency:
4653
            raise errors.OpExecError("Disk %s is degraded on target node,"
4654
                                     " aborting failover." % dev.iv_name)
4655
    else:
4656
      feedback_fn("* not checking disk consistency as instance is not running")
4657

    
4658
    feedback_fn("* shutting down instance on source node")
4659
    logging.info("Shutting down instance %s on node %s",
4660
                 instance.name, source_node)
4661

    
4662
    result = self.rpc.call_instance_shutdown(source_node, instance,
4663
                                             self.shutdown_timeout)
4664
    msg = result.fail_msg
4665
    if msg:
4666
      if self.op.ignore_consistency:
4667
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4668
                             " Proceeding anyway. Please make sure node"
4669
                             " %s is down. Error details: %s",
4670
                             instance.name, source_node, source_node, msg)
4671
      else:
4672
        raise errors.OpExecError("Could not shutdown instance %s on"
4673
                                 " node %s: %s" %
4674
                                 (instance.name, source_node, msg))
4675

    
4676
    feedback_fn("* deactivating the instance's disks on source node")
4677
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4678
      raise errors.OpExecError("Can't shut down the instance's disks.")
4679

    
4680
    instance.primary_node = target_node
4681
    # distribute new instance config to the other nodes
4682
    self.cfg.Update(instance, feedback_fn)
4683

    
4684
    # Only start the instance if it's marked as up
4685
    if instance.admin_up:
4686
      feedback_fn("* activating the instance's disks on target node")
4687
      logging.info("Starting instance %s on node %s",
4688
                   instance.name, target_node)
4689

    
4690
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4691
                                               ignore_secondaries=True)
4692
      if not disks_ok:
4693
        _ShutdownInstanceDisks(self, instance)
4694
        raise errors.OpExecError("Can't activate the instance's disks")
4695

    
4696
      feedback_fn("* starting the instance on the target node")
4697
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4698
      msg = result.fail_msg
4699
      if msg:
4700
        _ShutdownInstanceDisks(self, instance)
4701
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4702
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
4706
  """Migrate an instance.
4707

4708
  This is migration without shutting down, compared to the failover,
4709
  which is done with shutdown.
4710

4711
  """
4712
  HPATH = "instance-migrate"
4713
  HTYPE = constants.HTYPE_INSTANCE
4714
  _OP_REQP = ["instance_name", "live", "cleanup"]
4715

    
4716
  REQ_BGL = False
4717

    
4718
  def ExpandNames(self):
4719
    self._ExpandAndLockInstance()
4720

    
4721
    self.needed_locks[locking.LEVEL_NODE] = []
4722
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4723

    
4724
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4725
                                       self.op.live, self.op.cleanup)
4726
    self.tasklets = [self._migrater]
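    # Descriptive note: by filling self.tasklets this LU delegates its
    # CheckPrereq and Exec phases to the TLMigrateInstance tasklet defined
    # further below; the LU itself only declares locks and builds the hooks
    # environment.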
4727

    
4728
  def DeclareLocks(self, level):
4729
    if level == locking.LEVEL_NODE:
4730
      self._LockInstancesNodes()
4731

    
4732
  def BuildHooksEnv(self):
4733
    """Build hooks env.
4734

4735
    This runs on master, primary and secondary nodes of the instance.
4736

4737
    """
4738
    instance = self._migrater.instance
4739
    source_node = instance.primary_node
4740
    target_node = instance.secondary_nodes[0]
4741
    env = _BuildInstanceHookEnvByObject(self, instance)
4742
    env["MIGRATE_LIVE"] = self.op.live
4743
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4744
    env.update({
4745
        "OLD_PRIMARY": source_node,
4746
        "OLD_SECONDARY": target_node,
4747
        "NEW_PRIMARY": target_node,
4748
        "NEW_SECONDARY": source_node,
4749
        })
4750
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4751
    nl_post = list(nl)
4752
    nl_post.append(source_node)
4753
    return env, nl, nl_post
4754

    
4755

    
4756
class LUMoveInstance(LogicalUnit):
4757
  """Move an instance by data-copying.
4758

4759
  """
4760
  HPATH = "instance-move"
4761
  HTYPE = constants.HTYPE_INSTANCE
4762
  _OP_REQP = ["instance_name", "target_node"]
4763
  REQ_BGL = False
4764

    
4765
  def CheckArguments(self):
4766
    """Check the arguments.
4767

4768
    """
4769
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4770
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4771

    
4772
  def ExpandNames(self):
4773
    self._ExpandAndLockInstance()
4774
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4775
    self.op.target_node = target_node
4776
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4777
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4778

    
4779
  def DeclareLocks(self, level):
4780
    if level == locking.LEVEL_NODE:
4781
      self._LockInstancesNodes(primary_only=True)
4782

    
4783
  def BuildHooksEnv(self):
4784
    """Build hooks env.
4785

4786
    This runs on master, primary and secondary nodes of the instance.
4787

4788
    """
4789
    env = {
4790
      "TARGET_NODE": self.op.target_node,
4791
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4792
      }
4793
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4794
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4795
                                       self.op.target_node]
4796
    return env, nl, nl
4797

    
4798
  def CheckPrereq(self):
4799
    """Check prerequisites.
4800

4801
    This checks that the instance is in the cluster.
4802

4803
    """
4804
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4805
    assert self.instance is not None, \
4806
      "Cannot retrieve locked instance %s" % self.op.instance_name
4807

    
4808
    node = self.cfg.GetNodeInfo(self.op.target_node)
4809
    assert node is not None, \
4810
      "Cannot retrieve locked node %s" % self.op.target_node
4811

    
4812
    self.target_node = target_node = node.name
4813

    
4814
    if target_node == instance.primary_node:
4815
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4816
                                 (instance.name, target_node),
4817
                                 errors.ECODE_STATE)
4818

    
4819
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4820

    
4821
    for idx, dsk in enumerate(instance.disks):
4822
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4823
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4824
                                   " cannot copy" % idx, errors.ECODE_STATE)
4825

    
4826
    _CheckNodeOnline(self, target_node)
4827
    _CheckNodeNotDrained(self, target_node)
4828

    
4829
    if instance.admin_up:
4830
      # check memory requirements on the secondary node
4831
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4832
                           instance.name, bep[constants.BE_MEMORY],
4833
                           instance.hypervisor)
4834
    else:
4835
      self.LogInfo("Not checking memory on the secondary node as"
4836
                   " instance will not be started")
4837

    
4838
    # check bridge existence
4839
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4840

    
4841
  def Exec(self, feedback_fn):
4842
    """Move an instance.
4843

4844
    The move is done by shutting it down on its present node, copying
4845
    the data over (slow) and starting it on the new node.
4846

4847
    """
4848
    instance = self.instance
4849

    
4850
    source_node = instance.primary_node
4851
    target_node = self.target_node
4852

    
4853
    self.LogInfo("Shutting down instance %s on source node %s",
4854
                 instance.name, source_node)
4855

    
4856
    result = self.rpc.call_instance_shutdown(source_node, instance,
4857
                                             self.shutdown_timeout)
4858
    msg = result.fail_msg
4859
    if msg:
4860
      if self.op.ignore_consistency:
4861
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4862
                             " Proceeding anyway. Please make sure node"
4863
                             " %s is down. Error details: %s",
4864
                             instance.name, source_node, source_node, msg)
4865
      else:
4866
        raise errors.OpExecError("Could not shutdown instance %s on"
4867
                                 " node %s: %s" %
4868
                                 (instance.name, source_node, msg))
4869

    
4870
    # create the target disks
4871
    try:
4872
      _CreateDisks(self, instance, target_node=target_node)
4873
    except errors.OpExecError:
4874
      self.LogWarning("Device creation failed, reverting...")
4875
      try:
4876
        _RemoveDisks(self, instance, target_node=target_node)
4877
      finally:
4878
        self.cfg.ReleaseDRBDMinors(instance.name)
4879
        raise
4880

    
4881
    cluster_name = self.cfg.GetClusterInfo().cluster_name
4882

    
4883
    errs = []
4884
    # activate, get path, copy the data over
4885
    for idx, disk in enumerate(instance.disks):
4886
      self.LogInfo("Copying data for disk %d", idx)
4887
      result = self.rpc.call_blockdev_assemble(target_node, disk,
4888
                                               instance.name, True)
4889
      if result.fail_msg:
4890
        self.LogWarning("Can't assemble newly created disk %d: %s",
4891
                        idx, result.fail_msg)
4892
        errs.append(result.fail_msg)
4893
        break
4894
      dev_path = result.payload
4895
      result = self.rpc.call_blockdev_export(source_node, disk,
4896
                                             target_node, dev_path,
4897
                                             cluster_name)
4898
      if result.fail_msg:
4899
        self.LogWarning("Can't copy data over for disk %d: %s",
4900
                        idx, result.fail_msg)
4901
        errs.append(result.fail_msg)
4902
        break
4903

    
4904
    if errs:
4905
      self.LogWarning("Some disks failed to copy, aborting")
4906
      try:
4907
        _RemoveDisks(self, instance, target_node=target_node)
4908
      finally:
4909
        self.cfg.ReleaseDRBDMinors(instance.name)
4910
        raise errors.OpExecError("Errors during disk copy: %s" %
4911
                                 (",".join(errs),))
4912

    
4913
    instance.primary_node = target_node
4914
    self.cfg.Update(instance, feedback_fn)
4915

    
4916
    self.LogInfo("Removing the disks on the original node")
4917
    _RemoveDisks(self, instance, target_node=source_node)
4918

    
4919
    # Only start the instance if it's marked as up
4920
    if instance.admin_up:
4921
      self.LogInfo("Starting instance %s on node %s",
4922
                   instance.name, target_node)
4923

    
4924
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4925
                                           ignore_secondaries=True)
4926
      if not disks_ok:
4927
        _ShutdownInstanceDisks(self, instance)
4928
        raise errors.OpExecError("Can't activate the instance's disks")
4929

    
4930
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4931
      msg = result.fail_msg
4932
      if msg:
4933
        _ShutdownInstanceDisks(self, instance)
4934
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4935
                                 (instance.name, target_node, msg))
4936

    
4937

    
4938
class LUMigrateNode(LogicalUnit):
4939
  """Migrate all instances from a node.
4940

4941
  """
4942
  HPATH = "node-migrate"
4943
  HTYPE = constants.HTYPE_NODE
4944
  _OP_REQP = ["node_name", "live"]
4945
  REQ_BGL = False
4946

    
4947
  def ExpandNames(self):
4948
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4949

    
4950
    self.needed_locks = {
4951
      locking.LEVEL_NODE: [self.op.node_name],
4952
      }
4953

    
4954
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4955

    
4956
    # Create tasklets for migrating instances for all instances on this node
4957
    names = []
4958
    tasklets = []
4959

    
4960
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4961
      logging.debug("Migrating instance %s", inst.name)
4962
      names.append(inst.name)
4963

    
4964
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4965

    
4966
    self.tasklets = tasklets
4967

    
4968
    # Declare instance locks
4969
    self.needed_locks[locking.LEVEL_INSTANCE] = names
4970

    
4971
  def DeclareLocks(self, level):
4972
    if level == locking.LEVEL_NODE:
4973
      self._LockInstancesNodes()
4974

    
4975
  def BuildHooksEnv(self):
4976
    """Build hooks env.
4977

4978
    This runs on the master, the primary and all the secondaries.
4979

4980
    """
4981
    env = {
4982
      "NODE_NAME": self.op.node_name,
4983
      }
4984

    
4985
    nl = [self.cfg.GetMasterNode()]
4986

    
4987
    return (env, nl, nl)
4988

    
4989

    
4990
class TLMigrateInstance(Tasklet):
4991
  def __init__(self, lu, instance_name, live, cleanup):
4992
    """Initializes this class.
4993

4994
    """
4995
    Tasklet.__init__(self, lu)
4996

    
4997
    # Parameters
4998
    self.instance_name = instance_name
4999
    self.live = live
5000
    self.cleanup = cleanup
5001

    
5002
  def CheckPrereq(self):
5003
    """Check prerequisites.
5004

5005
    This checks that the instance is in the cluster.
5006

5007
    """
5008
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5009
    instance = self.cfg.GetInstanceInfo(instance_name)
5010
    assert instance is not None
5011

    
5012
    if instance.disk_template != constants.DT_DRBD8:
5013
      raise errors.OpPrereqError("Instance's disk layout is not"
5014
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5015

    
5016
    secondary_nodes = instance.secondary_nodes
5017
    if not secondary_nodes:
5018
      raise errors.ConfigurationError("No secondary node but using"
5019
                                      " drbd8 disk template")
5020

    
5021
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5022

    
5023
    target_node = secondary_nodes[0]
5024
    # check memory requirements on the secondary node
5025
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5026
                         instance.name, i_be[constants.BE_MEMORY],
5027
                         instance.hypervisor)
5028

    
5029
    # check bridge existence
5030
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5031

    
5032
    if not self.cleanup:
5033
      _CheckNodeNotDrained(self, target_node)
5034
      result = self.rpc.call_instance_migratable(instance.primary_node,
5035
                                                 instance)
5036
      result.Raise("Can't migrate, please use failover",
5037
                   prereq=True, ecode=errors.ECODE_STATE)
5038

    
5039
    self.instance = instance
5040

    
5041
  def _WaitUntilSync(self):
5042
    """Poll with custom rpc for disk sync.
5043

5044
    This uses our own step-based rpc call.
5045

5046
    """
5047
    self.feedback_fn("* wait until resync is done")
5048
    all_done = False
5049
    while not all_done:
5050
      all_done = True
5051
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5052
                                            self.nodes_ip,
5053
                                            self.instance.disks)
5054
      min_percent = 100
5055
      for node, nres in result.items():
5056
        nres.Raise("Cannot resync disks on node %s" % node)
5057
        node_done, node_percent = nres.payload
5058
        all_done = all_done and node_done
5059
        if node_percent is not None:
5060
          min_percent = min(min_percent, node_percent)
5061
      if not all_done:
5062
        if min_percent < 100:
5063
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5064
        time.sleep(2)
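    # Descriptive note: call_drbd_wait_sync returns a (done, sync_percent)
    # payload per node; the loop above polls every two seconds until all
    # nodes report completion, reporting the slowest (minimum) percentage.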
5065

    
5066
  def _EnsureSecondary(self, node):
5067
    """Demote a node to secondary.
5068

5069
    """
5070
    self.feedback_fn("* switching node %s to secondary mode" % node)
5071

    
5072
    for dev in self.instance.disks:
5073
      self.cfg.SetDiskID(dev, node)
5074

    
5075
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5076
                                          self.instance.disks)
5077
    result.Raise("Cannot change disk to secondary on node %s" % node)
5078

    
5079
  def _GoStandalone(self):
5080
    """Disconnect from the network.
5081

5082
    """
5083
    self.feedback_fn("* changing into standalone mode")
5084
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5085
                                               self.instance.disks)
5086
    for node, nres in result.items():
5087
      nres.Raise("Cannot disconnect disks node %s" % node)
5088

    
5089
  def _GoReconnect(self, multimaster):
5090
    """Reconnect to the network.
5091

5092
    """
5093
    if multimaster:
5094
      msg = "dual-master"
5095
    else:
5096
      msg = "single-master"
5097
    self.feedback_fn("* changing disks into %s mode" % msg)
5098
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5099
                                           self.instance.disks,
5100
                                           self.instance.name, multimaster)
5101
    for node, nres in result.items():
5102
      nres.Raise("Cannot change disks config on node %s" % node)
5103

    
5104
  def _ExecCleanup(self):
5105
    """Try to cleanup after a failed migration.
5106

5107
    The cleanup is done by:
5108
      - check that the instance is running only on one node
5109
        (and update the config if needed)
5110
      - change disks on its secondary node to secondary
5111
      - wait until disks are fully synchronized
5112
      - disconnect from the network
5113
      - change disks into single-master mode
5114
      - wait again until disks are fully synchronized
5115

5116
    """
5117
    instance = self.instance
5118
    target_node = self.target_node
5119
    source_node = self.source_node
5120

    
5121
    # check running on only one node
5122
    self.feedback_fn("* checking where the instance actually runs"
5123
                     " (if this hangs, the hypervisor might be in"
5124
                     " a bad state)")
5125
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5126
    for node, result in ins_l.items():
5127
      result.Raise("Can't contact node %s" % node)
5128

    
5129
    runningon_source = instance.name in ins_l[source_node].payload
5130
    runningon_target = instance.name in ins_l[target_node].payload
5131

    
5132
    if runningon_source and runningon_target:
5133
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5134
                               " or the hypervisor is confused. You will have"
5135
                               " to ensure manually that it runs only on one"
5136
                               " and restart this operation.")
5137

    
5138
    if not (runningon_source or runningon_target):
5139
      raise errors.OpExecError("Instance does not seem to be running at all."
5140
                               " In this case, it's safer to repair by"
5141
                               " running 'gnt-instance stop' to ensure disk"
5142
                               " shutdown, and then restarting it.")
5143

    
5144
    if runningon_target:
5145
      # the migration has actually succeeded, we need to update the config
5146
      self.feedback_fn("* instance running on secondary node (%s),"
5147
                       " updating config" % target_node)
5148
      instance.primary_node = target_node
5149
      self.cfg.Update(instance, self.feedback_fn)
5150
      demoted_node = source_node
5151
    else:
5152
      self.feedback_fn("* instance confirmed to be running on its"
5153
                       " primary node (%s)" % source_node)
5154
      demoted_node = target_node
5155

    
5156
    self._EnsureSecondary(demoted_node)
5157
    try:
5158
      self._WaitUntilSync()
5159
    except errors.OpExecError:
5160
      # we ignore errors here, since if the device is standalone, it
5161
      # won't be able to sync
5162
      pass
5163
    self._GoStandalone()
5164
    self._GoReconnect(False)
5165
    self._WaitUntilSync()
5166

    
5167
    self.feedback_fn("* done")
5168

    
5169
  def _RevertDiskStatus(self):
5170
    """Try to revert the disk status after a failed migration.
5171

5172
    """
5173
    target_node = self.target_node
5174
    try:
5175
      self._EnsureSecondary(target_node)
5176
      self._GoStandalone()
5177
      self._GoReconnect(False)
5178
      self._WaitUntilSync()
5179
    except errors.OpExecError, err:
5180
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5181
                         " drives: error '%s'\n"
5182
                         "Please look and recover the instance status" %
5183
                         str(err))
5184

    
5185
  def _AbortMigration(self):
5186
    """Call the hypervisor code to abort a started migration.
5187

5188
    """
5189
    instance = self.instance
5190
    target_node = self.target_node
5191
    migration_info = self.migration_info
5192

    
5193
    abort_result = self.rpc.call_finalize_migration(target_node,
5194
                                                    instance,
5195
                                                    migration_info,
5196
                                                    False)
5197
    abort_msg = abort_result.fail_msg
5198
    if abort_msg:
5199
      logging.error("Aborting migration failed on target node %s: %s",
5200
                    target_node, abort_msg)
5201
      # Don't raise an exception here, as we still have to try to revert the
5202
      # disk status, even if this step failed.
5203

    
5204
  def _ExecMigration(self):
5205
    """Migrate an instance.
5206

5207
    The migrate is done by:
5208
      - change the disks into dual-master mode
5209
      - wait until disks are fully synchronized again
5210
      - migrate the instance
5211
      - change disks on the new secondary node (the old primary) to secondary
5212
      - wait until disks are fully synchronized
5213
      - change disks into single-master mode
5214

5215
    """
5216
    instance = self.instance
5217
    target_node = self.target_node
5218
    source_node = self.source_node
5219

    
5220
    self.feedback_fn("* checking disk consistency between source and target")
5221
    for dev in instance.disks:
5222
      if not _CheckDiskConsistency(self, dev, target_node, False):
5223
        raise errors.OpExecError("Disk %s is degraded or not fully"
5224
                                 " synchronized on target node,"
5225
                                 " aborting migrate." % dev.iv_name)
5226

    
5227
    # First get the migration information from the remote node
5228
    result = self.rpc.call_migration_info(source_node, instance)
5229
    msg = result.fail_msg
5230
    if msg:
5231
      log_err = ("Failed fetching source migration information from %s: %s" %
5232
                 (source_node, msg))
5233
      logging.error(log_err)
5234
      raise errors.OpExecError(log_err)
5235

    
5236
    self.migration_info = migration_info = result.payload
5237

    
5238
    # Then switch the disks to master/master mode
5239
    self._EnsureSecondary(target_node)
5240
    self._GoStandalone()
5241
    self._GoReconnect(True)
5242
    self._WaitUntilSync()
5243

    
5244
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5245
    result = self.rpc.call_accept_instance(target_node,
5246
                                           instance,
5247
                                           migration_info,
5248
                                           self.nodes_ip[target_node])
5249

    
5250
    msg = result.fail_msg
5251
    if msg:
5252
      logging.error("Instance pre-migration failed, trying to revert"
5253
                    " disk status: %s", msg)
5254
      self.feedback_fn("Pre-migration failed, aborting")
5255
      self._AbortMigration()
5256
      self._RevertDiskStatus()
5257
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5258
                               (instance.name, msg))
5259

    
5260
    self.feedback_fn("* migrating instance to %s" % target_node)
5261
    time.sleep(10)
5262
    result = self.rpc.call_instance_migrate(source_node, instance,
5263
                                            self.nodes_ip[target_node],
5264
                                            self.live)
5265
    msg = result.fail_msg
5266
    if msg:
5267
      logging.error("Instance migration failed, trying to revert"
5268
                    " disk status: %s", msg)
5269
      self.feedback_fn("Migration failed, aborting")
5270
      self._AbortMigration()
5271
      self._RevertDiskStatus()
5272
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5273
                               (instance.name, msg))
5274
    time.sleep(10)
5275

    
5276
    instance.primary_node = target_node
5277
    # distribute new instance config to the other nodes
5278
    self.cfg.Update(instance, self.feedback_fn)
5279

    
5280
    result = self.rpc.call_finalize_migration(target_node,
5281
                                              instance,
5282
                                              migration_info,
5283
                                              True)
5284
    msg = result.fail_msg
5285
    if msg:
5286
      logging.error("Instance migration succeeded, but finalization failed:"
5287
                    " %s", msg)
5288
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5289
                               msg)
5290

    
5291
    self._EnsureSecondary(source_node)
5292
    self._WaitUntilSync()
5293
    self._GoStandalone()
5294
    self._GoReconnect(False)
5295
    self._WaitUntilSync()
5296

    
5297
    self.feedback_fn("* done")
5298

    
5299
  def Exec(self, feedback_fn):
5300
    """Perform the migration.
5301

5302
    """
5303
    feedback_fn("Migrating instance %s" % self.instance.name)
5304

    
5305
    self.feedback_fn = feedback_fn
5306

    
5307
    self.source_node = self.instance.primary_node
5308
    self.target_node = self.instance.secondary_nodes[0]
5309
    self.all_nodes = [self.source_node, self.target_node]
5310
    self.nodes_ip = {
5311
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5312
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5313
      }
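    # Descriptive note: nodes_ip maps each node to its secondary IP; this is
    # what the drbd_* RPC helpers used by the methods above rely on when
    # reconfiguring the DRBD network between the source and target node.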
5314

    
5315
    if self.cleanup:
5316
      return self._ExecCleanup()
5317
    else:
5318
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
5322
                    info, force_open):
5323
  """Create a tree of block devices on a given node.
5324

5325
  If this device type has to be created on secondaries, create it and
5326
  all its children.
5327

5328
  If not, just recurse to children keeping the same 'force' value.
5329

5330
  @param lu: the lu on whose behalf we execute
5331
  @param node: the node on which to create the device
5332
  @type instance: L{objects.Instance}
5333
  @param instance: the instance which owns the device
5334
  @type device: L{objects.Disk}
5335
  @param device: the device to create
5336
  @type force_create: boolean
5337
  @param force_create: whether to force creation of this device; this
5338
      will be changed to True whenever we find a device which has
5339
      CreateOnSecondary() attribute
5340
  @param info: the extra 'metadata' we should attach to the device
5341
      (this will be represented as a LVM tag)
5342
  @type force_open: boolean
5343
  @param force_open: this parameter will be passed to the
5344
      L{backend.BlockdevCreate} function where it specifies
5345
      whether we run on primary or not, and it affects both
5346
      the child assembly and the device own Open() execution
5347

5348
  """
5349
  if device.CreateOnSecondary():
5350
    force_create = True
5351

    
5352
  if device.children:
5353
    for child in device.children:
5354
      _CreateBlockDev(lu, node, instance, child, force_create,
5355
                      info, force_open)
5356

    
5357
  if not force_create:
5358
    return
5359

    
5360
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5361

    
5362

    
5363
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5364
  """Create a single block device on a given node.
5365

5366
  This will not recurse over children of the device, so they must be
5367
  created in advance.
5368

5369
  @param lu: the lu on whose behalf we execute
5370
  @param node: the node on which to create the device
5371
  @type instance: L{objects.Instance}
5372
  @param instance: the instance which owns the device
5373
  @type device: L{objects.Disk}
5374
  @param device: the device to create
5375
  @param info: the extra 'metadata' we should attach to the device
5376
      (this will be represented as a LVM tag)
5377
  @type force_open: boolean
5378
  @param force_open: this parameter will be passed to the
5379
      L{backend.BlockdevCreate} function where it specifies
5380
      whether we run on primary or not, and it affects both
5381
      the child assembly and the device own Open() execution
5382

5383
  """
5384
  lu.cfg.SetDiskID(device, node)
5385
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5386
                                       instance.name, force_open, info)
5387
  result.Raise("Can't create block device %s on"
5388
               " node %s for instance %s" % (device, node, instance.name))
5389
  if device.physical_id is None:
5390
    device.physical_id = result.payload
5391

    
5392

    
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
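# Illustrative sketch (the exact ID format is an assumption, it depends on
# ConfigWriter.GenerateUniqueID): for exts == [".disk0", ".disk1"] the helper
# above returns one freshly reserved unique name per suffix, e.g.
#   ["d9f04d52-....disk0", "1c2b3a40-....disk1"]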
5404

    
5405

    
5406
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5407
                         p_minor, s_minor):
5408
  """Generate a drbd8 device complete with its children.
5409

5410
  """
5411
  port = lu.cfg.AllocatePort()
5412
  vgname = lu.cfg.GetVGName()
5413
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5414
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5415
                          logical_id=(vgname, names[0]))
5416
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5417
                          logical_id=(vgname, names[1]))
5418
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5419
                          logical_id=(primary, secondary, port,
5420
                                      p_minor, s_minor,
5421
                                      shared_secret),
5422
                          children=[dev_data, dev_meta],
5423
                          iv_name=iv_name)
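  # Descriptive note: the resulting device tree is a single LD_DRBD8 device
  # whose logical_id carries (primary, secondary, port, p_minor, s_minor,
  # shared_secret), backed by two LD_LV children: the data volume of the
  # requested size and a fixed 128 MB metadata volume.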
5424
  return drbd_dev
5425

    
5426

    
5427
def _GenerateDiskTemplate(lu, template_name,
5428
                          instance_name, primary_node,
5429
                          secondary_nodes, disk_info,
5430
                          file_storage_dir, file_driver,
5431
                          base_index):
5432
  """Generate the entire disk layout for a given template type.
5433

5434
  """
5435
  #TODO: compute space requirements
5436

    
5437
  vgname = lu.cfg.GetVGName()
5438
  disk_count = len(disk_info)
5439
  disks = []
5440
  if template_name == constants.DT_DISKLESS:
5441
    pass
5442
  elif template_name == constants.DT_PLAIN:
5443
    if len(secondary_nodes) != 0:
5444
      raise errors.ProgrammerError("Wrong template configuration")
5445

    
5446
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5447
                                      for i in range(disk_count)])
5448
    for idx, disk in enumerate(disk_info):
5449
      disk_index = idx + base_index
5450
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5451
                              logical_id=(vgname, names[idx]),
5452
                              iv_name="disk/%d" % disk_index,
5453
                              mode=disk["mode"])
5454
      disks.append(disk_dev)
5455
  elif template_name == constants.DT_DRBD8:
5456
    if len(secondary_nodes) != 1:
5457
      raise errors.ProgrammerError("Wrong template configuration")
5458
    remote_node = secondary_nodes[0]
5459
    minors = lu.cfg.AllocateDRBDMinor(
5460
      [primary_node, remote_node] * len(disk_info), instance_name)
5461

    
5462
    names = []
5463
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5464
                                               for i in range(disk_count)]):
5465
      names.append(lv_prefix + "_data")
5466
      names.append(lv_prefix + "_meta")
5467
    for idx, disk in enumerate(disk_info):
5468
      disk_index = idx + base_index
5469
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5470
                                      disk["size"], names[idx*2:idx*2+2],
5471
                                      "disk/%d" % disk_index,
5472
                                      minors[idx*2], minors[idx*2+1])
5473
      disk_dev.mode = disk["mode"]
5474
      disks.append(disk_dev)
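    # Descriptive note: the "names" list alternates data/meta volumes per
    # disk and the DRBD minors are allocated pairwise, which is why disk
    # "idx" uses names[idx * 2:idx * 2 + 2] and minors[idx * 2],
    # minors[idx * 2 + 1] above.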
5475
  elif template_name == constants.DT_FILE:
5476
    if len(secondary_nodes) != 0:
5477
      raise errors.ProgrammerError("Wrong template configuration")
5478

    
5479
    for idx, disk in enumerate(disk_info):
5480
      disk_index = idx + base_index
5481
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5482
                              iv_name="disk/%d" % disk_index,
5483
                              logical_id=(file_driver,
5484
                                          "%s/disk%d" % (file_storage_dir,
5485
                                                         disk_index)),
5486
                              mode=disk["mode"])
5487
      disks.append(disk_dev)
5488
  else:
5489
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5490
  return disks
5491

    
5492

    
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
5498

    
5499

    
5500
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5501
  """Create all disks for an instance.
5502

5503
  This abstracts away some work from AddInstance.
5504

5505
  @type lu: L{LogicalUnit}
5506
  @param lu: the logical unit on whose behalf we execute
5507
  @type instance: L{objects.Instance}
5508
  @param instance: the instance whose disks we should create
5509
  @type to_skip: list
5510
  @param to_skip: list of indices to skip
5511
  @type target_node: string
5512
  @param target_node: if passed, overrides the target node for creation
5513
  @rtype: boolean
5514
  @return: the success of the creation
5515

5516
  """
5517
  info = _GetInstanceInfoText(instance)
5518
  if target_node is None:
5519
    pnode = instance.primary_node
5520
    all_nodes = instance.all_nodes
5521
  else:
5522
    pnode = target_node
5523
    all_nodes = [pnode]
5524

    
5525
  if instance.disk_template == constants.DT_FILE:
5526
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5527
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5528

    
5529
    result.Raise("Failed to create directory '%s' on"
5530
                 " node %s" % (file_storage_dir, pnode))
5531

    
5532
  # Note: this needs to be kept in sync with adding of disks in
5533
  # LUSetInstanceParams
5534
  for idx, device in enumerate(instance.disks):
5535
    if to_skip and idx in to_skip:
5536
      continue
5537
    logging.info("Creating volume %s for instance %s",
5538
                 device.iv_name, instance.name)
5539
    #HARDCODE
5540
    for node in all_nodes:
5541
      f_create = node == pnode
5542
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5543

    
5544

    
5545
def _RemoveDisks(lu, instance, target_node=None):
5546
  """Remove all disks for an instance.
5547

5548
  This abstracts away some work from `AddInstance()` and
5549
  `RemoveInstance()`. Note that in case some of the devices couldn't
5550
  be removed, the removal will continue with the other ones (compare
5551
  with `_CreateDisks()`).
5552

5553
  @type lu: L{LogicalUnit}
5554
  @param lu: the logical unit on whose behalf we execute
5555
  @type instance: L{objects.Instance}
5556
  @param instance: the instance whose disks we should remove
5557
  @type target_node: string
5558
  @param target_node: used to override the node on which to remove the disks
5559
  @rtype: boolean
5560
  @return: the success of the removal
5561

5562
  """
5563
  logging.info("Removing block devices for instance %s", instance.name)
5564

    
5565
  all_result = True
5566
  for device in instance.disks:
5567
    if target_node:
5568
      edata = [(target_node, device)]
5569
    else:
5570
      edata = device.ComputeNodeTree(instance.primary_node)
5571
    for node, disk in edata:
5572
      lu.cfg.SetDiskID(disk, node)
5573
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5574
      if msg:
5575
        lu.LogWarning("Could not remove block device %s on node %s,"
5576
                      " continuing anyway: %s", device.iv_name, node, msg)
5577
        all_result = False
5578

    
5579
  if instance.disk_template == constants.DT_FILE:
5580
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5581
    if target_node:
5582
      tgt = target_node
5583
    else:
5584
      tgt = instance.primary_node
5585
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5586
    if result.fail_msg:
5587
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5588
                    file_storage_dir, instance.primary_node, result.fail_msg)
5589
      all_result = False
5590

    
5591
  return all_result
5592

    
5593

    
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
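# Worked example (illustrative values only): for two disks of 10240 MB each,
# _ComputeDiskSize returns 20480 for DT_PLAIN and 20736 for DT_DRBD8
# ((10240 + 128) * 2, accounting for the per-disk metadata volume); the
# diskless and file-based templates need no volume group space and yield None.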
5612

    
5613

    
5614
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5615
  """Hypervisor parameter validation.
5616

5617
  This function abstract the hypervisor parameter validation to be
5618
  used in both instance create and instance modify.
5619

5620
  @type lu: L{LogicalUnit}
5621
  @param lu: the logical unit for which we check
5622
  @type nodenames: list
5623
  @param nodenames: the list of nodes on which we should check
5624
  @type hvname: string
5625
  @param hvname: the name of the hypervisor we should use
5626
  @type hvparams: dict
5627
  @param hvparams: the parameters which we need to check
5628
  @raise errors.OpPrereqError: if the parameters are not valid
5629

5630
  """
5631
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5632
                                                  hvname,
5633
                                                  hvparams)
5634
  for node in nodenames:
5635
    info = hvinfo[node]
5636
    if info.offline:
5637
      continue
5638
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5639

    
5640

    
5641
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name",
                                 errors.ECODE_INVAL)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    if self.op.name_check:
      hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = instance_name = hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = hostname1.ip
    else:
      instance_name = self.op.instance_name
      self.check_ip = None

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      self.disks.append({"size": size, "mode": mode})

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)

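  # Illustrative opcode fragment (hypothetical values, not taken from the
  # upstream docs) that would pass the NIC and disk pre-build above:
  #   nics=[{"mode": "bridged", "link": "xen-br0", "mac": "auto"}]
  #   disks=[{"size": 1024, "mode": "rw"}]
  # "size" is the only mandatory disk key and is interpreted in MiB.
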
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

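  # Note: the allocation request above carries the instance's name, disk
  # template, disk/NIC specs, memory, vcpus and hypervisor; the allocator
  # result is consumed via ial.success, ial.info, ial.nodes and
  # ial.required_nodes, with the first returned node becoming the primary and
  # the second (when two nodes are required) becoming the secondary.
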
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                     src_path, errors.ECODE_INVAL)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config",
                                     errors.ECODE_ENVIRON)

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION),
                                   errors.ECODE_ENVIRON)

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node, errors.ECODE_ENVIRON)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size),
                                     errors.ECODE_NORES)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name),
                 prereq=True, ecode=errors.ECODE_INVAL)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

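  # Sizing note (illustrative): for two 1024 MB disks under DT_DRBD8 the
  # req_size computed above is 2 * (1024 + 128) = 2304 MB, and that much free
  # space in the volume group is required on the primary node and on the
  # mirror secondary alike.
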
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                               self.op.debug_level)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name,
                                                         self.op.debug_level)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

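  # Accepted combinations for CheckArguments above (illustrative summary):
  #   REPLACE_DISK_PRI / REPLACE_DISK_SEC / REPLACE_DISK_AUTO:
  #     neither remote_node nor iallocator may be given
  #   REPLACE_DISK_CHG:
  #     exactly one of remote_node or iallocator is required
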
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.nodes), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

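  # Summary of the node roles set up above (for reference): target_node is
  # the node whose LVs get rebuilt, other_node is its healthy peer, and
  # new_node is only set in REPLACE_DISK_CHG mode, where it becomes the
  # replacement secondary while the old secondary is the target.
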
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

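  # Naming note (illustrative): for disk/0 the helper above requests unique
  # LV names derived from ".disk0_data" and ".disk0_meta"; the data LV matches
  # the DRBD device size while the metadata LV is a fixed 128 MB, mirroring
  # the per-disk allowance made in _ComputeDiskSize.
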
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
6863
    """Replace a disk on the primary or secondary for DRBD 8.
6864

6865
    The algorithm for replace is quite complicated:
6866

6867
      1. for each disk to be replaced:
6868

6869
        1. create new LVs on the target node with unique names
6870
        1. detach old LVs from the drbd device
6871
        1. rename old LVs to name_replaced.<time_t>
6872
        1. rename new LVs to old LVs
6873
        1. attach the new LVs (with the old names now) to the drbd device
6874

6875
      1. wait for sync across all devices
6876

6877
      1. for each modified disk:
6878

6879
        1. remove old LVs (which have the name name_replaces.<time_t>)
6880

6881
    Failures are not very well handled.
6882

6883
    """
6884
    steps_total = 6
6885

    
6886
    # Step: check device activation
6887
    self.lu.LogStep(1, steps_total, "Check device existence")
6888
    self._CheckDisksExistence([self.other_node, self.target_node])
6889
    self._CheckVolumeGroup([self.target_node, self.other_node])
6890

    
6891
    # Step: check other node consistency
6892
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6893
    self._CheckDisksConsistency(self.other_node,
6894
                                self.other_node == self.instance.primary_node,
6895
                                False)
6896

    
6897
    # Step: create new storage
6898
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6899
    iv_names = self._CreateNewStorage(self.target_node)
6900

    
6901
    # Step: for each lv, detach+rename*2+attach
6902
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6903
    for dev, old_lvs, new_lvs in iv_names.itervalues():
6904
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6905

    
6906
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
6907
                                                     old_lvs)
6908
      result.Raise("Can't detach drbd from local storage on node"
6909
                   " %s for device %s" % (self.target_node, dev.iv_name))
6910
      #dev.children = []
6911
      #cfg.Update(instance)
6912

    
6913
      # ok, we created the new LVs, so now we know we have the needed
6914
      # storage; as such, we proceed on the target node to rename
6915
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6916
      # using the assumption that logical_id == physical_id (which in
6917
      # turn is the unique_id on that node)
6918

    
6919
      # FIXME(iustin): use a better name for the replaced LVs
6920
      temp_suffix = int(time.time())
6921
      ren_fn = lambda d, suff: (d.physical_id[0],
6922
                                d.physical_id[1] + "_replaced-%s" % suff)
6923

    
6924
      # Build the rename list based on what LVs exist on the node
6925
      rename_old_to_new = []
6926
      for to_ren in old_lvs:
6927
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6928
        if not result.fail_msg and result.payload:
6929
          # device exists
6930
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6931

    
6932
      self.lu.LogInfo("Renaming the old LVs on the target node")
6933
      result = self.rpc.call_blockdev_rename(self.target_node,
6934
                                             rename_old_to_new)
6935
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
6936

    
6937
      # Now we rename the new LVs to the old LVs
6938
      self.lu.LogInfo("Renaming the new LVs on the target node")
6939
      rename_new_to_old = [(new, old.physical_id)
6940
                           for old, new in zip(old_lvs, new_lvs)]
6941
      result = self.rpc.call_blockdev_rename(self.target_node,
6942
                                             rename_new_to_old)
6943
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
6944

    
6945
      for old, new in zip(old_lvs, new_lvs):
6946
        new.logical_id = old.logical_id
6947
        self.cfg.SetDiskID(new, self.target_node)
6948

    
6949
      for disk in old_lvs:
6950
        disk.logical_id = ren_fn(disk, temp_suffix)
6951
        self.cfg.SetDiskID(disk, self.target_node)
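      # both the renamed old LVs and the new LVs now carry their final names in
      # the in-memory disk objects, so the "remove old storage" step below can
      # still locate the old ones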
6952

    
6953
      # Now that the new lvs have the old name, we can add them to the device
6954
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6955
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
6956
                                                  new_lvs)
6957
      msg = result.fail_msg
6958
      if msg:
6959
        for new_lv in new_lvs:
6960
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
6961
                                               new_lv).fail_msg
6962
          if msg2:
6963
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6964
                               hint=("cleanup manually the unused logical"
6965
                                     "volumes"))
6966
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6967

    
6968
      dev.children = new_lvs
6969

    
6970
      self.cfg.Update(self.instance, feedback_fn)
6971

    
6972
    cstep = 5
6973
    if self.early_release:
6974
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
6975
      cstep += 1
6976
      self._RemoveOldStorage(self.target_node, iv_names)
6977
      # WARNING: we release both node locks here, do not do other RPCs
6978
      # than WaitForSync to the primary node
6979
      self._ReleaseNodeLock([self.target_node, self.other_node])
6980

    
6981
    # Wait for sync
6982
    # This can fail as the old devices are degraded and _WaitForSync
6983
    # does a combined result over all disks, so we don't check its return value
6984
    self.lu.LogStep(cstep, steps_total, "Sync devices")
6985
    cstep += 1
6986
    _WaitForSync(self.lu, self.instance)
6987

    
6988
    # Check all devices manually
6989
    self._CheckDevices(self.instance.primary_node, iv_names)
6990

    
6991
    # Step: remove old storage
6992
    if not self.early_release:
6993
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
6994
      cstep += 1
6995
      self._RemoveOldStorage(self.target_node, iv_names)
6996

    
6997
  def _ExecDrbd8Secondary(self, feedback_fn):
6998
    """Replace the secondary node for DRBD 8.
6999

7000
    The algorithm for replace is quite complicated:
7001
      - for all disks of the instance:
7002
        - create new LVs on the new node with same names
7003
        - shutdown the drbd device on the old secondary
7004
        - disconnect the drbd network on the primary
7005
        - create the drbd device on the new secondary
7006
        - network attach the drbd on the primary, using an artifice:
7007
          the drbd code for Attach() will connect to the network if it
7008
          finds a device which is connected to the good local disks but
7009
          not network enabled
7010
      - wait for sync across all devices
7011
      - remove all disks from the old secondary
7012

7013
    Failures are not very well handled.
7014

7015
    """
7016
    steps_total = 6
7017

    
7018
    # Step: check device activation
7019
    self.lu.LogStep(1, steps_total, "Check device existence")
7020
    self._CheckDisksExistence([self.instance.primary_node])
7021
    self._CheckVolumeGroup([self.instance.primary_node])
7022

    
7023
    # Step: check other node consistency
7024
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7025
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7026

    
7027
    # Step: create new storage
7028
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7029
    for idx, dev in enumerate(self.instance.disks):
7030
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7031
                      (self.new_node, idx))
7032
      # we pass force_create=True to force LVM creation
7033
      for new_lv in dev.children:
7034
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7035
                        _GetInstanceInfoText(self.instance), False)
7036

    
7037
    # Step 4: drbd minors and drbd setup changes
7038
    # after this, we must manually remove the drbd minors on both the
7039
    # error and the success paths
7040
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
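    # reserve one DRBD minor on the new node for every disk of the instance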
7041
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7042
                                         for dev in self.instance.disks],
7043
                                        self.instance.name)
7044
    logging.debug("Allocated minors %r", minors)
7045

    
7046
    iv_names = {}
7047
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7048
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7049
                      (self.new_node, idx))
7050
      # create new devices on new_node; note that we create two IDs:
7051
      # one without port, so the drbd will be activated without
7052
      # networking information on the new node at this stage, and one
7053
      # with network, used for the network attach performed later in this step
7054
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
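      # the logical_id of a DRBD8 disk is (nodeA, nodeB, port, minorA, minorB,
      # secret); keep the primary node's minor, the secondary's is replaced below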
7055
      if self.instance.primary_node == o_node1:
7056
        p_minor = o_minor1
7057
      else:
7058
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7059
        p_minor = o_minor2
7060

    
7061
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7062
                      p_minor, new_minor, o_secret)
7063
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7064
                    p_minor, new_minor, o_secret)
7065

    
7066
      iv_names[idx] = (dev, dev.children, new_net_id)
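      # remember per disk: the drbd device, its local LVs (the same names were
      # just created on the new node) and the network-enabled id used later on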
7067
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7068
                    new_net_id)
7069
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7070
                              logical_id=new_alone_id,
7071
                              children=dev.children,
7072
                              size=dev.size)
7073
      try:
7074
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7075
                              _GetInstanceInfoText(self.instance), False)
7076
      except errors.GenericError:
7077
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7078
        raise
7079

    
7080
    # We have new devices, shutdown the drbd on the old secondary
7081
    for idx, dev in enumerate(self.instance.disks):
7082
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7083
      self.cfg.SetDiskID(dev, self.target_node)
7084
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7085
      if msg:
7086
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7087
                           "node: %s" % (idx, msg),
7088
                           hint=("Please cleanup this device manually as"
7089
                                 " soon as possible"))
7090

    
7091
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7092
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7093
                                               self.node_secondary_ip,
7094
                                               self.instance.disks)\
7095
                                              [self.instance.primary_node]
7096

    
7097
    msg = result.fail_msg
7098
    if msg:
7099
      # detaches didn't succeed (unlikely)
7100
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7101
      raise errors.OpExecError("Can't detach the disks from the network on"
7102
                               " old node: %s" % (msg,))
7103

    
7104
    # if we managed to detach at least one, we update all the disks of
7105
    # the instance to point to the new secondary
7106
    self.lu.LogInfo("Updating instance configuration")
7107
    for dev, _, new_logical_id in iv_names.itervalues():
7108
      dev.logical_id = new_logical_id
7109
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7110

    
7111
    self.cfg.Update(self.instance, feedback_fn)
7112

    
7113
    # and now perform the drbd attach
7114
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7115
                    " (standalone => connected)")
7116
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7117
                                            self.new_node],
7118
                                           self.node_secondary_ip,
7119
                                           self.instance.disks,
7120
                                           self.instance.name,
7121
                                           False)
7122
    for to_node, to_result in result.items():
7123
      msg = to_result.fail_msg
7124
      if msg:
7125
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7126
                           to_node, msg,
7127
                           hint=("please do a gnt-instance info to see the"
7128
                                 " status of disks"))
7129
    cstep = 5
7130
    if self.early_release:
7131
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7132
      cstep += 1
7133
      self._RemoveOldStorage(self.target_node, iv_names)
7134
      # WARNING: we release all node locks here, do not do other RPCs
7135
      # than WaitForSync to the primary node
7136
      self._ReleaseNodeLock([self.instance.primary_node,
7137
                             self.target_node,
7138
                             self.new_node])
7139

    
7140
    # Wait for sync
7141
    # This can fail as the old devices are degraded and _WaitForSync
7142
    # does a combined result over all disks, so we don't check its return value
7143
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7144
    cstep += 1
7145
    _WaitForSync(self.lu, self.instance)
7146

    
7147
    # Check all devices manually
7148
    self._CheckDevices(self.instance.primary_node, iv_names)
7149

    
7150
    # Step: remove old storage
7151
    if not self.early_release:
7152
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7153
      self._RemoveOldStorage(self.target_node, iv_names)
7154

    
7155

    
7156
class LURepairNodeStorage(NoHooksLU):
7157
  """Repairs the volume group on a node.
7158

7159
  """
7160
  _OP_REQP = ["node_name"]
7161
  REQ_BGL = False
7162

    
7163
  def CheckArguments(self):
7164
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7165

    
7166
  def ExpandNames(self):
7167
    self.needed_locks = {
7168
      locking.LEVEL_NODE: [self.op.node_name],
7169
      }
7170

    
7171
  def _CheckFaultyDisks(self, instance, node_name):
7172
    """Ensure faulty disks abort the opcode or at least warn."""
7173
    try:
7174
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7175
                                  node_name, True):
7176
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7177
                                   " node '%s'" % (instance.name, node_name),
7178
                                   errors.ECODE_STATE)
7179
    except errors.OpPrereqError, err:
7180
      if self.op.ignore_consistency:
7181
        self.proc.LogWarning(str(err.args[0]))
7182
      else:
7183
        raise
7184

    
7185
  def CheckPrereq(self):
7186
    """Check prerequisites.
7187

7188
    """
7189
    storage_type = self.op.storage_type
7190

    
7191
    if (constants.SO_FIX_CONSISTENCY not in
7192
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7193
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7194
                                 " repaired" % storage_type,
7195
                                 errors.ECODE_INVAL)
7196

    
7197
    # Check whether any instance on this node has faulty disks
7198
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7199
      if not inst.admin_up:
7200
        continue
7201
      check_nodes = set(inst.all_nodes)
7202
      check_nodes.discard(self.op.node_name)
7203
      for inst_node_name in check_nodes:
7204
        self._CheckFaultyDisks(inst, inst_node_name)
7205

    
7206
  def Exec(self, feedback_fn):
7207
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7208
                (self.op.name, self.op.node_name))
7209

    
7210
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7211
    result = self.rpc.call_storage_execute(self.op.node_name,
7212
                                           self.op.storage_type, st_args,
7213
                                           self.op.name,
7214
                                           constants.SO_FIX_CONSISTENCY)
7215
    result.Raise("Failed to repair storage unit '%s' on %s" %
7216
                 (self.op.name, self.op.node_name))
7217

    
7218

    
7219
class LUGrowDisk(LogicalUnit):
7220
  """Grow a disk of an instance.
7221

7222
  """
7223
  HPATH = "disk-grow"
7224
  HTYPE = constants.HTYPE_INSTANCE
7225
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7226
  REQ_BGL = False
7227

    
7228
  def ExpandNames(self):
7229
    self._ExpandAndLockInstance()
7230
    self.needed_locks[locking.LEVEL_NODE] = []
7231
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7232

    
7233
  def DeclareLocks(self, level):
7234
    if level == locking.LEVEL_NODE:
7235
      self._LockInstancesNodes()
7236

    
7237
  def BuildHooksEnv(self):
7238
    """Build hooks env.
7239

7240
    This runs on the master, the primary and all the secondaries.
7241

7242
    """
7243
    env = {
7244
      "DISK": self.op.disk,
7245
      "AMOUNT": self.op.amount,
7246
      }
7247
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7248
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7249
    return env, nl, nl
7250

    
7251
  def CheckPrereq(self):
7252
    """Check prerequisites.
7253

7254
    This checks that the instance is in the cluster.
7255

7256
    """
7257
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7258
    assert instance is not None, \
7259
      "Cannot retrieve locked instance %s" % self.op.instance_name
7260
    nodenames = list(instance.all_nodes)
7261
    for node in nodenames:
7262
      _CheckNodeOnline(self, node)
7263

    
7264

    
7265
    self.instance = instance
7266

    
7267
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7268
      raise errors.OpPrereqError("Instance's disk layout does not support"
7269
                                 " growing.", errors.ECODE_INVAL)
7270

    
7271
    self.disk = instance.FindDisk(self.op.disk)
7272

    
7273
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
7274
                                       instance.hypervisor)
7275
    for node in nodenames:
7276
      info = nodeinfo[node]
7277
      info.Raise("Cannot get current information from node %s" % node)
7278
      vg_free = info.payload.get('vg_free', None)
7279
      if not isinstance(vg_free, int):
7280
        raise errors.OpPrereqError("Can't compute free disk space on"
7281
                                   " node %s" % node, errors.ECODE_ENVIRON)
7282
      if self.op.amount > vg_free:
7283
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
7284
                                   " %d MiB available, %d MiB required" %
7285
                                   (node, vg_free, self.op.amount),
7286
                                   errors.ECODE_NORES)
7287

    
7288
  def Exec(self, feedback_fn):
7289
    """Execute disk grow.
7290

7291
    """
7292
    instance = self.instance
7293
    disk = self.disk
7294
    for node in instance.all_nodes:
7295
      self.cfg.SetDiskID(disk, node)
7296
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7297
      result.Raise("Grow request failed to node %s" % node)
7298

    
7299
      # TODO: Rewrite code to work properly
7300
      # DRBD goes into sync mode for a short amount of time after executing the
7301
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7302
      # calling "resize" in sync mode fails. Sleeping for a short amount of
7303
      # time is a work-around.
7304
      time.sleep(5)
7305

    
7306
    disk.RecordGrow(self.op.amount)
7307
    self.cfg.Update(instance, feedback_fn)
7308
    if self.op.wait_for_sync:
7309
      disk_abort = not _WaitForSync(self, instance)
7310
      if disk_abort:
7311
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7312
                             " status.\nPlease check the instance.")
7313

    
7314

    
7315
class LUQueryInstanceData(NoHooksLU):
7316
  """Query runtime instance data.
7317

7318
  """
7319
  _OP_REQP = ["instances", "static"]
7320
  REQ_BGL = False
7321

    
7322
  def ExpandNames(self):
7323
    self.needed_locks = {}
7324
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7325

    
7326
    if not isinstance(self.op.instances, list):
7327
      raise errors.OpPrereqError("Invalid argument type 'instances'",
7328
                                 errors.ECODE_INVAL)
7329

    
7330
    if self.op.instances:
7331
      self.wanted_names = []
7332
      for name in self.op.instances:
7333
        full_name = _ExpandInstanceName(self.cfg, name)
7334
        self.wanted_names.append(full_name)
7335
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7336
    else:
7337
      self.wanted_names = None
7338
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7339

    
7340
    self.needed_locks[locking.LEVEL_NODE] = []
7341
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7342

    
7343
  def DeclareLocks(self, level):
7344
    if level == locking.LEVEL_NODE:
7345
      self._LockInstancesNodes()
7346

    
7347
  def CheckPrereq(self):
7348
    """Check prerequisites.
7349

7350
    This only checks the optional instance list against the existing names.
7351

7352
    """
7353
    if self.wanted_names is None:
7354
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7355

    
7356
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7357
                             in self.wanted_names]
7358
    return
7359

    
7360
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7361
    """Returns the status of a block device
7362

7363
    """
7364
    if self.op.static or not node:
7365
      return None
7366

    
7367
    self.cfg.SetDiskID(dev, node)
7368

    
7369
    result = self.rpc.call_blockdev_find(node, dev)
7370
    if result.offline:
7371
      return None
7372

    
7373
    result.Raise("Can't compute disk status for %s" % instance_name)
7374

    
7375
    status = result.payload
7376
    if status is None:
7377
      return None
7378

    
7379
    return (status.dev_path, status.major, status.minor,
7380
            status.sync_percent, status.estimated_time,
7381
            status.is_degraded, status.ldisk_status)
7382

    
7383
  def _ComputeDiskStatus(self, instance, snode, dev):
7384
    """Compute block device status.
7385

7386
    """
7387
    if dev.dev_type in constants.LDS_DRBD:
7388
      # we change the snode then (otherwise we use the one passed in)
7389
      if dev.logical_id[0] == instance.primary_node:
7390
        snode = dev.logical_id[1]
7391
      else:
7392
        snode = dev.logical_id[0]
7393

    
7394
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7395
                                              instance.name, dev)
7396
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7397

    
7398
    if dev.children:
7399
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
7400
                      for child in dev.children]
7401
    else:
7402
      dev_children = []
7403

    
7404
    data = {
7405
      "iv_name": dev.iv_name,
7406
      "dev_type": dev.dev_type,
7407
      "logical_id": dev.logical_id,
7408
      "physical_id": dev.physical_id,
7409
      "pstatus": dev_pstatus,
7410
      "sstatus": dev_sstatus,
7411
      "children": dev_children,
7412
      "mode": dev.mode,
7413
      "size": dev.size,
7414
      }
7415

    
7416
    return data
7417

    
7418
  def Exec(self, feedback_fn):
7419
    """Gather and return data"""
7420
    result = {}
7421

    
7422
    cluster = self.cfg.GetClusterInfo()
7423

    
7424
    for instance in self.wanted_instances:
7425
      if not self.op.static:
7426
        remote_info = self.rpc.call_instance_info(instance.primary_node,
7427
                                                  instance.name,
7428
                                                  instance.hypervisor)
7429
        remote_info.Raise("Error checking node %s" % instance.primary_node)
7430
        remote_info = remote_info.payload
7431
        if remote_info and "state" in remote_info:
7432
          remote_state = "up"
7433
        else:
7434
          remote_state = "down"
7435
      else:
7436
        remote_state = None
7437
      if instance.admin_up:
7438
        config_state = "up"
7439
      else:
7440
        config_state = "down"
7441

    
7442
      disks = [self._ComputeDiskStatus(instance, None, device)
7443
               for device in instance.disks]
7444

    
7445
      idict = {
7446
        "name": instance.name,
7447
        "config_state": config_state,
7448
        "run_state": remote_state,
7449
        "pnode": instance.primary_node,
7450
        "snodes": instance.secondary_nodes,
7451
        "os": instance.os,
7452
        # this happens to be the same format used for hooks
7453
        "nics": _NICListToTuple(self, instance.nics),
7454
        "disks": disks,
7455
        "hypervisor": instance.hypervisor,
7456
        "network_port": instance.network_port,
7457
        "hv_instance": instance.hvparams,
7458
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
7459
        "be_instance": instance.beparams,
7460
        "be_actual": cluster.FillBE(instance),
7461
        "serial_no": instance.serial_no,
7462
        "mtime": instance.mtime,
7463
        "ctime": instance.ctime,
7464
        "uuid": instance.uuid,
7465
        }
7466

    
7467
      result[instance.name] = idict
7468

    
7469
    return result
7470

    
7471

    
7472
class LUSetInstanceParams(LogicalUnit):
7473
  """Modifies an instances's parameters.
7474

7475
  """
7476
  HPATH = "instance-modify"
7477
  HTYPE = constants.HTYPE_INSTANCE
7478
  _OP_REQP = ["instance_name"]
7479
  REQ_BGL = False
7480

    
7481
  def CheckArguments(self):
7482
    if not hasattr(self.op, 'nics'):
7483
      self.op.nics = []
7484
    if not hasattr(self.op, 'disks'):
7485
      self.op.disks = []
7486
    if not hasattr(self.op, 'beparams'):
7487
      self.op.beparams = {}
7488
    if not hasattr(self.op, 'hvparams'):
7489
      self.op.hvparams = {}
7490
    self.op.force = getattr(self.op, "force", False)
7491
    if not (self.op.nics or self.op.disks or
7492
            self.op.hvparams or self.op.beparams):
7493
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7494

    
7495
    if self.op.hvparams:
7496
      _CheckGlobalHvParams(self.op.hvparams)
7497

    
7498
    # Disk validation
7499
    disk_addremove = 0
7500
    for disk_op, disk_dict in self.op.disks:
7501
      if disk_op == constants.DDM_REMOVE:
7502
        disk_addremove += 1
7503
        continue
7504
      elif disk_op == constants.DDM_ADD:
7505
        disk_addremove += 1
7506
      else:
7507
        if not isinstance(disk_op, int):
7508
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7509
        if not isinstance(disk_dict, dict):
7510
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7511
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7512

    
7513
      if disk_op == constants.DDM_ADD:
7514
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7515
        if mode not in constants.DISK_ACCESS_SET:
7516
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7517
                                     errors.ECODE_INVAL)
7518
        size = disk_dict.get('size', None)
7519
        if size is None:
7520
          raise errors.OpPrereqError("Required disk parameter size missing",
7521
                                     errors.ECODE_INVAL)
7522
        try:
7523
          size = int(size)
7524
        except (TypeError, ValueError), err:
7525
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7526
                                     str(err), errors.ECODE_INVAL)
7527
        disk_dict['size'] = size
7528
      else:
7529
        # modification of disk
7530
        if 'size' in disk_dict:
7531
          raise errors.OpPrereqError("Disk size change not possible, use"
7532
                                     " grow-disk", errors.ECODE_INVAL)
7533

    
7534
    if disk_addremove > 1:
7535
      raise errors.OpPrereqError("Only one disk add or remove operation"
7536
                                 " supported at a time", errors.ECODE_INVAL)
7537

    
7538
    # NIC validation
7539
    nic_addremove = 0
7540
    for nic_op, nic_dict in self.op.nics:
7541
      if nic_op == constants.DDM_REMOVE:
7542
        nic_addremove += 1
7543
        continue
7544
      elif nic_op == constants.DDM_ADD:
7545
        nic_addremove += 1
7546
      else:
7547
        if not isinstance(nic_op, int):
7548
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7549
        if not isinstance(nic_dict, dict):
7550
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7551
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7552

    
7553
      # nic_dict should be a dict
7554
      nic_ip = nic_dict.get('ip', None)
7555
      if nic_ip is not None:
7556
        if nic_ip.lower() == constants.VALUE_NONE:
7557
          nic_dict['ip'] = None
7558
        else:
7559
          if not utils.IsValidIP(nic_ip):
7560
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7561
                                       errors.ECODE_INVAL)
7562

    
7563
      nic_bridge = nic_dict.get('bridge', None)
7564
      nic_link = nic_dict.get('link', None)
7565
      if nic_bridge and nic_link:
7566
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7567
                                   " at the same time", errors.ECODE_INVAL)
7568
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7569
        nic_dict['bridge'] = None
7570
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7571
        nic_dict['link'] = None
7572

    
7573
      if nic_op == constants.DDM_ADD:
7574
        nic_mac = nic_dict.get('mac', None)
7575
        if nic_mac is None:
7576
          nic_dict['mac'] = constants.VALUE_AUTO
7577

    
7578
      if 'mac' in nic_dict:
7579
        nic_mac = nic_dict['mac']
7580
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7581
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7582

    
7583
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7584
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7585
                                     " modifying an existing nic",
7586
                                     errors.ECODE_INVAL)
7587

    
7588
    if nic_addremove > 1:
7589
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7590
                                 " supported at a time", errors.ECODE_INVAL)
7591

    
7592
  def ExpandNames(self):
7593
    self._ExpandAndLockInstance()
7594
    self.needed_locks[locking.LEVEL_NODE] = []
7595
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7596

    
7597
  def DeclareLocks(self, level):
7598
    if level == locking.LEVEL_NODE:
7599
      self._LockInstancesNodes()
7600

    
7601
  def BuildHooksEnv(self):
7602
    """Build hooks env.
7603

7604
    This runs on the master, primary and secondaries.
7605

7606
    """
7607
    args = dict()
7608
    if constants.BE_MEMORY in self.be_new:
7609
      args['memory'] = self.be_new[constants.BE_MEMORY]
7610
    if constants.BE_VCPUS in self.be_new:
7611
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7612
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7613
    # information at all.
7614
    if self.op.nics:
7615
      args['nics'] = []
7616
      nic_override = dict(self.op.nics)
7617
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7618
      for idx, nic in enumerate(self.instance.nics):
7619
        if idx in nic_override:
7620
          this_nic_override = nic_override[idx]
7621
        else:
7622
          this_nic_override = {}
7623
        if 'ip' in this_nic_override:
7624
          ip = this_nic_override['ip']
7625
        else:
7626
          ip = nic.ip
7627
        if 'mac' in this_nic_override:
7628
          mac = this_nic_override['mac']
7629
        else:
7630
          mac = nic.mac
7631
        if idx in self.nic_pnew:
7632
          nicparams = self.nic_pnew[idx]
7633
        else:
7634
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7635
        mode = nicparams[constants.NIC_MODE]
7636
        link = nicparams[constants.NIC_LINK]
7637
        args['nics'].append((ip, mac, mode, link))
7638
      if constants.DDM_ADD in nic_override:
7639
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7640
        mac = nic_override[constants.DDM_ADD]['mac']
7641
        nicparams = self.nic_pnew[constants.DDM_ADD]
7642
        mode = nicparams[constants.NIC_MODE]
7643
        link = nicparams[constants.NIC_LINK]
7644
        args['nics'].append((ip, mac, mode, link))
7645
      elif constants.DDM_REMOVE in nic_override:
7646
        del args['nics'][-1]
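        # a NIC remove always drops the last NIC (see Exec), so drop the
        # corresponding hook entry as well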
7647

    
7648
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7649
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7650
    return env, nl, nl
7651

    
7652
  @staticmethod
7653
  def _GetUpdatedParams(old_params, update_dict,
7654
                        default_values, parameter_types):
7655
    """Return the new params dict for the given params.
7656

7657
    @type old_params: dict
7658
    @param old_params: old parameters
7659
    @type update_dict: dict
7660
    @param update_dict: dict containing new parameter values,
7661
                        or constants.VALUE_DEFAULT to reset the
7662
                        parameter to its default value
7663
    @type default_values: dict
7664
    @param default_values: default values for the filled parameters
7665
    @type parameter_types: dict
7666
    @param parameter_types: dict mapping target dict keys to types
7667
                            in constants.ENFORCEABLE_TYPES
7668
    @rtype: (dict, dict)
7669
    @return: (new_parameters, filled_parameters)
7670

7671
    """
7672
    params_copy = copy.deepcopy(old_params)
7673
    for key, val in update_dict.iteritems():
7674
      if val == constants.VALUE_DEFAULT:
7675
        try:
7676
          del params_copy[key]
7677
        except KeyError:
7678
          pass
7679
      else:
7680
        params_copy[key] = val
7681
    utils.ForceDictType(params_copy, parameter_types)
7682
    params_filled = objects.FillDict(default_values, params_copy)
7683
    return (params_copy, params_filled)
7684

    
7685
  def CheckPrereq(self):
7686
    """Check prerequisites.
7687

7688
    This only checks the instance list against the existing names.
7689

7690
    """
7691
    self.force = self.op.force
7692

    
7693
    # checking the new params on the primary/secondary nodes
7694

    
7695
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7696
    cluster = self.cluster = self.cfg.GetClusterInfo()
7697
    assert self.instance is not None, \
7698
      "Cannot retrieve locked instance %s" % self.op.instance_name
7699
    pnode = instance.primary_node
7700
    nodelist = list(instance.all_nodes)
7701

    
7702
    # hvparams processing
7703
    if self.op.hvparams:
7704
      i_hvdict, hv_new = self._GetUpdatedParams(
7705
                             instance.hvparams, self.op.hvparams,
7706
                             cluster.hvparams[instance.hypervisor],
7707
                             constants.HVS_PARAMETER_TYPES)
7708
      # local check
7709
      hypervisor.GetHypervisor(
7710
        instance.hypervisor).CheckParameterSyntax(hv_new)
7711
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7712
      self.hv_new = hv_new # the new actual values
7713
      self.hv_inst = i_hvdict # the new dict (without defaults)
7714
    else:
7715
      self.hv_new = self.hv_inst = {}
7716

    
7717
    # beparams processing
7718
    if self.op.beparams:
7719
      i_bedict, be_new = self._GetUpdatedParams(
7720
                             instance.beparams, self.op.beparams,
7721
                             cluster.beparams[constants.PP_DEFAULT],
7722
                             constants.BES_PARAMETER_TYPES)
7723
      self.be_new = be_new # the new actual values
7724
      self.be_inst = i_bedict # the new dict (without defaults)
7725
    else:
7726
      self.be_new = self.be_inst = {}
7727

    
7728
    self.warn = []
7729

    
7730
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7731
      mem_check_list = [pnode]
7732
      if be_new[constants.BE_AUTO_BALANCE]:
7733
        # either we changed auto_balance to yes or it was from before
7734
        mem_check_list.extend(instance.secondary_nodes)
7735
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
7736
                                                  instance.hypervisor)
7737
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7738
                                         instance.hypervisor)
7739
      pninfo = nodeinfo[pnode]
7740
      msg = pninfo.fail_msg
7741
      if msg:
7742
        # Assume the primary node is unreachable and go ahead
7743
        self.warn.append("Can't get info from primary node %s: %s" %
7744
                         (pnode, msg))
7745
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
7746
        self.warn.append("Node data from primary node %s doesn't contain"
7747
                         " free memory information" % pnode)
7748
      elif instance_info.fail_msg:
7749
        self.warn.append("Can't get instance runtime information: %s" %
7750
                        instance_info.fail_msg)
7751
      else:
7752
        if instance_info.payload:
7753
          current_mem = int(instance_info.payload['memory'])
7754
        else:
7755
          # Assume instance not running
7756
          # (there is a slight race condition here, but it's not very probable,
7757
          # and we have no other way to check)
7758
          current_mem = 0
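        # memory the primary node would still be short of, after counting the
        # memory currently used by the instance itself as reusable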
7759
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7760
                    pninfo.payload['memory_free'])
7761
        if miss_mem > 0:
7762
          raise errors.OpPrereqError("This change will prevent the instance"
7763
                                     " from starting, due to %d MB of memory"
7764
                                     " missing on its primary node" % miss_mem,
7765
                                     errors.ECODE_NORES)
7766

    
7767
      if be_new[constants.BE_AUTO_BALANCE]:
7768
        for node, nres in nodeinfo.items():
7769
          if node not in instance.secondary_nodes:
7770
            continue
7771
          msg = nres.fail_msg
7772
          if msg:
7773
            self.warn.append("Can't get info from secondary node %s: %s" %
7774
                             (node, msg))
7775
          elif not isinstance(nres.payload.get('memory_free', None), int):
7776
            self.warn.append("Secondary node %s didn't return free"
7777
                             " memory information" % node)
7778
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7779
            self.warn.append("Not enough memory to failover instance to"
7780
                             " secondary node %s" % node)
7781

    
7782
    # NIC processing
7783
    self.nic_pnew = {}
7784
    self.nic_pinst = {}
7785
    for nic_op, nic_dict in self.op.nics:
7786
      if nic_op == constants.DDM_REMOVE:
7787
        if not instance.nics:
7788
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
7789
                                     errors.ECODE_INVAL)
7790
        continue
7791
      if nic_op != constants.DDM_ADD:
7792
        # an existing nic
7793
        if not instance.nics:
7794
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
7795
                                     " no NICs" % nic_op,
7796
                                     errors.ECODE_INVAL)
7797
        if nic_op < 0 or nic_op >= len(instance.nics):
7798
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7799
                                     " are 0 to %d" %
7800
                                     (nic_op, len(instance.nics) - 1),
7801
                                     errors.ECODE_INVAL)
7802
        old_nic_params = instance.nics[nic_op].nicparams
7803
        old_nic_ip = instance.nics[nic_op].ip
7804
      else:
7805
        old_nic_params = {}
7806
        old_nic_ip = None
7807

    
7808
      update_params_dict = dict([(key, nic_dict[key])
7809
                                 for key in constants.NICS_PARAMETERS
7810
                                 if key in nic_dict])
7811

    
7812
      if 'bridge' in nic_dict:
7813
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7814

    
7815
      new_nic_params, new_filled_nic_params = \
7816
          self._GetUpdatedParams(old_nic_params, update_params_dict,
7817
                                 cluster.nicparams[constants.PP_DEFAULT],
7818
                                 constants.NICS_PARAMETER_TYPES)
7819
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7820
      self.nic_pinst[nic_op] = new_nic_params
7821
      self.nic_pnew[nic_op] = new_filled_nic_params
7822
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7823

    
7824
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
7825
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7826
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7827
        if msg:
7828
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7829
          if self.force:
7830
            self.warn.append(msg)
7831
          else:
7832
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
7833
      if new_nic_mode == constants.NIC_MODE_ROUTED:
7834
        if 'ip' in nic_dict:
7835
          nic_ip = nic_dict['ip']
7836
        else:
7837
          nic_ip = old_nic_ip
7838
        if nic_ip is None:
7839
          raise errors.OpPrereqError('Cannot set the nic ip to None'
7840
                                     ' on a routed nic', errors.ECODE_INVAL)
7841
      if 'mac' in nic_dict:
7842
        nic_mac = nic_dict['mac']
7843
        if nic_mac is None:
7844
          raise errors.OpPrereqError('Cannot set the nic mac to None',
7845
                                     errors.ECODE_INVAL)
7846
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7847
          # otherwise generate the mac
7848
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
7849
        else:
7850
          # or validate/reserve the current one
7851
          try:
7852
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
7853
          except errors.ReservationError:
7854
            raise errors.OpPrereqError("MAC address %s already in use"
7855
                                       " in cluster" % nic_mac,
7856
                                       errors.ECODE_NOTUNIQUE)
7857

    
7858
    # DISK processing
7859
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7860
      raise errors.OpPrereqError("Disk operations not supported for"
7861
                                 " diskless instances",
7862
                                 errors.ECODE_INVAL)
7863
    for disk_op, _ in self.op.disks:
7864
      if disk_op == constants.DDM_REMOVE:
7865
        if len(instance.disks) == 1:
7866
          raise errors.OpPrereqError("Cannot remove the last disk of"
7867
                                     " an instance",
7868
                                     errors.ECODE_INVAL)
7869
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7870
        ins_l = ins_l[pnode]
7871
        msg = ins_l.fail_msg
7872
        if msg:
7873
          raise errors.OpPrereqError("Can't contact node %s: %s" %
7874
                                     (pnode, msg), errors.ECODE_ENVIRON)
7875
        if instance.name in ins_l.payload:
7876
          raise errors.OpPrereqError("Instance is running, can't remove"
7877
                                     " disks.", errors.ECODE_STATE)
7878

    
7879
      if (disk_op == constants.DDM_ADD and
7880
          len(instance.disks) >= constants.MAX_DISKS):
7881
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7882
                                   " add more" % constants.MAX_DISKS,
7883
                                   errors.ECODE_STATE)
7884
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7885
        # an existing disk
7886
        if disk_op < 0 or disk_op >= len(instance.disks):
7887
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
7888
                                     " are 0 to %d" %
7889
                                     (disk_op, len(instance.disks) - 1),
7890
                                     errors.ECODE_INVAL)
7891

    
7892
    return
7893

    
7894
  def Exec(self, feedback_fn):
7895
    """Modifies an instance.
7896

7897
    All parameters take effect only at the next restart of the instance.
7898

7899
    """
7900
    # Process here the warnings from CheckPrereq, as we don't have a
7901
    # feedback_fn there.
7902
    for warn in self.warn:
7903
      feedback_fn("WARNING: %s" % warn)
7904

    
7905
    result = []
7906
    instance = self.instance
7907
    # disk changes
7908
    for disk_op, disk_dict in self.op.disks:
7909
      if disk_op == constants.DDM_REMOVE:
7910
        # remove the last disk
7911
        device = instance.disks.pop()
7912
        device_idx = len(instance.disks)
7913
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7914
          self.cfg.SetDiskID(disk, node)
7915
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7916
          if msg:
7917
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7918
                            " continuing anyway", device_idx, node, msg)
7919
        result.append(("disk/%d" % device_idx, "remove"))
7920
      elif disk_op == constants.DDM_ADD:
7921
        # add a new disk
7922
        if instance.disk_template == constants.DT_FILE:
7923
          file_driver, file_path = instance.disks[0].logical_id
7924
          file_path = os.path.dirname(file_path)
7925
        else:
7926
          file_driver = file_path = None
7927
        disk_idx_base = len(instance.disks)
7928
        new_disk = _GenerateDiskTemplate(self,
7929
                                         instance.disk_template,
7930
                                         instance.name, instance.primary_node,
7931
                                         instance.secondary_nodes,
7932
                                         [disk_dict],
7933
                                         file_path,
7934
                                         file_driver,
7935
                                         disk_idx_base)[0]
7936
        instance.disks.append(new_disk)
7937
        info = _GetInstanceInfoText(instance)
7938

    
7939
        logging.info("Creating volume %s for instance %s",
7940
                     new_disk.iv_name, instance.name)
7941
        # Note: this needs to be kept in sync with _CreateDisks
7942
        #HARDCODE
7943
        for node in instance.all_nodes:
7944
          f_create = node == instance.primary_node
7945
          try:
7946
            _CreateBlockDev(self, node, instance, new_disk,
7947
                            f_create, info, f_create)
7948
          except errors.OpExecError, err:
7949
            self.LogWarning("Failed to create volume %s (%s) on"
7950
                            " node %s: %s",
7951
                            new_disk.iv_name, new_disk, node, err)
7952
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7953
                       (new_disk.size, new_disk.mode)))
7954
      else:
7955
        # change a given disk
7956
        instance.disks[disk_op].mode = disk_dict['mode']
7957
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7958
    # NIC changes
7959
    for nic_op, nic_dict in self.op.nics:
7960
      if nic_op == constants.DDM_REMOVE:
7961
        # remove the last nic
7962
        del instance.nics[-1]
7963
        result.append(("nic.%d" % len(instance.nics), "remove"))
7964
      elif nic_op == constants.DDM_ADD:
7965
        # mac and bridge should be set, by now
7966
        mac = nic_dict['mac']
7967
        ip = nic_dict.get('ip', None)
7968
        nicparams = self.nic_pinst[constants.DDM_ADD]
7969
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7970
        instance.nics.append(new_nic)
7971
        result.append(("nic.%d" % (len(instance.nics) - 1),
7972
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7973
                       (new_nic.mac, new_nic.ip,
7974
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7975
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7976
                       )))
7977
      else:
7978
        for key in 'mac', 'ip':
7979
          if key in nic_dict:
7980
            setattr(instance.nics[nic_op], key, nic_dict[key])
7981
        if nic_op in self.nic_pinst:
7982
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
7983
        for key, val in nic_dict.iteritems():
7984
          result.append(("nic.%s/%d" % (key, nic_op), val))
7985

    
7986
    # hvparams changes
7987
    if self.op.hvparams:
7988
      instance.hvparams = self.hv_inst
7989
      for key, val in self.op.hvparams.iteritems():
7990
        result.append(("hv/%s" % key, val))
7991

    
7992
    # beparams changes
7993
    if self.op.beparams:
7994
      instance.beparams = self.be_inst
7995
      for key, val in self.op.beparams.iteritems():
7996
        result.append(("be/%s" % key, val))
7997

    
7998
    self.cfg.Update(instance, feedback_fn)
7999

    
8000
    return result
8001

    
8002

    
8003
class LUQueryExports(NoHooksLU):
8004
  """Query the exports list
8005

8006
  """
8007
  _OP_REQP = ['nodes']
8008
  REQ_BGL = False
8009

    
8010
  def ExpandNames(self):
8011
    self.needed_locks = {}
8012
    self.share_locks[locking.LEVEL_NODE] = 1
8013
    if not self.op.nodes:
8014
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8015
    else:
8016
      self.needed_locks[locking.LEVEL_NODE] = \
8017
        _GetWantedNodes(self, self.op.nodes)
8018

    
8019
  def CheckPrereq(self):
8020
    """Check prerequisites.
8021

8022
    """
8023
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8024

    
8025
  def Exec(self, feedback_fn):
8026
    """Compute the list of all the exported system images.
8027

8028
    @rtype: dict
8029
    @return: a dictionary with the structure node->(export-list)
8030
        where export-list is a list of the instances exported on
8031
        that node.
8032

8033
    """
8034
    rpcresult = self.rpc.call_export_list(self.nodes)
8035
    result = {}
8036
    for node in rpcresult:
8037
      if rpcresult[node].fail_msg:
8038
        result[node] = False
8039
      else:
8040
        result[node] = rpcresult[node].payload
8041

    
8042
    return result
8043

    
8044

    
8045
class LUExportInstance(LogicalUnit):
8046
  """Export an instance to an image in the cluster.
8047

8048
  """
8049
  HPATH = "instance-export"
8050
  HTYPE = constants.HTYPE_INSTANCE
8051
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8052
  REQ_BGL = False
8053

    
8054
  def CheckArguments(self):
8055
    """Check the arguments.
8056

8057
    """
8058
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8059
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8060

    
8061
  def ExpandNames(self):
8062
    self._ExpandAndLockInstance()
8063
    # FIXME: lock only instance primary and destination node
8064
    #
8065
    # Sad but true, for now we have to lock all nodes, as we don't know where
8066
    # the previous export might be, and in this LU we search for it and
8067
    # remove it from its current node. In the future we could fix this by:
8068
    #  - making a tasklet to search (share-lock all), then create the new one,
8069
    #    then one to remove, after
8070
    #  - removing the removal operation altogether
8071
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8072

    
8073
  def DeclareLocks(self, level):
8074
    """Last minute lock declaration."""
8075
    # All nodes are locked anyway, so nothing to do here.
8076

    
8077
  def BuildHooksEnv(self):
8078
    """Build hooks env.
8079

8080
    This will run on the master, primary node and target node.
8081

8082
    """
8083
    env = {
8084
      "EXPORT_NODE": self.op.target_node,
8085
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8086
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8087
      }
8088
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8089
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8090
          self.op.target_node]
8091
    return env, nl, nl
8092

    
8093
  def CheckPrereq(self):
8094
    """Check prerequisites.
8095

8096
    This checks that the instance and node names are valid.
8097

8098
    """
8099
    instance_name = self.op.instance_name
8100
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8101
    assert self.instance is not None, \
8102
          "Cannot retrieve locked instance %s" % self.op.instance_name
8103
    _CheckNodeOnline(self, self.instance.primary_node)
8104

    
8105
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8106
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8107
    assert self.dst_node is not None
8108

    
8109
    _CheckNodeOnline(self, self.dst_node.name)
8110
    _CheckNodeNotDrained(self, self.dst_node.name)
8111

    
8112
    # instance disk type verification
8113
    for disk in self.instance.disks:
8114
      if disk.dev_type == constants.LD_FILE:
8115
        raise errors.OpPrereqError("Export not supported for instances with"
8116
                                   " file-based disks", errors.ECODE_INVAL)
8117

    
8118
  def Exec(self, feedback_fn):
8119
    """Export an instance to an image in the cluster.
8120

8121
    """
8122
    instance = self.instance
8123
    dst_node = self.dst_node
8124
    src_node = instance.primary_node
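    # Rough flow: optionally shut the instance down, snapshot every disk on
    # the primary node, restart the instance if it was shut down, copy each
    # snapshot to the destination node, finalize the export there and finally
    # prune older exports from the other nodes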
8125

    
8126
    if self.op.shutdown:
8127
      # shutdown the instance, but not the disks
8128
      feedback_fn("Shutting down instance %s" % instance.name)
8129
      result = self.rpc.call_instance_shutdown(src_node, instance,
8130
                                               self.shutdown_timeout)
8131
      result.Raise("Could not shutdown instance %s on"
8132
                   " node %s" % (instance.name, src_node))
8133

    
8134
    vgname = self.cfg.GetVGName()
8135

    
8136
    snap_disks = []
8137

    
8138
    # set the disks ID correctly since call_instance_start needs the
8139
    # correct drbd minor to create the symlinks
8140
    for disk in instance.disks:
8141
      self.cfg.SetDiskID(disk, src_node)
8142

    
8143
    activate_disks = (not instance.admin_up)
8144

    
8145
    if activate_disks:
8146
      # Activate the instance disks if we're exporting a stopped instance
8147
      feedback_fn("Activating disks for %s" % instance.name)
8148
      _StartInstanceDisks(self, instance, None)
8149

    
8150
    try:
8151
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        if self.op.shutdown and instance.admin_up:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults


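# Illustrative sketch (editorial addition, not part of the original module):
# how a caller might interpret the (fin_resu, dresults) pair returned by the
# Exec method above.  The export is only fully usable if finalization
# succeeded and every per-disk export succeeded; the instance name below is
# hypothetical.
#
#   fin_resu, dresults = ...  # e.g. (True, [True, False])
#   if fin_resu and all(dresults):
#     feedback_fn("Export of inst1.example.com completed successfully")
#   elif fin_resu:
#     feedback_fn("Export finalized, but some disks failed to export")
#   else:
#     feedback_fn("Export could not be finalized")

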
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


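# Illustrative sketch (editorial addition, not from the source): the shape of
# the per-node RPC results consumed above.  call_export_list() returns a dict
# keyed by node name; each entry exposes .fail_msg (non-empty string on
# failure) and .payload (here, the list of export names present on that
# node).  Node and instance names below are hypothetical.
#
#   exportlist = {
#     "node1.example.com": result(payload=["inst1.example.com"]),
#     "node2.example.com": result(fail_msg="Node unreachable"),
#   }
#   for node in exportlist:
#     if exportlist[node].fail_msg:
#       continue                      # skip nodes that could not be queried
#     # otherwise inspect exportlist[node].payload

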
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


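# Summary sketch of how TagsLU resolves self.op.kind (editorial aid derived
# from the two methods above, not original code):
#
#   constants.TAG_CLUSTER  -> no extra locks, target = cluster config object
#   constants.TAG_NODE     -> locks the named node, target = node object
#   constants.TAG_INSTANCE -> locks the named instance, target = instance
#
# Any other kind raises OpPrereqError with ECODE_INVAL in CheckPrereq.

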
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


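# Illustrative example (editorial addition; names and tags are hypothetical):
# for a pattern such as r"^env:", LUSearchTags.Exec above would return
# (path, tag) pairs like:
#
#   [("/cluster", "env:production"),
#    ("/instances/web1.example.com", "env:production"),
#    ("/nodes/node1.example.com", "env:staging")]

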
class LUAddTags(TagsLU):
  """Sets one or more tags on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of each tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tags.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


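# Minimal sketch of the prerequisite check used by LUDelTags above (editorial
# illustration; the tag names are hypothetical):
#
#   del_tags = frozenset(["env:prod", "owner:alice"])
#   cur_tags = set(["env:prod", "tier:web"])
#   del_tags <= cur_tags    # False -> "owner:alice" is reported as missing
#   del_tags - cur_tags     # frozenset(["owner:alice"])

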
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


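# Illustrative sketch (editorial addition) of the opcode fields LUTestDelay
# consumes, per _OP_REQP above; the values and node name are hypothetical:
#
#   duration  = 10.0                      # seconds to sleep
#   on_master = True                      # sleep on the master node itself
#   on_nodes  = ["node1.example.com"]     # sleep remotely via call_test_delay

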
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the mode's _ALLO_KEYS/_RELO_KEYS class
      attribute are required)
    - four buffer attributes (in|out_data|text), which represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

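  # Usage sketch (editorial addition; the instance/node names and values are
  # hypothetical, and cfg/rpc stand for the calling LU's self.cfg/self.rpc).
  # The keyword arguments must match the keyset of the selected mode exactly,
  # otherwise __init__ raises ProgrammerError:
  #
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    "new1.example.com",
  #                    mem_size=512, disks=[{"size": 1024, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8, os="debootstrap",
  #                    tags=[], vcpus=1, hypervisor="xen-pvm",
  #                    nics=[{"mac": "auto", "ip": None, "bridge": None}])
  #
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    "inst1.example.com",
  #                    relocate_from=["node2.example.com"])
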
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

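  # Example of a single entry in data["nodes"] as built above (editorial
  # illustration; the node name and numbers are hypothetical, sizes are as
  # reported by the node RPC).  The dynamic keys are only present for nodes
  # that are neither offline nor drained:
  #
  #   "node1.example.com": {
  #     "tags": [], "primary_ip": "192.0.2.10",
  #     "secondary_ip": "198.51.100.10",
  #     "offline": False, "drained": False, "master_candidate": True,
  #     "total_memory": 4096, "reserved_memory": 512, "free_memory": 2048,
  #     "total_disk": 102400, "free_disk": 51200, "total_cpus": 4,
  #     "i_pri_memory": 1024, "i_pri_up_memory": 768,
  #   }
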
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

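  # Example "request" section produced above for allocation mode (editorial
  # illustration; names and values are hypothetical):
  #
  #   {
  #     "type": "allocate",
  #     "name": "new1.example.com",
  #     "disk_template": "drbd",
  #     "tags": [],
  #     "os": "debootstrap",
  #     "vcpus": 1,
  #     "memory": 512,
  #     "disks": [{"size": 1024, "mode": "w"}],
  #     "disk_space_total": disk_space,   # from _ComputeDiskSize above
  #     "nics": [{"mac": "auto", "ip": None, "bridge": None}],
  #     "required_nodes": 2,              # mirrored template -> two nodes
  #   }
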
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

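  # Example "request" section produced above for relocation mode (editorial
  # illustration; the instance and node names are hypothetical):
  #
  #   {
  #     "type": "relocate",
  #     "name": "inst1.example.com",
  #     "disk_space_total": disk_space,   # from _ComputeDiskSize above
  #     "required_nodes": 1,
  #     "relocate_from": ["node2.example.com"],
  #   }
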
  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


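# Editorial illustration of the reply that _ValidateResult above expects from
# an iallocator script, in its serialized form (node names hypothetical).
# Only the presence of "success", "info" and "nodes", and the list type of
# "nodes", are checked here:
#
#   {
#     "success": true,
#     "info": "allocation successful",
#     "nodes": ["node1.example.com", "node3.example.com"]
#   }

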
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

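  # Summary of the opcode attributes checked above (editorial aid derived from
  # CheckPrereq, not original code): allocation mode requires name, mem_size,
  # disks, disk_template, os, tags, nics and vcpus, with hypervisor defaulting
  # to the cluster hypervisor; relocation mode only requires name, and
  # relocate_from is taken from the instance's secondary nodes.  When the
  # direction is IALLOCATOR_DIR_OUT, an allocator name must be given as well.
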
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result