Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 857121ad

History | View | Annotate | Download (309.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36

    
37
from ganeti import ssh
38
from ganeti import utils
39
from ganeti import errors
40
from ganeti import hypervisor
41
from ganeti import locking
42
from ganeti import constants
43
from ganeti import objects
44
from ganeti import serializer
45
from ganeti import ssconf
46

    
47

    
48
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  # Names of opcode attributes that must be present and not None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    Besides storing its arguments, the constructor verifies that every
    attribute named in C{_OP_REQP} is set (not None) on the opcode and then
    calls L{CheckArguments}.

    @param processor: object providing LogWarning, LogInfo and LogStep
    @param op: the opcode this LU works on
    @param context: object whose C{cfg} attribute is the configuration
    @param rpc: the RPC runner stored as C{self.rpc}
    @raise errors.OpPrereqError: if a required opcode parameter is missing

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # The SshRunner is created lazily, see __GetSSH
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object, creating it on first use.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: if the LU does not require the BGL and has
        not overridden this method

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation runs CheckPrereq on every tasklet, in
    order, and raises NotImplementedError if no tasklets were defined.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation runs Exec on every tasklet, in order,
    and raises NotImplementedError if no tasklets were defined.

    @param feedback_fn: passed unchanged to each tasklet's Exec

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  # this is valid in this entire class even if added here
  # pylint: disable-msg=R0201
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    @raise errors.OpPrereqError: if the instance name cannot be expanded

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.acquired_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    # The request has been served; a new one must be set before the next call
    del self.recalculate_locks[locking.LEVEL_NODE]
353

    
354

    
355
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    @raise AssertionError: always

    """
    # Raised explicitly (instead of "assert False") so that the guard is
    # not stripped when Python runs with optimizations enabled
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
372

    
373

    
374
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Remembers the owning LU and caches its cfg and rpc attributes.

    @param lu: the logical unit this tasklet belongs to

    """
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
418

    
419

    
420
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names to expand
  @rtype: list
  @return: the list of expanded node names, sorted
  @raise errors.OpPrereqError: if the nodes parameter is not a list, or
      if any of the passed names cannot be expanded
  @raise errors.ProgrammerError: if the nodes list is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name,
                                 errors.ECODE_NOENT)
    wanted.append(node)

  return utils.NiceSort(wanted)
449

    
450

    
451
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names, or an empty list for all
      instances
  @rtype: list
  @return: the expanded names in the order they were passed; if the list
      was empty, all instance names in the configuration, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name,
                                   errors.ECODE_NOENT)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
481

    
482

    
483
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the field names to validate against the union of the
      static and dynamic sets
  @raise errors.OpPrereqError: if any selected field matches neither set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
500

    
501

    
502
def _CheckBooleanOpField(op, name):
503
  """Validates boolean opcode parameters.
504

505
  This will ensure that an opcode parameter is either a boolean value,
506
  or None (but that it always exists).
507

508
  """
509
  val = getattr(op, name, None)
510
  if not (val is None or isinstance(val, bool)):
511
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
512
                               (name, str(val)), errors.ECODE_INVAL)
513
  setattr(op, name, val)
514

    
515

    
516
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @param params: iterable of hypervisor parameter names (e.g. a dict
      keyed by parameter name)
  @raise errors.OpPrereqError: if any name is in constants.HVC_GLOBALS

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
529

    
530

    
531
def _CheckNodeOnline(lu, node):
532
  """Ensure that a given node is online.
533

534
  @param lu: the LU on behalf of which we make the check
535
  @param node: the node to check
536
  @raise errors.OpPrereqError: if the node is offline
537

538
  """
539
  if lu.cfg.GetNodeInfo(node).offline:
540
    raise errors.OpPrereqError("Can't use offline node %s" % node,
541
                               errors.ECODE_INVAL)
542

    
543

    
544
def _CheckNodeNotDrained(lu, node):
545
  """Ensure that a given node is not drained.
546

547
  @param lu: the LU on behalf of which we make the check
548
  @param node: the node to check
549
  @raise errors.OpPrereqError: if the node is drained
550

551
  """
552
  if lu.cfg.GetNodeInfo(node).drained:
553
    raise errors.OpPrereqError("Can't use drained node %s" % node,
554
                               errors.ECODE_INVAL)
555

    
556

    
557
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
558
                          memory, vcpus, nics, disk_template, disks,
559
                          bep, hvp, hypervisor_name):
560
  """Builds instance related env variables for hooks
561

562
  This builds the hook environment from individual variables.
563

564
  @type name: string
565
  @param name: the name of the instance
566
  @type primary_node: string
567
  @param primary_node: the name of the instance's primary node
568
  @type secondary_nodes: list
569
  @param secondary_nodes: list of secondary nodes as strings
570
  @type os_type: string
571
  @param os_type: the name of the instance's OS
572
  @type status: boolean
573
  @param status: the should_run status of the instance
574
  @type memory: string
575
  @param memory: the memory size of the instance
576
  @type vcpus: string
577
  @param vcpus: the count of VCPUs the instance has
578
  @type nics: list
579
  @param nics: list of tuples (ip, mac, mode, link) representing
580
      the NICs the instance has
581
  @type disk_template: string
582
  @param disk_template: the disk template of the instance
583
  @type disks: list
584
  @param disks: the list of (size, mode) pairs
585
  @type bep: dict
586
  @param bep: the backend parameters for the instance
587
  @type hvp: dict
588
  @param hvp: the hypervisor parameters for the instance
589
  @type hypervisor_name: string
590
  @param hypervisor_name: the hypervisor for the instance
591
  @rtype: dict
592
  @return: the hook environment for this instance
593

594
  """
595
  if status:
596
    str_status = "up"
597
  else:
598
    str_status = "down"
599
  env = {
600
    "OP_TARGET": name,
601
    "INSTANCE_NAME": name,
602
    "INSTANCE_PRIMARY": primary_node,
603
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
604
    "INSTANCE_OS_TYPE": os_type,
605
    "INSTANCE_STATUS": str_status,
606
    "INSTANCE_MEMORY": memory,
607
    "INSTANCE_VCPUS": vcpus,
608
    "INSTANCE_DISK_TEMPLATE": disk_template,
609
    "INSTANCE_HYPERVISOR": hypervisor_name,
610
  }
611

    
612
  if nics:
613
    nic_count = len(nics)
614
    for idx, (ip, mac, mode, link) in enumerate(nics):
615
      if ip is None:
616
        ip = ""
617
      env["INSTANCE_NIC%d_IP" % idx] = ip
618
      env["INSTANCE_NIC%d_MAC" % idx] = mac
619
      env["INSTANCE_NIC%d_MODE" % idx] = mode
620
      env["INSTANCE_NIC%d_LINK" % idx] = link
621
      if mode == constants.NIC_MODE_BRIDGED:
622
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
623
  else:
624
    nic_count = 0
625

    
626
  env["INSTANCE_NIC_COUNT"] = nic_count
627

    
628
  if disks:
629
    disk_count = len(disks)
630
    for idx, (size, mode) in enumerate(disks):
631
      env["INSTANCE_DISK%d_SIZE" % idx] = size
632
      env["INSTANCE_DISK%d_MODE" % idx] = mode
633
  else:
634
    disk_count = 0
635

    
636
  env["INSTANCE_DISK_COUNT"] = disk_count
637

    
638
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
639
    for key, value in source.items():
640
      env["INSTANCE_%s_%s" % (kind, key)] = value
641

    
642
  return env
643

    
644

    
645
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one (ip, mac, mode, link) tuple per nic, where mode and link
      come from the nic's parameters filled with the cluster defaults

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
667

    
668

    
669
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  # Keys must match the parameter names of _BuildInstanceHookEnv, as the
  # dict is passed to it as keyword arguments
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
705

    
706

    
707
def _AdjustCandidatePool(lu, exceptions):
708
  """Adjust the candidate pool after node operations.
709

710
  """
711
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
712
  if mod_list:
713
    lu.LogInfo("Promoted nodes to master candidate role: %s",
714
               utils.CommaJoin(node.name for node in mod_list))
715
    for name in mod_list:
716
      lu.context.ReaddNode(name)
717
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
718
  if mc_now > mc_max:
719
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
720
               (mc_now, mc_max))
721

    
722

    
723
def _DecideSelfPromotion(lu, exceptions=None):
724
  """Decide whether I should promote myself as a master candidate.
725

726
  """
727
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
728
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
729
  # the new node will increase mc_max with one, so:
730
  mc_should = min(mc_should + 1, cp_size)
731
  return mc_now < mc_should
732

    
733

    
734
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the nics whose bridges should be checked
  @param target_node: the node on which the bridges must exist
  @param profile: key into the cluster's nicparams giving the defaults
      used to fill each nic's parameters

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  # Only bridged nics need a bridge; for those the link names the bridge
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
748

    
749

    
750
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose nics are checked
  @param node: the node to check on; defaults to the instance's
      primary node

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
757

    
758

    
759
def _CheckOSVariant(os_obj, name):
760
  """Check whether an OS name conforms to the os variants specification.
761

762
  @type os_obj: L{objects.OS}
763
  @param os_obj: OS object to check
764
  @type name: string
765
  @param name: OS name passed by the user, to check for validity
766

767
  """
768
  if not os_obj.supported_variants:
769
    return
770
  try:
771
    variant = name.split("+", 1)[1]
772
  except IndexError:
773
    raise errors.OpPrereqError("OS name must include a variant",
774
                               errors.ECODE_INVAL)
775

    
776
  if variant not in os_obj.supported_variants:
777
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
778

    
779

    
780
def _GetNodeInstancesInner(cfg, fn):
781
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
782

    
783

    
784
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the configuration to query
  @param node_name: the node name looked up in each instance's all_nodes

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
790

    
791

    
792
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the configuration to query
  @param node_name: the node name compared with each instance's
      primary_node

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
798

    
799

    
800
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the configuration to query
  @param node_name: the node name looked up in each instance's
      secondary_nodes

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
806

    
807

    
808
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the configuration, asked for the file storage directory
  @param storage_type: the storage type to return arguments for
  @rtype: list
  @return: a one-element list holding the list of storage directories for
      constants.ST_FILE, an empty list for any other type

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []
818

    
819

    
820
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of an instance's faulty disks on a node.

  @param cfg: the configuration, used to set the disk IDs for the node
  @param rpc: the RPC runner used to query the mirror status
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed to the RPC result's Raise method as its prereq
      argument
  @rtype: list
  @return: indices (into instance.disks) of the disks whose reported
      local disk status is constants.LDS_FAULTY

  """
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Entries without a status (false values) are skipped
  return [idx for idx, bdev_status in enumerate(result.payload)
          if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY]
835

    
836

    
837
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: the environment (with the cluster name as OP_TARGET), an
        empty pre-execution node list, and a post-execution node list
        holding only the master node

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
864

    
865

    
866
class LUDestroyCluster(LogicalUnit):
  """Logical unit that tears the cluster down.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @return: tuple of the environment dict and two empty node lists

    """
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
    return hooks_env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    The cluster can only be destroyed once it is empty, i.e. the master
    is the only node left and no instances are defined.

    @raise errors.OpPrereqError: if other nodes or any instances remain

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      extra_nodes = len(nodelist) - 1
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % extra_nodes,
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # The post hooks have to run on the master node before it is removed;
    # any failure while doing so is deliberately only logged
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      for key_file in (priv_key, pub_key):
        utils.CreateBackup(key_file)

    return master
926

    
927

    
928
class LUVerifyCluster(LogicalUnit):
929
  """Verifies the cluster status.
930

931
  """
932
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  # Kinds of item an error can be attached to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # Error codes; each one is a tuple of (item kind, code text), which is
  # the form _Error unpacks
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # Name of the keyword argument carrying the message level in
  # _Error/_ErrorIf, followed by the two levels in use
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
966

    
967
  def ExpandNames(self):
    """Declare the locks needed: all nodes and all instances.

    """
    wanted_locks = {}
    for level in (locking.LEVEL_NODE, locking.LEVEL_INSTANCE):
      wanted_locks[level] = locking.ALL_SET
    self.needed_locks = wanted_locks

    # every level is flagged with 1; presumably this requests the locks
    # in shared mode (confirm against the locking documentation)
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
973

    
974
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message and hand it to the feedback function.

    Depending on the opcode's error_codes parameter the message is
    emitted either in a colon-separated, parseable form or as a simpler
    error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: tuple of (item type, error code text)
    @param item: name of the affected item; may be empty or None
    @param msg: the message; %-interpolated with C{args} if any are given
    @keyword code: the message level, L{ETYPE_ERROR} if not given

    """
    level = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (item_type, code_text) = ecode

    # complete the message itself first
    text = msg
    if args:
      text = text % args

    # then wrap it into one of the two output formats
    if self.op.error_codes:
      report = "%s:%s:%s:%s:%s" % (level, code_text, item_type, item, text)
    elif item:
      report = "%s: %s%s: %s" % (level, item_type, " " + item, text)
    else:
      report = "%s: %s%s: %s" % (level, item_type, "", text)

    # and finally send it out via the feedback function
    self._feedback_fn("  - %s" % report)
999

    
1000
  def _ErrorIf(self, cond, *args, **kwargs):
    """Report an error via L{_Error} if the passed condition holds.

    The condition is also treated as true when the opcode asks for
    errors to be simulated. Messages of level L{ETYPE_ERROR} additionally
    mark the whole operation as failed through C{self.bad}.

    @param cond: the condition to check
    @param args: positional arguments passed on to L{_Error}
    @param kwargs: keyword arguments passed on to L{_Error}

    """
    triggered = bool(cond) or self.op.debug_simulate_errors
    if triggered:
      self._Error(*args, **kwargs)

    level = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if level != self.ETYPE_ERROR:
      # anything other than an error never fails the operation
      return
    self.bad = self.bad or triggered
1010

    
1011
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run the per-node checks against the data a node has returned.

    The checks, in order:

      - protocol and software version
      - volume group existence and minimum size
      - checksums of the distributed files
      - ssh and tcp connectivity to the other nodes
      - hypervisor verification results
      - DRBD minors in use
      - node setup
      - physical volume names

    Nothing beyond the version checks is looked at if the node returned
    no usable data or speaks a different protocol version.

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    report_if = self._ErrorIf
    lvm_enabled = vg_name is not None

    # the overall result has to be a non-empty dictionary
    no_data = not (node_result and isinstance(node_result, dict))
    report_if(no_data, self.ENODERPC, node,
              "unable to verify node: no data returned")
    if no_data:
      return

    # protocol version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    malformed = (not remote_version or
                 not isinstance(remote_version, (list, tuple)) or
                 len(remote_version) != 2)
    report_if(malformed, self.ENODERPC, node,
              "connection to node returned invalid data")
    if malformed:
      return

    (remote_protocol, remote_release) = remote_version
    incompatible = local_version != remote_protocol
    report_if(incompatible, self.ENODEVERSION, node,
              "incompatible protocol versions: master %s,"
              " node %s", local_version, remote_protocol)
    if incompatible:
      return

    # the node seems compatible, so its results can be looked into

    # full package version; a difference here is only a warning
    report_if(constants.RELEASE_VERSION != remote_release,
              self.ENODEVERSION, node,
              "software version mismatch: master %s, node %s",
              constants.RELEASE_VERSION, remote_release,
              code=self.ETYPE_WARNING)

    # volume group existence and size
    if lvm_enabled:
      vglist = node_result.get(constants.NV_VGLIST, None)
      report_if(not vglist, self.ENODELVM, node,
                "unable to check volume groups")
      if vglist:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        report_if(vgstatus, self.ENODELVM, node, vgstatus)

    # checksums of the distributed files
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    have_cksums = isinstance(remote_cksum, dict)
    report_if(not have_cksums, self.ENODEFILECHECK, node,
              "node hasn't returned file checksum data")
    if have_cksums:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        present = file_name in remote_cksum
        # a file that is present is either outdated or current
        outdated = (present and
                    remote_cksum[file_name] != local_cksum[file_name])
        current = (present and
                   remote_cksum[file_name] == local_cksum[file_name])
        report_if(not present and must_have, self.ENODEFILECHECK, node,
                  "file '%s' missing", file_name)
        report_if(outdated and must_have, self.ENODEFILECHECK, node,
                  "file '%s' has wrong checksum", file_name)
        # the node is no candidate and the file is not a must-have one
        report_if(outdated and not must_have, self.ENODEFILECHECK, node,
                  "file '%s' should not exist on non master"
                  " candidates (and the file is outdated)", file_name)
        # the content is fine, but the file should not be there at all
        report_if(current and not must_have, self.ENODEFILECHECK, node,
                  "file '%s' should not exist"
                  " on non master candidates", file_name)

    # ssh connectivity; each entry returned is reported as a failure
    have_ssh = constants.NV_NODELIST in node_result
    report_if(not have_ssh, self.ENODESSH, node,
              "node hasn't returned node ssh connectivity data")
    if have_ssh and node_result[constants.NV_NODELIST]:
      for (peer, peer_msg) in node_result[constants.NV_NODELIST].items():
        report_if(True, self.ENODESSH, node,
                  "ssh communication with node '%s': %s", peer, peer_msg)

    # tcp connectivity; each entry returned is reported as a failure
    have_net = constants.NV_NODENETTEST in node_result
    report_if(not have_net, self.ENODENET, node,
              "node hasn't returned node tcp connectivity data")
    if have_net and node_result[constants.NV_NODENETTEST]:
      net_failures = node_result[constants.NV_NODENETTEST]
      for peer in utils.NiceSort(net_failures.keys()):
        report_if(True, self.ENODENET, node,
                  "tcp communication with node '%s': %s",
                  peer, net_failures[peer])

    # hypervisor results; anything but None is a failure
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for (hv_name, hv_result) in hyp_result.iteritems():
        report_if(hv_result is not None, self.ENODEHV, node,
                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # drbd minors in use
    if lvm_enabled:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      unparseable = not isinstance(used_minors, (tuple, list))
      report_if(unparseable, self.ENODEDRBD, node,
                "cannot parse drbd status file: %s", str(used_minors))
      if not unparseable:
        for (minor, (iname, must_exist)) in drbd_map.items():
          inactive = minor not in used_minors and must_exist
          report_if(inactive, self.ENODEDRBD, node,
                    "drbd minor %d of instance %s is not active",
                    minor, iname)
        for minor in used_minors:
          report_if(minor not in drbd_map, self.ENODEDRBD, node,
                    "unallocated drbd minor %d is in use", minor)

    # node setup; missing results count as a problem as well
    setup_problems = node_result.get(constants.NV_NODESETUP,
                                     ["Missing NODESETUP results"])
    report_if(setup_problems, self.ENODESETUP, node, "node setup error: %s",
              "; ".join(setup_problems))

    # physical volume names
    if lvm_enabled:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      no_pvlist = pvlist is None
      report_if(no_pvlist, self.ENODELVM, node, "Can't get PV list from node")
      if not no_pvlist:
        # ':' must not show up in PV names, since it's a special
        # character for lvcreate (denotes the range of PEs to use on
        # the PV)
        for (_, pvname, owner_vg) in pvlist:
          report_if(":" in pvname, self.ENODELVM, node,
                    "Invalid character ':' in PV"
                    " '%s' of VG '%s'", pvname, owner_vg)
1173

    
1174
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify a single instance.

    Three things are checked: that the volumes the instance needs exist
    on its nodes (offline nodes are skipped), that an instance marked
    as up runs on its primary node, and that it runs nowhere else.

    @param instance: the instance name
    @param instanceconfig: the instance's configuration object
    @param node_vol_is: per-node collection of the volumes found
    @param node_instance: per-node collection of the instances found
    @param n_offline: collection of the offline nodes

    """
    report_if = self._ErrorIf
    primary = instanceconfig.primary_node

    expected_volumes = {}
    instanceconfig.MapLVsByNode(expected_volumes)

    for (node, volumes) in expected_volumes.items():
      if node in n_offline:
        # missing volumes on offline nodes are ignored
        continue
      for volume in volumes:
        present = node in node_vol_is and volume in node_vol_is[node]
        report_if(not present, self.EINSTANCEMISSINGDISK, instance,
                  "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      runs_on_primary = (primary in node_instance and
                         instance in node_instance[primary])
      down = not runs_on_primary and primary not in n_offline
      report_if(down, self.EINSTANCEDOWN, instance,
                "instance not running on its primary node %s",
                primary)

    for (node, running) in node_instance.items():
      if node == primary:
        continue
      report_if(instance in running, self.EINSTANCEWRONGNODE, instance,
                "instance should not run on node %s", node)
1210

    
1211
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    Every volume found on a node which is not among the volumes expected
    on that node is reported as unknown.

    @param node_vol_should: per-node collection of the expected volumes
    @param node_vol_is: per-node collection of the volumes found

    """
    for (node, volumes) in node_vol_is.items():
      for volume in volumes:
        known = (node in node_vol_should and
                 volume in node_vol_should[node])
        self._ErrorIf(not known, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1224

    
1225
  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    Reports every instance found on a node that is not part of the
    passed list of instances.

    @param instancelist: the instance names the cluster knows about
    @param node_instance: per-node collection of the instances found

    """
    for (node, running) in node_instance.items():
      for found in running:
        unknown = found not in instancelist
        self._ErrorIf(unknown, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", found, node)
1236

    
1237
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    @param node_info: per-node dictionaries; the 'mfree' and
        'sinst-by-pnode' entries are the ones used here
    @param instance_cfg: dictionary of instance name to its configuration

    """
    for (node, per_node) in node_info.iteritems():
      # For every node currently listed as secondary, make sure it has
      # enough memory to host all the instances it is supposed to take
      # over should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: down instances are currently counted just like running
      # ones, since someone might want to start them even in the event
      # of a node failure.
      for (prinode, instances) in per_node['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if not bep[constants.BE_AUTO_BALANCE]:
            continue
          needed_mem += bep[constants.BE_MEMORY]
        too_little = per_node['mfree'] < needed_mem
        self._ErrorIf(too_little, self.ENODEN1, node,
                      "not enough memory on to accommodate"
                      " failovers should peer node %s fail", prinode)
1263

    
1264
  def CheckPrereq(self):
    """Check prerequisites.

    Stores the checks to be skipped as a frozenset in C{self.skip_set}
    and makes sure all of them are known optional checks.

    @raise errors.OpPrereqError: if an unknown check is to be skipped

    """
    skipped = frozenset(self.op.skip_checks)
    self.skip_set = skipped
    if constants.VERIFY_OPTIONAL_CHECKS.issuperset(skipped):
      return
    raise errors.OpPrereqError("Invalid checks to be skipped specified",
                               errors.ECODE_INVAL)
1275

    
1276
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are only run in the post phase; their failure
    is logged in the verify output and makes the verification fail.

    @return: tuple of the environment (the cluster tags plus one entry
        with the tags of each node), an empty node list and the list of
        all nodes

    """
    all_nodes = self.cfg.GetNodeList()

    cluster_tags = self.cfg.GetClusterInfo().GetTags()
    env = {}
    env["CLUSTER_TAGS"] = " ".join(cluster_tags)

    for node in self.cfg.GetAllNodesInfo().values():
      node_tags = " ".join(node.GetTags())
      env["NODE_TAGS_%s" % node.name] = node_tags

    return env, [], all_nodes
1291

    
1292
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    The configuration is verified first, then a single node-verify RPC
    is sent to all nodes and its results are checked node by node. After
    that each instance is checked against the collected node data, and
    finally orphan volumes, orphan instances and (unless skipped) the
    N+1 memory redundancy are verified.

    @param feedback_fn: function used to report progress and problems
    @rtype: boolean
    @return: True if no error (as opposed to warning) was recorded

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist)")
      if test:
        continue

      node_instance[node] = idata

      # node_info; kept under its own name so that the list of nodes
      # being iterated over (nodeinfo) is not rebound inside the loop
      hv_info = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(hv_info, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        # without a usable time value there is nothing to compare, so the
        # error is reported unconditionally and the node is skipped, in
        # the same way as a node whose time diverges
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      # The difference is formatted right away, so that the message below
      # can also be built when no divergence was found (simulated errors)
      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%0.1fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%0.1fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
        node_info[node] = {
          "mfree": int(hv_info['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes",
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad
1575

    
1576
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    Exec reports success as a true value, so a communication failure
    with a node that is not offline, or a failed hook script, turns the
    result into False. For any phase other than the post phase the
    previous result is handed back unchanged.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase != constants.HOOKS_PHASE_POST:
      return lu_result

    # Used to change hooks' output to proper indentation
    indent_re = re.compile('^', re.M)
    feedback_fn("* Hooks Results")
    assert hooks_results, "invalid result from hooks"

    for node_name in hooks_results:
      res = hooks_results[node_name]
      msg = res.fail_msg
      test = msg and not res.offline
      self._ErrorIf(test, self.ENODEHOOKS, node_name,
                    "Communication failure in hooks execution: %s", msg)
      if test:
        # override manually lu_result here as _ErrorIf only
        # overrides self.bad
        lu_result = False
        continue
      for script, hkr, output in res.payload:
        test = hkr == constants.HKR_FAIL
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Script %s failed, output:", script)
        if test:
          output = indent_re.sub('      ', output)
          feedback_fn("%s" % output)
          lu_result = False

    return lu_result
1620

    
1621

    
1622
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks: all nodes and all instances, in shared mode.

    """
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    failed_nodes = {}
    need_activation = []
    missing_lvs = {}
    # the three containers are filled in place below
    outcome = (failed_nodes, need_activation, missing_lvs)

    vg_name = self.cfg.GetVGName()
    node_names = utils.NiceSort(self.cfg.GetNodeList())
    all_instances = []
    for iname in self.cfg.GetInstanceList():
      all_instances.append(self.cfg.GetInstanceInfo(iname))

    # map (node, volume) -> owning instance, only for instances which
    # are marked up and use a network-mirrored disk template
    lv_owner = {}
    for inst in all_instances:
      if inst.admin_up and inst.disk_template in constants.DTS_NET_MIRROR:
        lvs_by_node = {}
        inst.MapLVsByNode(lvs_by_node)
        for node, volumes in lvs_by_node.iteritems():
          for vol in volumes:
            lv_owner[(node, vol)] = inst

    if not lv_owner:
      return outcome

    lv_answers = self.rpc.call_lv_list(node_names, vg_name)

    for node in node_names:
      answer = lv_answers[node]
      if answer.offline:
        continue
      err = answer.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s", node, err)
        failed_nodes[node] = err
        continue

      for lv_name, (_, _, lv_online) in answer.payload.items():
        # every volume reported by the node is removed from the map
        owner = lv_owner.pop((node, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in need_activation:
          need_activation.append(owner.name)

    # whatever was not reported by any node is a missing LV; group
    # the leftovers by instance name
    for key, owner in lv_owner.iteritems():
      missing_lvs.setdefault(owner.name, []).append(key)

    return outcome
1703

    
1704

    
1705
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare the locks.

    With an explicit instance list only those instances are locked and
    the node locks are computed later in L{DeclareLocks}; with an empty
    list all nodes and instances are locked. All locks are shared.

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name,
                                     errors.ECODE_NOENT)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list was given, work on every instance we locked
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether the size of any (recursive) child was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # the RPC is done on copies, so that SetDiskID does not touch the
      # disk objects which may be written back to the configuration
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # the per-disk sizes are in the payload of the result, as for all
      # the other RPC results handled in this module
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # scale the reported value down by 2**20 before comparing it
        # with the recorded size
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
1826

    
1827

    
1828
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node only, both before and after the
    operation.

    """
    master_node = self.cfg.GetMasterNode()
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return hooks_env, [master_node], [master_node]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, it must differ from the current one either
    in name or in IP, and a changed IP must not answer on the node
    daemon port.

    """
    resolved = utils.GetHostInfo(self.op.name)

    new_name = resolved.name
    new_ip = resolved.ip
    self.ip = new_ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip and utils.TcpPing(new_ip,
                                          constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    # from now on work with the fully resolved name
    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    new_cluster_name = self.op.name
    new_master_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_cluster_name
      cluster.master_ip = new_master_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # and push it to all the nodes but the master
      other_nodes = self.cfg.GetNodeList()
      if master in other_nodes:
        other_nodes.remove(master)
      upload = self.rpc.call_upload_file(other_nodes,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in upload.iteritems():
        err = to_result.fail_msg
        if not err:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node, err))

    finally:
      # whatever happened above, try to bring the master role back
      start_result = self.rpc.call_node_start_master(master, False, False)
      start_err = start_result.fail_msg
      if start_err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", start_err)
1909

    
1910

    
1911
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  # look at the whole subtree first, the disk itself comes last
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1925

    
1926

    
1927
class LUSetClusterParams(LogicalUnit):
1928
  """Change the parameters of the cluster.
1929

1930
  """
1931
  HPATH = "cluster-modify"
1932
  HTYPE = constants.HTYPE_CLUSTER
1933
  _OP_REQP = []
1934
  REQ_BGL = False
1935

    
1936
  def CheckArguments(self):
1937
    """Check parameters
1938

1939
    """
1940
    if not hasattr(self.op, "candidate_pool_size"):
1941
      self.op.candidate_pool_size = None
1942
    if self.op.candidate_pool_size is not None:
1943
      try:
1944
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1945
      except (ValueError, TypeError), err:
1946
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1947
                                   str(err), errors.ECODE_INVAL)
1948
      if self.op.candidate_pool_size < 1:
1949
        raise errors.OpPrereqError("At least one master candidate needed",
1950
                                   errors.ECODE_INVAL)
1951

    
1952
  def ExpandNames(self):
1953
    # FIXME: in the future maybe other cluster params won't require checking on
1954
    # all nodes to be modified.
1955
    self.needed_locks = {
1956
      locking.LEVEL_NODE: locking.ALL_SET,
1957
    }
1958
    self.share_locks[locking.LEVEL_NODE] = 1
1959

    
1960
  def BuildHooksEnv(self):
1961
    """Build hooks env.
1962

1963
    """
1964
    env = {
1965
      "OP_TARGET": self.cfg.GetClusterName(),
1966
      "NEW_VG_NAME": self.op.vg_name,
1967
      }
1968
    mn = self.cfg.GetMasterNode()
1969
    return env, [mn], [mn]
1970

    
1971
  def CheckPrereq(self):
1972
    """Check prerequisites.
1973

1974
    This checks whether the given params don't conflict and
1975
    if the given volume group is valid.
1976

1977
    """
1978
    if self.op.vg_name is not None and not self.op.vg_name:
1979
      instances = self.cfg.GetAllInstancesInfo().values()
1980
      for inst in instances:
1981
        for disk in inst.disks:
1982
          if _RecursiveCheckIfLVMBased(disk):
1983
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1984
                                       " lvm-based instances exist",
1985
                                       errors.ECODE_INVAL)
1986

    
1987
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1988

    
1989
    # if vg_name not None, checks given volume group on all nodes
1990
    if self.op.vg_name:
1991
      vglist = self.rpc.call_vg_list(node_list)
1992
      for node in node_list:
1993
        msg = vglist[node].fail_msg
1994
        if msg:
1995
          # ignoring down node
1996
          self.LogWarning("Error while gathering data on node %s"
1997
                          " (ignoring node): %s", node, msg)
1998
          continue
1999
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2000
                                              self.op.vg_name,
2001
                                              constants.MIN_VG_SIZE)
2002
        if vgstatus:
2003
          raise errors.OpPrereqError("Error on node '%s': %s" %
2004
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2005

    
2006
    self.cluster = cluster = self.cfg.GetClusterInfo()
2007
    # validate params changes
2008
    if self.op.beparams:
2009
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2010
      self.new_beparams = objects.FillDict(
2011
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2012

    
2013
    if self.op.nicparams:
2014
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2015
      self.new_nicparams = objects.FillDict(
2016
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2017
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2018
      nic_errors = []
2019

    
2020
      # check all instances for consistency
2021
      for instance in self.cfg.GetAllInstancesInfo().values():
2022
        for nic_idx, nic in enumerate(instance.nics):
2023
          params_copy = copy.deepcopy(nic.nicparams)
2024
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2025

    
2026
          # check parameter syntax
2027
          try:
2028
            objects.NIC.CheckParameterSyntax(params_filled)
2029
          except errors.ConfigurationError, err:
2030
            nic_errors.append("Instance %s, nic/%d: %s" %
2031
                              (instance.name, nic_idx, err))
2032

    
2033
          # if we're moving instances to routed, check that they have an ip
2034
          target_mode = params_filled[constants.NIC_MODE]
2035
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2036
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2037
                              (instance.name, nic_idx))
2038
      if nic_errors:
2039
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2040
                                   "\n".join(nic_errors))
2041

    
2042
    # hypervisor list/parameters
2043
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2044
    if self.op.hvparams:
2045
      if not isinstance(self.op.hvparams, dict):
2046
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2047
                                   errors.ECODE_INVAL)
2048
      for hv_name, hv_dict in self.op.hvparams.items():
2049
        if hv_name not in self.new_hvparams:
2050
          self.new_hvparams[hv_name] = hv_dict
2051
        else:
2052
          self.new_hvparams[hv_name].update(hv_dict)
2053

    
2054
    if self.op.enabled_hypervisors is not None:
2055
      self.hv_list = self.op.enabled_hypervisors
2056
      if not self.hv_list:
2057
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2058
                                   " least one member",
2059
                                   errors.ECODE_INVAL)
2060
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2061
      if invalid_hvs:
2062
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2063
                                   " entries: %s" %
2064
                                   utils.CommaJoin(invalid_hvs),
2065
                                   errors.ECODE_INVAL)
2066
    else:
2067
      self.hv_list = cluster.enabled_hypervisors
2068

    
2069
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2070
      # either the enabled list has changed, or the parameters have, validate
2071
      for hv_name, hv_params in self.new_hvparams.items():
2072
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2073
            (self.op.enabled_hypervisors and
2074
             hv_name in self.op.enabled_hypervisors)):
2075
          # either this is a new hypervisor, or its parameters have changed
2076
          hv_class = hypervisor.GetHypervisor(hv_name)
2077
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2078
          hv_class.CheckParameterSyntax(hv_params)
2079
          _CheckHVParams(self, node_list, hv_name, hv_params)
2080

    
2081
  def Exec(self, feedback_fn):
2082
    """Change the parameters of the cluster.
2083

2084
    """
2085
    if self.op.vg_name is not None:
2086
      new_volume = self.op.vg_name
2087
      if not new_volume:
2088
        new_volume = None
2089
      if new_volume != self.cfg.GetVGName():
2090
        self.cfg.SetVGName(new_volume)
2091
      else:
2092
        feedback_fn("Cluster LVM configuration already in desired"
2093
                    " state, not changing")
2094
    if self.op.hvparams:
2095
      self.cluster.hvparams = self.new_hvparams
2096
    if self.op.enabled_hypervisors is not None:
2097
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2098
    if self.op.beparams:
2099
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2100
    if self.op.nicparams:
2101
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2102

    
2103
    if self.op.candidate_pool_size is not None:
2104
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2105
      # we need to update the pool size here, otherwise the save will fail
2106
      _AdjustCandidatePool(self, [])
2107

    
2108
    self.cfg.Update(self.cluster, feedback_fn)
2109

    
2110

    
2111
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the non-config cluster files to all the other nodes.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes: every known node plus the additional ones,
  # without the master itself
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  if master_info.name in targets:
    targets.remove(master_info.name)

  # 2. Gather files to distribute: a fixed set, plus whatever the
  # enabled hypervisors need
  to_copy = set([constants.ETC_HOSTS,
                 constants.SSH_KNOWN_HOSTS_FILE,
                 constants.RAPI_CERT_FILE,
                 constants.RAPI_USERS_FILE,
                 constants.HMAC_CLUSTER_KEY,
                ])

  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    to_copy.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload; files missing locally are skipped and
  # copy failures are only warned about
  for path in to_copy:
    if not os.path.exists(path):
      continue
    upload = lu.rpc.call_upload_file(targets, path)
    for to_node, to_result in upload.items():
      err = to_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (path, to_node, err))
2153

    
2154

    
2155
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks: all nodes, in shared mode.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The cluster object is written back unchanged through the config
    writer, then the ancillary files are copied.

    """
    cluster_info = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster_info, feedback_fn)
    _RedistributeAncillaryFiles(self)
2181

    
2182

    
2183
def _WaitForSync(lu, instance, oneshot=False):
2184
  """Sleep and poll for an instance's disk to sync.
2185

2186
  """
2187
  if not instance.disks:
2188
    return True
2189

    
2190
  if not oneshot:
2191
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2192

    
2193
  node = instance.primary_node
2194

    
2195
  for dev in instance.disks:
2196
    lu.cfg.SetDiskID(dev, node)
2197

    
2198
  # TODO: Convert to utils.Retry
2199

    
2200
  retries = 0
2201
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2202
  while True:
2203
    max_time = 0
2204
    done = True
2205
    cumul_degraded = False
2206
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2207
    msg = rstats.fail_msg
2208
    if msg:
2209
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2210
      retries += 1
2211
      if retries >= 10:
2212
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2213
                                 " aborting." % node)
2214
      time.sleep(6)
2215
      continue
2216
    rstats = rstats.payload
2217
    retries = 0
2218
    for i, mstat in enumerate(rstats):
2219
      if mstat is None:
2220
        lu.LogWarning("Can't compute data for node %s/%s",
2221
                           node, instance.disks[i].iv_name)
2222
        continue
2223

    
2224
      cumul_degraded = (cumul_degraded or
2225
                        (mstat.is_degraded and mstat.sync_percent is None))
2226
      if mstat.sync_percent is not None:
2227
        done = False
2228
        if mstat.estimated_time is not None:
2229
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2230
          max_time = mstat.estimated_time
2231
        else:
2232
          rem_time = "no time estimate"
2233
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2234
                        (instance.disks[i].iv_name, mstat.sync_percent,
2235
                         rem_time))
2236

    
2237
    # if we're done but degraded, let's do a few small retries, to
2238
    # make sure we see a stable and not transient situation; therefore
2239
    # we force restart of the loop
2240
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2241
      logging.info("Degraded disks found, %d retries left", degr_retries)
2242
      degr_retries -= 1
2243
      time.sleep(1)
2244
      continue
2245

    
2246
    if done or oneshot:
2247
      break
2248

    
2249
    time.sleep(min(60, max_time))
2250

    
2251
  if done:
2252
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2253
  return not cumul_degraded
2254

    
2255

    
2256
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2257
  """Check that mirrors are not degraded.
2258

2259
  The ldisk parameter, if True, will change the test from the
2260
  is_degraded attribute (which represents overall non-ok status for
2261
  the device(s)) to the ldisk (representing the local storage status).
2262

2263
  """
2264
  lu.cfg.SetDiskID(dev, node)
2265

    
2266
  result = True
2267

    
2268
  if on_primary or dev.AssembleOnSecondary():
2269
    rstats = lu.rpc.call_blockdev_find(node, dev)
2270
    msg = rstats.fail_msg
2271
    if msg:
2272
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2273
      result = False
2274
    elif not rstats.payload:
2275
      lu.LogWarning("Can't find disk on node %s", node)
2276
      result = False
2277
    else:
2278
      if ldisk:
2279
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2280
      else:
2281
        result = result and not rstats.payload.is_degraded
2282

    
2283
  if dev.children:
2284
    for child in dev.children:
2285
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2286

    
2287
  return result
2288

    
2289

    
2290
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the query; no locks are requested.

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [...]),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [...])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @rtype: list
    @return: one row per OS, each row holding the values of the
        requested output fields, in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    # the validity/variants are only computed if one of the fields
    # depending on them was requested
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # an OS is valid only if its first entry on every node is
          # valid; force a real boolean, as the "and" chain would
          # otherwise leak an empty list or the raw status value into
          # the result
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            # keep only the variants which are known on all the nodes
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2403

    
2404

    
2405
class LURemoveNode(LogicalUnit):
  """Logical unit that takes a node out of the cluster.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build the hooks environment and the hook node lists.

    The node being removed is left out of both node lists: running
    the pre phase on it would make a failed node impossible to remove.

    @return: tuple of (environment dict, pre-phase node list,
        post-phase node list)

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    hook_nodes = self.cfg.GetNodeList()
    if target in hook_nodes:
      hook_nodes.remove(target)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Verify that the node can be removed.

    The node must:
     - exist in the configuration
     - not be the master node
     - not be used (as primary or secondary) by any instance

    On success the expanded name is stored in C{self.op.node_name}
    and the node object in C{self.node}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    expanded_name = self.cfg.ExpandNodeName(self.op.node_name)
    node = self.cfg.GetNodeInfo(expanded_name)
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
                                 errors.ECODE_NOENT)

    all_instances = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in all_instances:
      inst = self.cfg.GetInstanceInfo(instance_name)
      if node.name not in inst.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first." % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Remove the node from the cluster.

    The candidate pool is adjusted and the node dropped from the
    context first; afterwards the post hooks are run on the node and
    it is asked to leave the cluster. Failures in the last two steps
    only generate warnings.

    @param feedback_fn: feedback function (not used by this method)

    """
    node = self.node
    node_name = node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      # best effort: a failing hook must not abort the removal
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)
2489

    
2490

    
2491
class LUQueryNodes(NoHooksLU):
  """Logical unit answering node queries.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly as attributes of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields filled in from the results of the node_info RPC
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    """Validate the requested fields and declare the needed locks.

    Node locks (in shared mode) are only requested when at least one
    non-static field was selected and the opcode asked for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    # anything which is not a static field requires querying the nodes
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing to do here: a non-empty node list has already been
    validated by _GetWantedNodes in L{ExpandNames}, and an empty one
    needs no validation.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the list of nodes and their attributes.

    @param feedback_fn: feedback function (not used by this method)
    @return: a list of rows, one per node (in NiceSort order of the
        node names), each holding the values of
        C{self.op.output_fields} in the requested order
    @raise errors.OpExecError: if, when running without locks, some of
        the wanted nodes are no longer in the configuration
    @raise errors.ParameterError: if an unknown field was requested

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      # (output field, key in the RPC payload) for integer-valued fields
      int_fields = (
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        ("cnodes", 'cpu_nodes'),
        ("csockets", 'cpu_sockets'),
        )
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        answer = node_data[name]
        if answer.fail_msg or not answer.payload:
          # no usable data for this node
          live_data[name] = {}
          continue
        payload = answer.payload
        node_live = {}
        for (out_name, rpc_key) in int_fields:
          node_live[out_name] = utils.TryConvert(int,
                                                 payload.get(rpc_key, None))
        node_live["bootid"] = payload.get('bootid', None)
        live_data[name] = node_live
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # the instance data is only loaded if an instance field was asked for
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_name = node.name
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node_name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node_name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node_name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node_name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node_name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node_name].get(field, None)
        elif field == "role":
          # one-letter role code; the first matching condition wins
          if node_name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2650

    
2651

    
2652
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit listing the volumes present on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the requested fields and declare the needed locks.

    Shared node locks are requested on the given nodes, or on all
    nodes if none were given.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the nodes whose locks were acquired; these are
    the nodes which will be queried.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of volumes and their attributes.

    Offline nodes are silently skipped; nodes whose RPC call failed
    are skipped with a warning.

    @param feedback_fn: feedback function (not used by this method)
    @return: a list of rows, one per volume, each holding the string
        values of C{self.op.output_fields} in the requested order
    @raise errors.ParameterError: if an unknown field was requested

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    # per instance: the result of MapLVsByNode, indexed by node below
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      fail_msg = nresult.fail_msg
      if fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s",
                        node, fail_msg)
        continue

      # work on a sorted copy, the RPC payload itself is left untouched
      for vol in sorted(nresult.payload, key=lambda entry: entry['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # the first instance having this volume on this node wins
            val = '-'
            for inst in ilist:
              inst_lvs = lv_by_node[inst]
              if node in inst_lvs and vol['name'] in inst_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
2736

    
2737

    
2738
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit listing the storage units present on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    """Validate the arguments and declare the needed locks.

    Shared node locks are requested on the given nodes, or on all
    nodes if none were given.

    @raise errors.OpPrereqError: if the storage type is not valid

    """
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This defaults the optional C{name} opcode attribute to None and
    records the nodes whose locks were acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of storage units and their attributes.

    Offline nodes are silently skipped; nodes whose RPC call failed
    are skipped with a warning.

    @param feedback_fn: feedback function (not used by this method)
    @return: a list of rows, one per storage unit, each holding the
        values of C{self.op.output_fields} in the requested order;
        rows are ordered by node and then by unit name
    @raise errors.ParameterError: if a field can't be computed

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      wanted = self.op.output_fields[:]
    else:
      wanted = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in wanted if fname not in lu_only]

    # position of each field in the rows returned by the nodes
    field_idx = dict([(fname, pos) for (pos, fname) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      fail_msg = nresult.fail_msg
      if fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s",
                        node, fail_msg)
        continue

      # index the rows by unit name, so that they can be sorted by it
      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for unit_name in utils.NiceSort(rows.keys()):
        row = rows[unit_name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
2833

    
2834

    
2835
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit changing the fields of a storage unit on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    @raise errors.OpPrereqError: if the node name can't be expanded
        or the storage type is not a valid one

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)

    self.op.node_name = expanded

    if self.op.storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" %
                                 self.op.storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the lock on the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check that all requested changes are allowed.

    @raise errors.OpPrereqError: if units of the given storage type
        can't be modified at all, or if some of the fields in
        C{self.op.changes} are not modifiable for that type

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    forbidden = set(self.op.changes.keys()) - modifiable
    if forbidden:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(forbidden)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Apply the requested changes to the storage unit.

    @param feedback_fn: feedback function (not used by this method)

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    result = self.rpc.call_storage_modify(op.node_name,
                                          op.storage_type, st_args,
                                          op.name, op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (op.name, op.node_name))
2890

    
2891

    
2892
class LUAddNode(LogicalUnit):
  """Logical unit that adds (or re-adds) a node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build the hooks environment and the hook node lists.

    The pre phase runs on all current nodes, the post phase on all
    current nodes plus the new node.

    @return: tuple of (environment dict, pre-phase node list,
        post-phase node list)

    """
    op = self.op
    env = {
      "OP_TARGET": op.node_name,
      "NODE_NAME": op.node_name,
      "NODE_PIP": op.primary_ip,
      "NODE_SIP": op.secondary_ip,
      }
    current_nodes = self.cfg.GetNodeList()
    with_new_node = current_nodes + [op.node_name, ]
    return env, current_nodes, with_new_node

  def CheckPrereq(self):
    """Check whether the node can be added.

    This checks that:
     - the new node is not already in the config (or, for a re-add,
       that it is there and its IP configuration did not change)
     - it is resolvable
     - its addresses don't conflict with the ones of other nodes
     - its setup (single/dual homed) matches the one of the master
     - it can be reached on the node daemon port

    As a side effect, the primary and secondary IP are stored in the
    opcode and the node object is stored in C{self.new_node}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    cfg = self.cfg
    readd = self.op.readd

    dns_data = utils.GetHostInfo(self.op.node_name)

    node = dns_data.name
    primary_ip = dns_data.ip
    self.op.primary_ip = primary_ip

    # a missing secondary IP means a single-homed node
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if readd and node == existing_node_name:
        # the node itself: its addresses must not have changed
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      # any other node: none of its addresses may be reused
      new_ips = (primary_ip, secondary_ip)
      if (existing_node.primary_ip in new_ips or
          existing_node.secondary_ip in new_ips):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no private ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a private ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Add the new node to the cluster.

    @param feedback_fn: function used to report the details of a
        failed ssh/hostname verification
    @raise errors.OpExecError: on protocol version mismatch, if the
        node doesn't have the given secondary IP, or if the
        ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = False # pylint: disable-msg=W0201
      new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    version_result = self.rpc.call_version([node])[node]
    version_result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != version_result.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_result.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_result.payload)

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      # contents of the files, in the same order as above
      (dsa_priv, dsa_pub, rsa_priv, rsa_pub, user_priv, user_pub) = \
        [utils.ReadFile(fname) for fname in keyfiles]

      add_result = self.rpc.call_node_add(node, dsa_priv, dsa_pub,
                                          rsa_priv, rsa_pub,
                                          user_priv, user_pub)
      add_result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      ip_result = self.rpc.call_node_has_ip_address(new_node.name,
                                                    new_node.secondary_ip)
      ip_result.Raise("Failure checking secondary ip on node %s" %
                      new_node.name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
      if not ip_result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # let the master verify the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_result = self.rpc.call_node_verify(node_verify_list,
                                              node_verify_param,
                                              self.cfg.GetClusterName())
    for verifier in node_verify_list:
      verify_result[verifier].Raise("Cannot communicate with node %s" %
                                    verifier)
      nl_payload = verify_result[verifier].payload[constants.NV_NODELIST]
      if not nl_payload:
        continue
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not self.op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_result = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = demote_result.fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
3110

    
3111

    
3112
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The parameters handled are the C{master_candidate}, C{offline} and
  C{drained} flags; in the opcode each of them is either a boolean or
  None (meaning "don't change").

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested flag changes.

    @raise errors.OpPrereqError: if the node name can't be expanded,
        if no modification was requested, or if more than one flag
        is to be set to True

    """
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      # an unknown node is a "no such entity" error, the same code the
      # other node LUs use for this check
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the lock on the node being modified.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the node being modified.

    @return: tuple of (environment dict, pre-phase node list,
        post-phase node list)

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that:
     - the flags of the master node are not being changed
     - demoting, offlining or draining a master candidate leaves
       enough master candidates (offline/drain can be forced if no
       more nodes could be promoted)
     - an offline or drained node is not made master candidate, unless
       the respective flag is cleared at the same time

    When the node is de-offlined or un-drained, this also decides
    whether it should be promoted to master candidate.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    # (the flags are tri-state, hence the explicit comparisons)
    offline_or_drain = self.op.offline == True or self.op.drained == True
    deoffline_or_drain = self.op.offline == False or self.op.drained == False

    if (node.master_candidate and
        (self.op.master_candidate == False or offline_or_drain)):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
      if mc_now <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, mc_now-1))
        # Only allow forcing the operation if it's an offline/drain operation,
        # and we could not possibly promote more nodes.
        # FIXME: this can still lead to issues if in any way another node which
        # could be promoted appears in the meantime.
        if self.op.force and offline_or_drain and mc_should == mc_max:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (deoffline_or_drain and not offline_or_drain and not
        self.op.master_candidate == True):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: feedback function, passed on to the
        configuration update
    @return: a list of (parameter name, new value or explanation)
        pairs describing the changes made

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        # an offline node can be neither master candidate nor drained
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        # a drained node can be neither master candidate nor offline
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3265

    
3266

    
3267
class LUPowercycleNode(NoHooksLU):
  """Logical unit that power-cycles a single node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    The name from the opcode is expanded via the configuration and the
    expanded form is stored back into the opcode. If the target is the
    master node, the opcode's C{force} flag must be set.

    @raise errors.OpPrereqError: if the name cannot be expanded, or if
        the node is the master and C{force} is not set

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = expanded

    targets_master = (expanded == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the (empty) lock set for this LU.

    Power-cycling is a last-resort operation that must not wait behind
    other jobs, so no locks at all are requested.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Nothing to verify; this LU has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Ask the node to power-cycle itself.

    The cluster's hypervisor type (as returned by the configuration) is
    passed along with the request.

    @return: the payload of the powercycle RPC result

    """
    hv_name = self.cfg.GetHypervisorType()
    result = self.rpc.call_node_powercycle(self.op.node_name, hv_name)
    result.Raise("Failed to schedule the reboot")
    return result.payload
3310

    
3311

    
3312
class LUQueryClusterInfo(NoHooksLU):
  """Logical unit returning a summary of the cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Nothing to verify; this LU has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Build the cluster information dictionary.

    @rtype: dict
    @return: version constants, the architecture of the machine running
        this code, and the cluster-level settings read from the cluster
        configuration object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # only the parameters of enabled hypervisors are reported
    hvparams = {}
    for hv_name in enabled_hvs:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }
3359

    
3360

    
3361
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Request no locks and validate the requested output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the requested configuration values.

    @rtype: list
    @return: one entry per name in the opcode's C{output_fields}, in the
        same order as requested
    @raise errors.ParameterError: if an unknown field name is requested

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        # the queue is considered drained when the drain file exists
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        # must be collected like every other field: returning from here
        # would yield a bare value instead of the list aligned with
        # output_fields and would drop all other requested fields
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
3402

    
3403

    
3404
class LUActivateInstanceDisks(NoHooksLU):
  """Logical unit that brings up the disks of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are filled in by DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At node level, lock the nodes of the locked instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance from the configuration, verifies that its
    primary node is online and defaults the optional C{ignore_size}
    opcode attribute to False when it is absent.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Assemble the disks and report the device mapping.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    assembled_ok, device_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if assembled_ok:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
3444

    
3445

    
3446
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary-mode) pass won't result in an error return from the
      function; errors of the second (primary) pass always do
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a (disks_ok, device_info) pair; disks_ok is False if any
      assembly error that is not ignored occurred, and device_info is a
      list with one (host, instance_visible_name, node_visible_name)
      tuple per instance disk, giving the mapping from node devices to
      instance devices (node_visible_name is None when the assembly on
      the primary node failed)

  """
  device_info = []
  disks_ok = True
  iname = instance.name

  def _AssembleOn(node, node_disk, as_primary):
    """Run the assemble RPC for one disk on one node.

    """
    if ignore_size:
      # work on a copy so the configuration object keeps its size
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return lu.rpc.call_blockdev_assemble(node, node_disk, iname, as_primary)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      msg = _AssembleOn(node, node_disk, False).fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      result = _AssembleOn(node, node_disk, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
3528

    
3529

    
3530
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, cleaning up on failure.

  The C{force} value is passed to L{_AssembleInstanceDisks} as its
  C{ignore_secondaries} argument. If the assembly fails, the disks are
  shut down again and an error is raised; when C{force} is False (but
  not when it is None) a hint about '--force' is logged first.

  @raise errors.OpExecError: if the disks could not be assembled

  """
  assembled_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if assembled_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  # None means the caller has no force option to offer, so no hint then
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3543

    
3544

    
3545
class LUDeactivateInstanceDisks(NoHooksLU):
  """Logical unit that shuts down the disks of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are filled in by DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At node level, lock the nodes of the locked instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object from the configuration; it must exist,
    since the instance has been locked.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks via L{_SafeShutdownInstanceDisks}.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3577

    
3578

    
3579
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown the block devices of an instance that is not running.

  The primary node is asked for its list of running instances (for the
  instance's hypervisor); L{_ShutdownInstanceDisks} is only called if
  the instance is not in that list.

  @raise errors.OpExecError: if the instance is running on its primary
      node

  """
  primary = instance.primary_node
  per_node = lu.rpc.call_instance_list([primary], [instance.hypervisor])
  running = per_node[primary]
  running.Raise("Can't contact node %s" % primary)

  if instance.name not in running.payload:
    _ShutdownInstanceDisks(lu, instance)
    return

  raise errors.OpExecError("Instance is running, can't shutdown"
                           " block devices.")
3595

    
3596

    
3597
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance. Every failure is
  logged as a warning and the remaining devices are still processed.

  If ignore_primary is true, errors on the primary node are ignored
  (they are still logged, but do not affect the return value); errors
  on any other node always count as failures.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should shut down
  @type ignore_primary: boolean
  @param ignore_primary: whether failures on the primary node should
      be left out of the result
  @rtype: boolean
  @return: True if no shutdown error that counts occurred, False
      otherwise

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # only a primary-node failure can be forgiven, and only on request
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
3618

    
3619

    
3620
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested free memory.

  The node is queried for its information and the reported
  'memory_free' value is compared against the requested amount. An
  OpPrereqError is raised if the node cannot be queried, if the
  reported value is not an integer, or if it is too small.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get('memory_free', None)
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available),
                               errors.ECODE_ENVIRON)

  if available >= requested:
    return

  raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                             " needed %s MiB, available %s MiB" %
                             (node, reason, requested, available),
                             errors.ECODE_NORES)
3655

    
3656

    
3657
class LUStartupInstance(LogicalUnit):
  """Logical unit that starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance,
    both before and after the operation.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, validates the optional one-off backend and
    hypervisor parameters from the opcode, and checks that the primary
    node is online, that the instance's bridges exist and, if the
    instance is not already running, that the primary node has enough
    free memory for it.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # optional backend parameter overrides for this start only
    extra_be = getattr(self.op, "beparams", {})
    self.beparams = extra_be
    if extra_be:
      if not isinstance(extra_be, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(extra_be), ),
                                   errors.ECODE_INVAL)
      # enforce the declared types of the backend parameters
      utils.ForceDictType(extra_be, constants.BES_PARAMETER_TYPES)
      self.op.beparams = extra_be

    # optional hypervisor parameter overrides for this start only
    extra_hv = getattr(self.op, "hvparams", {})
    self.hvparams = extra_hv
    if extra_hv:
      if not isinstance(extra_hv, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(extra_hv), ),
                                   errors.ECODE_INVAL)

      # the syntax check is done locally, on the cluster defaults
      # overlaid with the instance's and then the one-off values
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(extra_hv, constants.HVS_PARAMETER_TYPES)
      merged_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      merged_hvp.update(extra_hv)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, merged_hvp)
      self.op.hvparams = extra_hv

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    already_running = bool(remote_info.payload)
    if not already_running:
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           filled_be[constants.BE_MEMORY],
                           instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first; then its
    disks are started and the start RPC is sent to the primary node. If
    that RPC fails, the disks are shut down again.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance,
                                                self.hvparams, self.beparams)
    failure = start_result.fail_msg
    if not failure:
      return

    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance: %s" % failure)
3757

    
3758

    
3759
class LURebootInstance(LogicalUnit):
  """Logical unit that reboots an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional shutdown timeout.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Validate the reboot type, then lock the instance.

    @raise errors.ParameterError: if the reboot type is not one of the
        soft, hard or full reboot constants

    """
    known_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  known_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance,
    both before and after the operation.

    """
    env = {}
    env["IGNORE_SECONDARIES"] = self.op.ignore_secondaries
    env["REBOOT_TYPE"] = self.op.reboot_type
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance and checks that its primary node is online and
    that its bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are delegated to the reboot RPC on the
    primary node. Any other (i.e. full) reboot is done from here:
    shut the instance down, cycle its disks and start it again.
    In all successful cases the instance ends up marked as up.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    pnode = instance.primary_node

    node_side_types = (constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD)
    if reboot_type not in node_side_types:
      result = self.rpc.call_instance_shutdown(pnode, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, instance, None, None)
      failure = result.fail_msg
      if failure:
        # do not leave the disks active for an instance that is not up
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)
    else:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance,
                                             reboot_type,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(instance.name)
3847

    
3848

    
3849
class LUShutdownInstance(LogicalUnit):
  """Logical unit that shuts down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional shutdown timeout.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance,
    both before and after the operation.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance and checks that its primary node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration before the
    shutdown RPC is sent. A failed RPC is only logged as a warning; the
    disks are shut down in either case.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.timeout

    self.cfg.MarkInstanceDown(instance.name)

    failure = self.rpc.call_instance_shutdown(pnode, instance,
                                              timeout).fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, instance)
3904

    
3905

    
3906
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    If a new OS type is given in the opcode, it is also checked against
    the primary node (including its variant, unless the opcode's
    force_variant is set).

    @raise errors.OpPrereqError: if the instance is diskless, is marked
        up or is running, or if the primary node is unknown

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name,
                                 errors.ECODE_STATE)
    # the configuration state is not enough, also ask the node itself
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node),
                                 errors.ECODE_STATE)

    # both attributes are optional in the opcode
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # report the instance's primary node: this opcode has no
        # "pnode" attribute, so formatting it would fail here
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node, errors.ECODE_NOENT)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name),
                   prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(result.payload, self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was requested it is stored in the configuration
    first. The disks are then started, the OS creation is run on the
    primary node, and the disks are shut down again whether or not the
    OS creation succeeded.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
3995

    
3996

    
3997
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit that recreates the missing disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check that C{disks} is a list of non-negative integers.

    @raise errors.OpPrereqError: if C{disks} is not a list or contains
        an entry that is not a non-negative integer

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if not (isinstance(item, int) and item >= 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance,
    both before and after the operation.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must have disks, must not be marked up and must not be
    running on its (online) primary node. An empty C{disks} list in the
    opcode is replaced by the indices of all disks of the instance;
    otherwise every given index must refer to an existing disk.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name, errors.ECODE_STATE)

    # the configuration state is not enough, also ask the node itself
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node), errors.ECODE_STATE)

    if self.op.disks:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      self.op.disks = range(len(instance.disks))

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk whose index was not selected in the opcode is passed to
    L{_CreateDisks} in its C{to_skip} list.

    """
    to_skip = [idx for idx, _ in enumerate(self.instance.disks)
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4079

    
4080

    
4081
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  The instance has to be marked down and must not be running; the new
  name is resolved and must not be used by another instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is made of the master node followed by all the nodes
    of the instance; the same list is returned for both positions.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the instance exists, that its primary node is
    online, that the instance is neither marked up nor running, and
    that the (resolved) new name is not taken by another instance or,
    unless ignore_ip is set, by a reachable IP address.

    """
    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(full_name)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)
    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name, errors.ECODE_STATE)

    # the instance must also not be running on its primary node
    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name, pnode),
                                 errors.ECODE_STATE)
    self.instance = instance

    # new name verification; the opcode is updated with the resolved name
    host_info = utils.GetHostInfo(self.op.new_name)
    resolved_name = host_info.name
    self.op.new_name = resolved_name

    if resolved_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 resolved_name, errors.ECODE_EXISTS)

    skip_ip_check = getattr(self.op, "ignore_ip", False)
    if (not skip_ip_check and
        utils.TcpPing(host_info.ip, constants.DEFAULT_NODED_PORT)):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host_info.ip, resolved_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are switched to the new
    name first; afterwards the file storage directory is renamed (for
    file-based instances) and the OS rename script is run with the
    instance disks activated.

    """
    instance = self.instance
    old_name = instance.name

    if instance.disk_template == constants.DT_FILE:
      # remember the directory before the rename changes the disk paths
      old_disk_path = instance.disks[0].logical_id[1]
      old_file_storage_dir = os.path.dirname(old_disk_path)

    self.cfg.RenameInstance(old_name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, old_name)
    lock_manager.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    renamed = self.cfg.GetInstanceInfo(self.op.new_name)
    pnode = renamed.primary_node

    if renamed.disk_template == constants.DT_FILE:
      new_disk_path = renamed.disks[0].logical_id[1]
      new_file_storage_dir = os.path.dirname(new_disk_path)
      rename_result = self.rpc.call_file_storage_dir_rename(
        pnode, old_file_storage_dir, new_file_storage_dir)
      rename_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                          " (but the instance has been renamed in Ganeti)" %
                          (pnode, old_file_storage_dir,
                           new_file_storage_dir))

    _StartInstanceDisks(self, renamed, None)
    try:
      script_result = self.rpc.call_instance_run_rename(pnode, renamed,
                                                        old_name)
      failure = script_result.fail_msg
      if failure:
        # a failing OS script is only a warning, the rename already happened
        self.proc.LogWarning(
          "Could not run OS rename script for instance %s on node %s"
          " (but the instance has been renamed in Ganeti): %s" %
          (renamed.name, renamed.primary_node, failure))
    finally:
      _ShutdownInstanceDisks(self, renamed)
4183

    
4184

    
4185
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  The instance is shut down, its disks are removed and finally it is
  dropped from the cluster configuration.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode; the cluster-wide
    default constant is used when it is missing.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks of the instance at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list returned here contains the master node only.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    hook_nodes = [self.cfg.GetMasterNode()]
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    Failures while shutting down the instance or removing its disks
    abort the operation unless ignore_failures is set, in which case
    they are only reported through feedback_fn.

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    shutdown = self.rpc.call_instance_shutdown(pnode, inst,
                                               self.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    # the lock of the removed instance is scheduled for removal as well
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4262

    
4263

    
4264
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  The requested output fields are validated against the static and
  dynamic field sets declared below; Exec returns one row per instance
  with the values in the order of the requested fields.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  # fields which Exec reads with a plain getattr() on the instance object
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  # fields (plain names and regular expressions) declared as static
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  # fields declared as dynamic; Exec computes them from the live node data
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    """Validate the output fields and compute the needed locks.

    Locks are only requested when both a node query is needed and the
    opcode asks for locking; they are acquired in shared mode.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # presumably NonMatching returns the requested fields which are not
    # static, i.e. a true value means live data is needed -- TODO confirm
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks of the instances, if locking is in use.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked here.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    Returns a list with one entry per instance, each entry being the
    list of values for the fields in self.op.output_fields, in that
    order. Raises OpExecError if explicitly requested instances are
    no longer available.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    # only the primary nodes of the selected instances are queried
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    # bad_nodes: nodes whose RPC result carries a failure message
    # off_nodes: nodes whose RPC result is flagged as offline
    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      # no live data requested: use an empty entry for every instance
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      # iout collects the values of one instance, in output_fields order
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      # per-NIC parameters, layered over the cluster default NIC parameters
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        # the match object is only used by the regex-based fields at the
        # end of the chain below
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          # None means the state is unknown because the node query failed
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          # the offline check must come first, since offline nodes are
          # also expected to be in bad_nodes
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          # the un-indexed NIC fields always refer to the first NIC
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          # the bridge is the NIC link, reported only for bridged NICs
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          # map the third letter of the field name to a disk index
          # ('a' -> 0, 'b' -> 1)
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              # st_groups[2] is the disk index as matched by the regex
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              # one entry per NIC: the link for bridged NICs, None otherwise
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
4545

    
4546

    
4547
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  The instance is shut down on its primary node and, if it is marked
  as up, started again on its (first) secondary node.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode; the default
    constant is used when it is missing.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks of the instance at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is made of the master node followed by the secondary
    nodes of the instance.

    """
    hook_env = {}
    hook_env["IGNORE_CONSISTENCY"] = self.op.ignore_consistency
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    # the generic instance variables override the ones set above
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.secondary_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it uses a
    network mirrored disk template and that the target (secondary)
    node is usable.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_full = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    snodes = inst.secondary_nodes
    if not snodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    failover_target = snodes[0]
    _CheckNodeOnline(self, failover_target)
    _CheckNodeNotDrained(self, failover_target)
    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # check memory requirements on the secondary node
      reason = "failing over instance %s" % inst.name
      _CheckNodeFreeMemory(self, failover_target, reason,
                           be_full[constants.BE_MEMORY], inst.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=failover_target)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    inst = self.instance

    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        if (not _CheckDiskConsistency(self, disk, new_primary, False) and
            not self.op.ignore_consistency):
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown = self.rpc.call_instance_shutdown(old_primary, inst,
                                               self.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, failure))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, failure)

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, inst, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = new_primary
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s", inst.name, new_primary)

    assembled = _AssembleInstanceDisks(self, inst, ignore_secondaries=True)
    disks_ok, _ = assembled
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start = self.rpc.call_instance_start(new_primary, inst, None, None)
    failure = start.fail_msg
    if failure:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, failure))
4690

    
4691

    
4692
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is delegated to a
  TLMigrateInstance tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    # the tasklet is also kept separately, as BuildHooksEnv needs it
    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks of the instance at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is made of the master node followed by the secondary
    nodes of the instance, which is taken from the migration tasklet.

    """
    inst = self._migrater.instance
    hook_env = _BuildInstanceHookEnvByObject(self, inst)
    hook_env["MIGRATE_LIVE"] = self.op.live
    hook_env["MIGRATE_CLEANUP"] = self.op.cleanup
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(inst.secondary_nodes)
    return hook_env, hook_nodes, hook_nodes
4731

    
4732

    
4733
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, new disks are
  created on the target node, the data is copied over and the instance
  is started on the target node if it is marked as up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode; the default
    constant is used when it is missing.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    Raises OpPrereqError if the target node name cannot be expanded.

    """
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                  self.op.target_node, errors.ECODE_NOENT)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    # the primary node of the instance is appended in DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the primary node of the instance to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is made of the master node, the primary node of the
    instance and the target node.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already on the target node, that all its disks are of a type which
    can be copied (logical volume or file) and that the target node is
    usable.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    Raises OpExecError if the shutdown fails (unless the opcode sets
    ignore_consistency), if the disk copy fails, or if the instance
    cannot be started on the target node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # ignore_consistency is not a required opcode attribute (it is not
      # in _OP_REQP), so it must not be accessed directly: a missing
      # attribute would hide the shutdown failure behind an AttributeError
      if getattr(self.op, "ignore_consistency", False):
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # the first failure aborts the copy of the remaining disks
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now, so switch the primary node
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
4916

    
4917

    
4918
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  For every instance returned by L{_GetNodePrimaryInstances} for the
  given node, one L{TLMigrateInstance} tasklet (with cleanup disabled)
  is created and stored in C{self.tasklets}.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, declare locks and create the tasklets.

    @raise errors.OpPrereqError: if the node name cannot be expanded

    """
    # Expand into a local first: storing the result directly in
    # self.op.node_name would make the error message below always report
    # 'None' instead of the name that was actually passed in.
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    """Declare the locks for the given level.

    Only the node level is handled here, by delegating to
    C{self._LockInstancesNodes()} (defined outside this class).

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment only contains the name of the node being migrated;
    both returned node lists contain just the master node.

    @rtype: tuple
    @return: (environment dict, node list, node list)

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
4971

    
4972

    
4973
class TLMigrateInstance(Tasklet):
  """Tasklet migrating one drbd8 instance to its secondary node.

  Depending on the C{cleanup} flag passed at construction time, L{Exec}
  either runs a new migration (L{_ExecMigration}) or tries to clean up
  after a failed one (L{_ExecCleanup}).

  """
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    @param lu: the logical unit on whose behalf we execute
    @param instance_name: the name of the instance to migrate
    @param live: passed unchanged to the C{instance_migrate} RPC call
    @type cleanup: boolean
    @param cleanup: whether to run the cleanup of a failed migration
        instead of a new migration

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it uses the
    drbd8 disk template and that it has a secondary node; the first
    secondary node is then checked for free memory and bridges. Unless
    we are in cleanup mode, L{_CheckNodeNotDrained} is called for the
    target node and the primary node is asked whether the instance is
    migratable.

    On success the instance object is stored in C{self.instance}.

    @raise errors.OpPrereqError: if the instance is not known or is not
        using the drbd8 disk template
    @raise errors.ConfigurationError: if the instance has no secondary
        node

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name, errors.ECODE_NOENT)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call: all nodes are queried in a
    loop (sleeping two seconds between rounds) until every one of them
    reports being done; while waiting, the lowest reported percentage is
    sent to the feedback function.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    @param node: the node on which the instance's disks are closed

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    The disconnect is requested on all nodes; a failure on any of them
    is raised.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @type multimaster: boolean
    @param multimaster: whether to attach in dual-master (True) or
        single-master (False) mode

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    @raise errors.OpExecError: if the instance is found running on both
        nodes or on neither of them

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    The target node is demoted and the disks are reconnected in
    single-master mode. Failures are only logged as a warning, so that
    the caller can still report the original migration error.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    # "except ... as ..." is accepted by Python 2.6+ and Python 3, while
    # the old comma form is rejected by Python 3.
    except errors.OpExecError as err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    A failure of the abort call is only logged.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - check the disk consistency on the target node
      - fetch the migration information from the source node
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - prepare the target node and migrate the instance
      - update the configuration and finalize the migration
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    If preparing the target or the migration itself fails, the migration
    is aborted and the disk status reverted before the error is raised.

    @raise errors.OpExecError: if any of the steps fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # kept on self as well, since _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    # NOTE(review): fixed 10 second pauses before and after the migrate
    # call; the reason for them is not documented here
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    This stores the feedback function, the source and target nodes (the
    instance's primary and first secondary node) and their secondary IPs
    on the tasklet, and then runs either the cleanup or the migration.

    @param feedback_fn: function used to send feedback back to the caller

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
5304

    
5305

    
5306
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are always visited first. The device
  itself is only created when creation is forced, either by the caller
  or because the device reports that it must be created on secondaries.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is changed to True (for this device and all its children) whenever
      the device's CreateOnSecondary() returns true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter is passed on unchanged to
      L{_CreateSingleBlockDev} and to the recursive calls

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # an empty tuple stands in for missing/empty children
  for child in device.children or ():
    _CreateBlockDev(lu, node, instance, child, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5346

    
5347

    
5348
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  No recursion over the children of the device is done here, so they
  must have been created in advance. If the device has no physical ID
  yet, the payload of the creation call is stored as its physical ID.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: passed unchanged to the C{blockdev_create} RPC call

  """
  lu.cfg.SetDiskID(device, node)
  create_result = lu.rpc.call_blockdev_create(node, device, device.size,
                                              instance.name, force_open, info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, instance.name))
  create_result.Raise(failure_text)

  if device.physical_id is not None:
    return
  device.physical_id = create_result.payload
5376

    
5377

    
5378
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name for each of the given extensions.

  For every extension a new unique ID is requested from the
  configuration and the extension is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the suffixes to append to the generated IDs
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
5389

    
5390

    
5391
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  The device has two logical volume children in the configured volume
  group: a data volume of the requested size named C{names[0]} and a
  metadata volume of size 128 named C{names[1]}.

  @param lu: the lu on whose behalf we execute
  @param primary: the primary node, stored in the drbd logical ID
  @param secondary: the secondary node, stored in the drbd logical ID
  @param size: the size of the drbd device and of its data volume
  @param names: the names of the data and metadata volumes
  @param iv_name: the instance-visible name of the drbd device
  @param p_minor: the minor stored for the primary node
  @param s_minor: the minor stored for the secondary node
  @rtype: L{objects.Disk}
  @return: the drbd8 disk object

  """
  cfg = lu.cfg
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))

  drbd_id = (primary, secondary, port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
5410

    
5411

    
5412
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template to generate the disks for
  @param instance_name: the instance name, used when allocating the
      DRBD minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; must hold exactly one
      node for drbd8 and be empty for the plain and file templates
  @param disk_info: a sequence of dicts, of which the "size" and "mode"
      keys are used
  @param file_storage_dir: the directory of file-based disks
  @param file_driver: the driver of file-based disks
  @param base_index: the index of the first generated disk
  @rtype: list
  @return: the generated L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    lv_names = _GenerateUniqueNames(lu, suffixes)
    return [objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                         logical_id=(vgname, lv_names[idx]),
                         iv_name="disk/%d" % (idx + base_index),
                         mode=disk["mode"])
            for idx, disk in enumerate(disk_info)]

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    # flat list holding a data and a meta name for each disk, in order
    lv_names = []
    for prefix in _GenerateUniqueNames(lu, suffixes):
      lv_names.extend([prefix + "_data", prefix + "_meta"])

    drbd_disks = []
    for idx, disk in enumerate(disk_info):
      first = 2 * idx
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], lv_names[first:first + 2],
                                      "disk/%d" % (idx + base_index),
                                      minors[first], minors[first + 1])
      drbd_dev.mode = disk["mode"]
      drbd_disks.append(drbd_dev)
    return drbd_disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    file_disks = []
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      file_disks.append(
        objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                     iv_name="disk/%d" % disk_index,
                     logical_id=(file_driver,
                                 "%s/disk%d" % (file_storage_dir,
                                                disk_index)),
                     mode=disk["mode"]))
    return file_disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5476

    
5477

    
5478
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  @param instance: the instance owning the disk; only its name is used
  @rtype: string
  @return: the instance name prefixed with "originstname+"

  """
  owner_name = instance.name
  return "originstname+%s" % owner_name
5483

    
5484

    
5485
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  For file-based instances the storage directory is created first (on
  the primary or the overriding target node). Then every disk not
  listed in C{to_skip} is created on all the involved nodes, with
  creation and opening forced only on the primary/target node.

  Nothing is returned; failures are signalled by the exceptions raised
  from the RPC result checks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
      (the disks are then created on this node only)

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    mkdir_result = lu.rpc.call_file_storage_dir_create(pnode, storage_dir)

    mkdir_result.Raise("Failed to create directory '%s' on"
                       " node %s" % (storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # the same flag is used both as force_create and as force_open
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
5528

    
5529

    
5530
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances the storage directory is removed as well,
  on the same node the disks were removed from.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which is
      # not the primary node when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5577

    
5578

    
5579
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template of the instance
  @param disks: the disk definitions, each a dict with a "size" key
  @return: the required size for the plain and drbd8 templates, None for
      the diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  # Both totals are computed up front, whatever the template is
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" %  disk_template)
5597

    
5598

    
5599
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify. The validation is
  requested from all the given nodes; results of offline nodes are
  ignored, failures of any other node are raised.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check

  NOTE(review): the exception type depends on the RPC result's Raise()
  method, which is called here without prereq=True - confirm whether an
  errors.OpPrereqError is really what callers get.

  """
  per_node = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                    hvname,
                                                    hvparams)
  for node in nodenames:
    node_result = per_node[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
5624

    
5625

    
5626
class LUCreateInstance(LogicalUnit):
  """Logical unit which creates a new instance.

  The instance is either created from scratch or imported from an
  existing export, depending on the C{mode} opcode parameter.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # opcode parameters which must always be present
  _OP_REQP = [
    "instance_name", "disks", "disk_template", "mode", "start",
    "wait_for_sync", "ip_check", "nics", "hvparams", "beparams",
    ]
  REQ_BGL = False
5637

    
5638
  def CheckArguments(self):
    """Verify that the name check and ip check flags are consistent.

    A missing C{name_check} opcode attribute is set to True.

    @raise errors.OpPrereqError: if an ip check is requested while the
        name check is disabled

    """
    # name_check is not a required parameter, in order to ease
    # forward/backward compatibility for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True

    # TODO: make the ip check more flexible and not depend on the name check
    if self.op.ip_check:
      if not self.op.name_check:
        raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                   errors.ECODE_INVAL)
5650

    
5651
  def _ExpandNode(self, node):
    """Return the expanded name of a node, failing if it is not known.

    @param node: the node name, as passed in the opcode
    @return: the name returned by the configuration's C{ExpandNodeName}
    @raise errors.OpPrereqError: if the configuration cannot expand
        the name

    """
    expanded = self.cfg.ExpandNodeName(node)
    if expanded is not None:
      return expanded
    raise errors.OpPrereqError("Unknown node %s" % node, errors.ECODE_NOENT)
5659

    
5660
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation. Along the way the
    opcode is validated (creation mode, disk template, hypervisor,
    hypervisor and backend parameters, instance name, NICs, disks, file
    storage settings, node/iallocator selection and import source) and
    the results are remembered in C{self.hv_full}, C{self.be_full},
    C{self.check_ip}, C{self.nics} and C{self.disks}.

    @raise errors.OpPrereqError: if any of the opcode parameters is
        invalid

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name",
                                 errors.ECODE_INVAL)

    # fall back to the hypervisor type given by the configuration
    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    if self.op.name_check:
      hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = instance_name = hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = hostname1.ip
    else:
      instance_name = self.op.instance_name
      self.check_ip = None

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        # 'auto' reuses the address resolved by the name check above, so
        # hostname1 is only defined when that check was actually done
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac, errors.ECODE_INVAL)
        else:
          try:
            self.cfg.ReserveMAC(mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % mac,
                                       errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      # only the explicitly requested values are stored in the NIC; the
      # cluster defaults are merged in just for the syntax check
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        # int() raises TypeError (not ValueError) for values which are
        # neither strings nor numbers, e.g. lists or dicts
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      self.disks.append({"size": size, "mode": mode})

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    # the directory is appended to the cluster-wide file storage
    # directory when the instance is created, hence an absolute path is
    # the invalid case here
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must be"
                                 " relative, not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.iallocator:
      # the allocator can choose any node, so all of them are locked
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # the export will be searched for on all nodes (in CheckPrereq)
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)
5881

    
5882
  def _RunAllocator(self):
    """Select the instance's nodes by running the iallocator.

    The allocation request is built from the opcode and from the
    parameters computed so far; on success C{self.op.pnode} (and
    C{self.op.snode}, when two nodes are required) are filled in.

    @raise errors.OpPrereqError: if the allocator fails or returns an
        unexpected number of nodes

    """
    nic_dicts = [nic.ToDict() for nic in self.nics]
    request = {
      "mode": constants.IALLOCATOR_MODE_ALLOC,
      "name": self.op.instance_name,
      "disk_template": self.op.disk_template,
      "tags": [],
      "os": self.op.os_type,
      "vcpus": self.be_full[constants.BE_VCPUS],
      "mem_size": self.be_full[constants.BE_MEMORY],
      "disks": self.disks,
      "nics": nic_dicts,
      "hypervisor": self.op.hypervisor,
      }
    allocator = IAllocator(self.cfg, self.rpc, **request)

    allocator.Run(self.op.iallocator)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, allocator.info),
                                 errors.ECODE_NORES)

    wanted = allocator.required_nodes
    if len(allocator.nodes) != wanted:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(allocator.nodes),
                                  wanted), errors.ECODE_FAULT)

    # the first returned node becomes the primary one
    self.op.pnode = allocator.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(allocator.nodes))
    if allocator.required_nodes == 2:
      self.op.snode = allocator.nodes[1]
5918

    
5919
  def BuildHooksEnv(self):
    """Build the hooks environment.

    The hooks run on the master, primary and secondary nodes of the
    instance.

    @return: a tuple of the environment dict and the node list, the
        latter being given twice

    """
    env = {"ADD_MODE": self.op.mode}
    if self.op.mode == constants.INSTANCE_IMPORT:
      # import-only variables, describing where the data comes from
      env.update({
        "SRC_NODE": self.op.src_node,
        "SRC_PATH": self.op.src_path,
        "SRC_IMAGES": self.src_images,
        })

    disk_info = [(disk["size"], disk["mode"]) for disk in self.disks]
    instance_env = _BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=disk_info,
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    )
    env.update(instance_env)

    hook_nodes = [self.cfg.GetMasterNode(), self.op.pnode]
    hook_nodes = hook_nodes + self.secondaries
    return env, hook_nodes, hook_nodes
5952

    
5953

    
5954
  def CheckPrereq(self):
    """Check prerequisites.

    For imports, the export is located (searching all the locked nodes
    if no source node was given), its configuration is parsed and the
    OS, disk images and, possibly, MAC addresses are taken from it.
    Afterwards the instance IP is pinged (if requested), the missing
    MAC addresses are generated, the iallocator is run (if given) and
    the selected nodes are checked: state, free disk space, hypervisor
    parameters, OS availability, NIC bridges and free memory.

    @raise errors.OpPrereqError: if any of the prerequisites is not met

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        # no source node given: look for the (relative) export path on
        # all the nodes we hold locks on, the first match wins
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path, errors.ECODE_INVAL)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        # a broken export is a problem of the environment, not of the
        # code, hence it is reported as a failed prerequisite
        raise errors.OpPrereqError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if int(ei_version) != constants.EXPORT_VERSION:
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION),
                                   errors.ECODE_ENVIRON)

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          # no dump for this disk, but its position must be kept
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = int(export_info.get(constants.INISECT_INS,
                                            'nic_count'))
      except ValueError:
        raise errors.OpPrereqError("Invalid NIC count in export config",
                                   errors.ECODE_ENVIRON)
      # the exported MAC addresses are reused only if the instance keeps
      # its name
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          # NICs are presumably numbered from zero, like the disks above,
          # so index exp_nic_count itself has no entry in the export
          if nic.mac == constants.VALUE_AUTO and idx < exp_nic_count:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node, errors.ECODE_ENVIRON)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size),
                                     errors.ECODE_NORES)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name),
                 prereq=True, ecode=errors.ECODE_INVAL)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
6127

    
6128
  def Exec(self, feedback_fn):
    """Create the instance and add it to the cluster.

    The disks are created first, then the instance is added to the
    configuration, the node locks are released (except the one of the
    source node, for imports), the disks are synced or checked, the OS
    create/import scripts are run and finally the instance is started,
    if so requested.

    @param feedback_fn: function used to send progress messages
    @return: the list of all the nodes of the new instance
    @raise errors.OpExecError: if the disks are degraded (other
        failures are raised by the helpers and RPC results used here)

    """
    inst_name = self.op.instance_name
    primary = self.pnode.name

    # a network port is allocated only for the hypervisors requiring it
    network_port = None
    if self.op.hypervisor in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()

    # this is needed because os.path.join does not accept None arguments
    storage_subdir = self.op.file_storage_dir
    if storage_subdir is None:
      storage_subdir = ""

    # build the full file storage dir path
    storage_path = os.path.join(self.cfg.GetFileStorageDir(),
                                storage_subdir, inst_name)
    file_storage_dir = os.path.normpath(storage_path)

    disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                  inst_name, primary, self.secondaries,
                                  self.disks, file_storage_dir,
                                  self.op.file_driver, 0)

    # the instance is marked up only later, just before being started
    new_inst = objects.Instance(name=inst_name, os=self.op.os_type,
                                primary_node=primary,
                                nics=self.nics, disks=disks,
                                disk_template=self.op.disk_template,
                                admin_up=False,
                                network_port=network_port,
                                beparams=self.op.beparams,
                                hvparams=self.op.hvparams,
                                hypervisor=self.op.hypervisor,
                                )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, new_inst)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, new_inst)
      finally:
        # the minors are released even if the disk removal fails too
        self.cfg.ReleaseDRBDMinors(inst_name)
        raise

    feedback_fn("adding instance %s to cluster config" % inst_name)

    self.cfg.AddInstance(new_inst, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode != constants.INSTANCE_IMPORT:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]
    else:
      # the source node is still needed by the import, keep its lock
      kept = [self.op.src_node]
      released = []
      for node_name in self.acquired_locks[locking.LEVEL_NODE]:
        if node_name != self.op.src_node:
          released.append(node_name)
      self.context.glm.release(locking.LEVEL_NODE, released)
      self.acquired_locks[locking.LEVEL_NODE] = kept

    disks_ok = True
    if self.op.wait_for_sync:
      disks_ok = _WaitForSync(self, new_inst)
    elif new_inst.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disks_ok = _WaitForSync(self, new_inst, oneshot=True)

    if not disks_ok:
      _RemoveDisks(self, new_inst)
      self.cfg.RemoveInstance(new_inst.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = new_inst.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (inst_name, primary))

    if new_inst.disk_template != constants.DT_DISKLESS:
      creation_mode = self.op.mode
      if creation_mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(primary, new_inst,
                                                         src_node, src_images,
                                                         cluster_name)
        # import failures are only logged, they do not abort the creation
        import_msg = import_result.fail_msg
        if import_msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" %
                          (inst_name, primary, import_msg))

      elif creation_mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(primary, new_inst, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (inst_name, primary))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      new_inst.admin_up = True
      self.cfg.Update(new_inst, feedback_fn)
      logging.info("Starting instance %s on node %s", inst_name, primary)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(primary, new_inst, None, None)
      result.Raise("Could not start instance")

    return list(new_inst.all_nodes)
6259

    
6260

    
6261
class LUConnectConsole(NoHooksLU):
  """Logical unit used to connect to the console of an instance.

  Nothing is actually connected here: the result is the command line
  which must be run on the master node in order to reach the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be in the cluster and its primary node must be
    online.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Compute the command giving access to the instance's console.

    @param feedback_fn: feedback function (not used here)
    @return: the ssh command line, as built by C{self.ssh.BuildCmd}
    @raise errors.OpExecError: if the instance is not running on its
        primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # the instance must be running on its primary node
    all_running = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = all_running[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    full_hvparams = cluster.FillHV(inst)
    full_beparams = cluster.FillBE(inst)
    console_cmd = hv_class.GetShellCommandForConsole(inst, full_hvparams,
                                                     full_beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
6312

    
6313

    
6314
class LUReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is carried out by a single L{TLReplaceDisks}
  tasklet; this class validates the opcode arguments, declares the locks
  and builds the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    C{remote_node} and C{iallocator} are set to None when the opcode does
    not carry them; the combination check itself is done by
    L{TLReplaceDisks.CheckArguments}.

    """
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replace-disks tasklet.

    @raise errors.OpPrereqError: if the given new secondary node name
        can not be expanded

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE

    if self.op.iallocator is not None:
      # The allocator may choose any node
      self.needed_locks[node_level] = locking.ALL_SET

    elif self.op.remote_node is None:
      # Only the instance's own nodes, filled in by DeclareLocks
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    else:
      expanded = self.cfg.ExpandNodeName(self.op.remote_node)
      if expanded is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node, errors.ECODE_NOENT)
      self.op.remote_node = expanded

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [expanded]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)
    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks unless all nodes are locked.

    @param level: the locking level being processed

    """
    if level != locking.LEVEL_NODE:
      return
    # With ALL_SET every node is covered already; otherwise the nodes of
    # the instance have to be declared as well.
    if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The node list holds the master node, the instance's primary node and,
    when one was given, the new secondary node.

    @return: tuple of (environment dict, node list, node list); the same
        list object is returned twice

    """
    inst = self.replacer.instance
    new_secondary = self.op.remote_node

    hook_env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": new_secondary,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return hook_env, hook_nodes, hook_nodes
6390

    
6391

    
6392
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  One L{TLReplaceDisks} tasklet in secondary-change mode is created for
  every instance returned by C{_GetNodeSecondaryInstances} for the node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    C{remote_node} and C{iallocator} are set to None when the opcode does
    not carry them; the combination is then checked by
    L{TLReplaceDisks.CheckArguments} for the secondary-change mode.

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Expand the node names, declare the locks and create the tasklets.

    @raise errors.OpPrereqError: if the node to evacuate or the new
        secondary node is not known, or if neither an iallocator nor a
        new secondary node is set

    """
    # Expand into a local first: assigning the result to self.op.node_name
    # before the check would replace the user-supplied name with None and
    # make the error message read "Node 'None' not known".
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node, errors.ECODE_NOENT)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks unless all nodes are locked.

    @param level: the locking level being processed

    """
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The node list holds the master node and, when one was given, the new
    secondary node; in that case the environment also gets a
    C{NEW_SECONDARY} entry.

    @return: tuple of (environment dict, node list, node list); the same
        list object is returned twice

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)
6484

    
6485

    
6486
class TLReplaceDisks(Tasklet):
6487
  """Replaces disks for an instance.
6488

6489
  Note: Locking is not within the scope of this class.
6490

6491
  """
6492
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Store the request parameters and reset the runtime state.

    @param lu: the logical unit owning this tasklet, passed on to
        C{Tasklet.__init__}
    @param instance_name: name of the instance to work on
    @param mode: the disk replacement mode
    @param iallocator_name: name of the iallocator to run, or None
    @param remote_node: name of the new secondary node, or None
    @param disks: the disks to replace

    """
    Tasklet.__init__(self, lu)

    # Request parameters, kept exactly as given
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Runtime state; everything stays None until CheckPrereq fills it in
    self.instance = None
    self.target_node = None
    self.other_node = None
    self.new_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
6513

    
6514
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Validate the mode/new node/iallocator combination.

    When changing the secondary, exactly one of C{remote_node} and
    C{iallocator} must be given; in every other mode both must be None.

    @param mode: the disk replacement mode
    @param remote_node: the new secondary node, or None
    @param iallocator: the iallocator name, or None
    @raise errors.OpPrereqError: if the combination is not valid

    """
    node_given = remote_node is not None
    allocator_given = iallocator is not None

    if mode == constants.REPLACE_DISK_CHG:
      if not (node_given or allocator_given):
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if node_given and allocator_given:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)
      return

    # Any other mode keeps the current secondary node
    if node_given or allocator_given:
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)
6535

    
6536
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Ask an iallocator for the new secondary node of an instance.

    @param lu: object providing C{cfg}, C{rpc} and C{LogInfo}
    @param iallocator_name: name of the iallocator to run
    @param instance_name: name of the instance to relocate
    @param relocate_from: passed to L{IAllocator} as C{relocate_from}
    @return: the first node returned by the allocator
    @raise errors.OpPrereqError: if the allocator run was not successful
        or returned an unexpected number of nodes

    """
    allocator = IAllocator(lu.cfg, lu.rpc,
                           mode=constants.IALLOCATOR_MODE_RELOC,
                           name=instance_name,
                           relocate_from=relocate_from)
    allocator.Run(iallocator_name)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, allocator.info),
                                 errors.ECODE_NORES)

    returned = len(allocator.nodes)
    if returned != allocator.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, returned,
                                  allocator.required_nodes),
                                 errors.ECODE_FAULT)

    new_secondary = allocator.nodes[0]
    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, new_secondary)
    return new_secondary
6566

    
6567
  def _FindFaultyDisks(self, node_name):
    """Return the faulty disks of the instance on the given node.

    Thin wrapper around C{_FindFaultyInstanceDisks}, called for
    C{self.instance}; the meaning of the trailing C{True} argument is
    defined by that helper, which lives outside this section.

    @param node_name: the node to query

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
6570

    
6571
  def CheckPrereq(self):
    """Validate the request and work out the nodes and disks involved.

    Besides checking that the instance is DRBD8-based and has exactly one
    secondary node, this fills in C{self.instance},
    C{self.remote_node_info}, C{self.target_node}, C{self.other_node},
    C{self.new_node} (secondary change only), C{self.disks} and
    C{self.node_secondary_ip}.

    @raise errors.OpPrereqError: if the instance layout, the new secondary
        node or the mode/disks combination is not acceptable
    @raise errors.ProgrammerError: if the replace mode is not handled

    """
    inst = self.cfg.GetInstanceInfo(self.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if inst.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    num_secondaries = len(inst.secondary_nodes)
    if num_secondaries != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 num_secondaries,
                                 errors.ECODE_FAULT)

    pnode = inst.primary_node
    snode = inst.secondary_nodes[0]

    # The new secondary is either the one given or the allocator's choice
    if self.iallocator_name is None:
      new_secondary = self.remote_node
    else:
      new_secondary = self._RunAllocator(self.lu, self.iallocator_name,
                                         inst.name, inst.secondary_nodes)

    if new_secondary is None:
      self.remote_node_info = None
    else:
      self.remote_node_info = self.cfg.GetNodeInfo(new_secondary)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % new_secondary

    if new_secondary == pnode:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if new_secondary == snode:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    mode = self.mode

    # These two modes always choose the disks themselves
    if self.disks and mode in (constants.REPLACE_DISK_AUTO,
                               constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if mode == constants.REPLACE_DISK_AUTO:
      bad_on_primary = self._FindFaultyDisks(pnode)
      bad_on_secondary = self._FindFaultyDisks(snode)

      if bad_on_primary and bad_on_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if bad_on_primary:
        self.disks = bad_on_primary
        self.target_node = pnode
        self.other_node = snode
        check_nodes = [self.target_node, self.other_node]
      elif bad_on_secondary:
        self.disks = bad_on_secondary
        self.target_node = snode
        self.other_node = pnode
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec will report that there is nothing to do
        self.disks = []
        check_nodes = []

    elif mode == constants.REPLACE_DISK_PRI:
      self.target_node = pnode
      self.other_node = snode
      check_nodes = [self.target_node, self.other_node]

    elif mode == constants.REPLACE_DISK_SEC:
      self.target_node = snode
      self.other_node = pnode
      check_nodes = [self.target_node, self.other_node]

    elif mode == constants.REPLACE_DISK_CHG:
      self.new_node = new_secondary
      self.other_node = pnode
      self.target_node = snode
      check_nodes = [self.new_node, self.other_node]

      _CheckNodeNotDrained(self.lu, new_secondary)

    else:
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                   self.mode)

    # In the non-automatic modes an empty disk list means "all disks"
    if mode != constants.REPLACE_DISK_AUTO and not self.disks:
      self.disks = range(len(inst.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # FindDisk is called for every index only to validate it; its return
    # value is not used
    for disk_idx in self.disks:
      inst.FindDisk(disk_idx)

    # Collect the secondary IP address of every node taking part
    secondary_ips = {}
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is None:
        continue
      secondary_ips[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = secondary_ips
6687

    
6688
  def Exec(self, feedback_fn):
    """Run the disk replacement through the matching handler.

    L{_ExecDrbd8Secondary} is used when a new secondary node was selected,
    L{_ExecDrbd8DiskOnly} otherwise. For an instance that is not marked up
    the disks are started before and shut down again after the
    replacement.

    @param feedback_fn: function used to send feedback to the caller
    @return: whatever the selected handler returns; None when there are
        no disks to replace

    """
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    # The disks of a down instance have to be activated for the operation
    instance_down = not self.instance.admin_up
    if instance_down:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      if self.new_node is None:
        handler = self._ExecDrbd8DiskOnly
      else:
        # A new node was chosen, hence the secondary is being replaced
        handler = self._ExecDrbd8Secondary

      return handler(feedback_fn)

    finally:
      # Bring the disks of a down instance back to their inactive state,
      # whether the replacement worked or not
      if instance_down:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6721

    
6722
  def _CheckVolumeGroup(self, nodes):
    """Check that the cluster volume group is listed on all given nodes.

    @param nodes: names of the nodes to query
    @raise errors.OpExecError: if the volume group list call returns
        nothing or the volume group is missing on one of the nodes

    """
    self.lu.LogInfo("Checking volume groups")

    wanted_vg = self.cfg.GetVGName()

    vg_lists = self.rpc.call_vg_list(nodes)
    if not vg_lists:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node_name in nodes:
      node_result = vg_lists[node_name]
      node_result.Raise("Error checking node %s" % node_name)
      if wanted_vg in node_result.payload:
        continue
      raise errors.OpExecError("Volume group '%s' not found on node %s" %
                               (wanted_vg, node_name))
6738

    
6739
  def _CheckDisksExistence(self, nodes):
    """Check that every disk to be replaced is found on the given nodes.

    Only the disks whose index is in C{self.disks} are looked at.

    @param nodes: names of the nodes on which to look for the disks
    @raise errors.OpExecError: if a lookup fails or returns no payload

    """
    for idx, dev in enumerate(self.instance.disks):
      if idx in self.disks:
        for node in nodes:
          self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
          self.cfg.SetDiskID(dev, node)

          found = self.rpc.call_blockdev_find(node, dev)
          failure = found.fail_msg
          if not failure and found.payload:
            continue

          # An empty payload without an RPC error means "not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, failure or "disk not found"))
6757

    
6758
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Refuse to go on if a disk to be replaced is not consistent.

    Only the disks whose index is in C{self.disks} are checked.

    @param node_name: name of the node to check
    @param on_primary: passed through to C{_CheckDiskConsistency}
    @param ldisk: passed through to C{_CheckDiskConsistency} as C{ldisk}
    @raise errors.OpExecError: if C{_CheckDiskConsistency} returns a false
        value for one of the disks

    """
    for idx, dev in enumerate(self.instance.disks):
      if idx in self.disks:
        self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                        (idx, node_name))

        consistent = _CheckDiskConsistency(self.lu, dev, node_name,
                                           on_primary, ldisk=ldisk)
        if not consistent:
          raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                   " replace disks for instance %s" %
                                   (node_name, self.instance.name))
6771

    
6772
  def _CreateNewStorage(self, node_name):
    """Create a new data/meta LV pair for every disk to be replaced.

    @param node_name: name of the node on which the LVs are created
    @return: dict mapping each disk's C{iv_name} to a tuple of
        (disk, its current children, list of the two new LV objects)

    """
    vg_name = self.cfg.GetVGName()
    info_text_for = _GetInstanceInfoText
    replacements = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      wanted = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      unique = _GenerateUniqueNames(self.lu, wanted)

      # The data volume has the size of the disk, the meta volume 128
      data_lv = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_name, unique[0]))
      meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_name, unique[1]))

      created = [data_lv, meta_lv]
      replacements[dev.iv_name] = (dev, dev.children, created)

      # we pass force_create=True to force the LVM creation
      for lv in created:
        _CreateBlockDev(self.lu, node_name, self.instance, lv, True,
                        info_text_for(self.instance), False)

    return replacements
6802

    
6803
  def _CheckDevices(self, node_name, iv_names):
    """Check that the replaced devices are found and not degraded.

    @param node_name: name of the node on which the devices are looked up
    @param iv_names: dict whose values are tuples with the device as first
        element; the keys are only used in the error messages
    @raise errors.OpExecError: if a device can not be found or is
        reported as degraded

    """
    for name, entry in iv_names.iteritems():
      dev = entry[0]
      self.cfg.SetDiskID(dev, node_name)

      found = self.rpc.call_blockdev_find(node_name, dev)

      failure = found.fail_msg
      if failure or not found.payload:
        # An empty payload without an RPC error means "not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, failure or "disk not found"))

      if found.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6818

    
6819
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the old logical volumes, warning about any failure.

    A failed removal is only logged as a warning; no exception is raised
    for it here.

    @param node_name: name of the node holding the old volumes
    @param iv_names: dict whose values are tuples with the list of old
        LVs as second element

    """
    for name, entry in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for old_lv in entry[1]:
        self.cfg.SetDiskID(old_lv, node_name)

        failure = self.rpc.call_blockdev_remove(node_name, old_lv).fail_msg
        if not failure:
          continue
        self.lu.LogWarning("Can't remove old LV: %s" % failure,
                           hint="remove unused LVs manually")
6830

    
6831
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to <name>_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name <name>_replaced-<time_t>)

    Failures are not very well handled.

    @param feedback_fn: function used to send feedback to the caller; it
        is passed on to the configuration update
    @raise errors.OpExecError: if one of the checks or one of the steps
        on the target node fails

    """
    steps_total = 6

    def _ReplacedName(lv, suffix):
      """Return the (vg, name) pair an old LV gets once it is replaced."""
      return (lv.physical_id[0], lv.physical_id[1] + "_replaced-%s" % suffix)

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren,
                                    _ReplacedName(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Make the in-memory objects follow the renames done on the node:
      # first the new LVs take over the old IDs ...
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # ... then the old LVs get the "_replaced" names; this has to come
      # second as the new name is derived from the still unchanged
      # physical_id of the old LV
      for disk in old_lvs:
        disk.logical_id = _ReplacedName(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        # Best-effort rollback: try to remove the new LVs before failing
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
6953

    
6954
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    @param feedback_fn: function used to send feedback to the caller; it
        is passed on to the configuration update
    @raise errors.OpExecError: if one of the checks fails or the primary
        node's disks can not be disconnected from the network

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    # one minor on the new node is requested per disk of the instance
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for _ in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # give back the minors allocated above before propagating
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        # only a warning: the replacement goes on without the old node
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
7099

    
7100

    
7101
class LURepairNodeStorage(NoHooksLU):
7102
  """Repairs the volume group on a node.
7103

7104
  """
7105
  _OP_REQP = ["node_name"]
7106
  REQ_BGL = False
7107

    
7108
  def CheckArguments(self):
7109
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
7110
    if node_name is None:
7111
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
7112
                                 errors.ECODE_NOENT)
7113

    
7114
    self.op.node_name = node_name
7115

    
7116
  def ExpandNames(self):
7117
    self.needed_locks = {
7118
      locking.LEVEL_NODE: [self.op.node_name],
7119
      }
7120

    
7121
  def _CheckFaultyDisks(self, instance, node_name):
7122
    """Ensure faulty disks abort the opcode or at least warn."""
7123
    try:
7124
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7125
                                  node_name, True):
7126
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7127
                                   " node '%s'" % (instance.name, node_name),
7128
                                   errors.ECODE_STATE)
7129
    except errors.OpPrereqError, err:
7130
      if self.op.ignore_consistency:
7131
        self.proc.LogWarning(str(err.args[0]))
7132
      else:
7133
        raise
7134

    
7135
  def CheckPrereq(self):
7136
    """Check prerequisites.
7137

7138
    """
7139
    storage_type = self.op.storage_type
7140

    
7141
    if (constants.SO_FIX_CONSISTENCY not in
7142
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7143
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7144
                                 " repaired" % storage_type,
7145
                                 errors.ECODE_INVAL)
7146

    
7147
    # Check whether any instance on this node has faulty disks
7148
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7149
      if not inst.admin_up:
7150
        continue
7151
      check_nodes = set(inst.all_nodes)
7152
      check_nodes.discard(self.op.node_name)
7153
      for inst_node_name in check_nodes:
7154
        self._CheckFaultyDisks(inst, inst_node_name)
7155

    
7156
  def Exec(self, feedback_fn):
7157
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7158
                (self.op.name, self.op.node_name))
7159

    
7160
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7161
    result = self.rpc.call_storage_execute(self.op.node_name,
7162
                                           self.op.storage_type, st_args,
7163
                                           self.op.name,
7164
                                           constants.SO_FIX_CONSISTENCY)
7165
    result.Raise("Failed to repair storage unit '%s' on %s" %
7166
                 (self.op.name, self.op.node_name))
7167

    
7168

    
7169
class LUGrowDisk(LogicalUnit):
  """Logical unit enlarging one disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are computed later."""
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The node list built here (used for both returned lists) contains the
    master node and the instance's primary node.

    @return: tuple of (environment dict, node list, node list)

    """
    env = {}
    env["DISK"] = self.op.disk
    env["AMOUNT"] = self.op.amount
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    master = self.cfg.GetMasterNode()
    nl = [master, self.instance.primary_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that all nodes of the instance are online, that the disk
    template is one that can be grown, that the requested disk exists and
    that every node reports enough free volume group space.

    @raise errors.OpPrereqError: if the disk template is neither plain nor
        DRBD8, if a node does not report an integer C{vg_free} value, or
        if a node has less free space than the requested amount

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    growable = (constants.DT_PLAIN, constants.DT_DRBD8)
    if inst.disk_template not in growable:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    vg_name = self.cfg.GetVGName()
    all_info = self.rpc.call_node_info(inst_nodes, vg_name, inst.hypervisor)
    for node_name in inst_nodes:
      node_result = all_info[node_name]
      node_result.Raise("Cannot get current information from node %s" %
                        node_name)
      free_space = node_result.payload.get('vg_free', None)
      if not isinstance(free_space, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node_name,
                                   errors.ECODE_ENVIRON)
      if free_space < self.op.amount:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node_name, free_space, self.op.amount),
                                   errors.ECODE_NORES)

  def Exec(self, feedback_fn):
    """Grow the disk on every node of the instance and record the new size.

    @param feedback_fn: callable used to report progress to the caller

    """
    inst = self.instance
    target = self.disk
    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(target, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, target,
                                                self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    target.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)
    if self.op.wait_for_sync:
      if not _WaitForSync(self, inst):
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
7266

    
7267

    
7268
class LUQueryInstanceData(NoHooksLU):
  """Logical unit collecting configuration and runtime data of instances.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare shared locks.

    An empty instance list means that all instances are wanted.

    @raise errors.OpPrereqError: if C{instances} is not a list or one of
        the names is not known

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not self.op.instances:
      # no explicit list: the names are filled in later, in CheckPrereq
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in self.op.instances:
        long_name = self.cfg.ExpandInstanceName(short_name)
        if long_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name,
                                     errors.ECODE_NOENT)
        expanded.append(long_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the wanted names (falling back to the acquired instance
    locks when no list was given) into instance objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    instances = []
    for inst_name in self.wanted_names:
      instances.append(self.cfg.GetInstanceInfo(inst_name))
    self.wanted_instances = instances

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Return the status of a block device on one node.

    @param node: the node to query; a false value yields C{None}
    @param instance_name: instance name, used only in the error message
    @param dev: the disk object to look up
    @return: C{None} for static queries, missing or offline nodes and
        empty payloads; otherwise a tuple of (dev_path, major, minor,
        sync_percent, estimated_time, is_degraded, ldisk_status)

    """
    if not node or self.op.static:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_result.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute the status dict of a disk and, recursively, its children.

    @param instance: the instance owning the disk
    @param snode: secondary node to query; for DRBD devices it is
        replaced by the non-primary node found in the logical id
    @param dev: the disk object
    @return: dict describing the disk, with primary/secondary status

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = []
    if dev.children:
      for child in dev.children:
        children_status.append(self._ComputeDiskStatus(instance, snode,
                                                       child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return the data of all wanted instances.

    @param feedback_fn: callable for progress reports (not used here)
    @return: dict mapping each instance name to a dict of its data

    """
    cluster = self.cfg.GetClusterInfo()
    result = {}

    for instance in self.wanted_instances:
      if self.op.static:
        remote_state = None
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        runtime = remote_info.payload
        if runtime and "state" in runtime:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = []
      for device in instance.disks:
        disks.append(self._ComputeDiskStatus(instance, None, device))

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
7426

    
7427

    
7428
class LUSetInstanceParams(LogicalUnit):
7429
  """Modifies an instance's parameters.
7430

7431
  """
7432
  HPATH = "instance-modify"
7433
  HTYPE = constants.HTYPE_INSTANCE
7434
  _OP_REQP = ["instance_name"]
7435
  REQ_BGL = False
7436

    
7437
  def CheckArguments(self):
7438
    if not hasattr(self.op, 'nics'):
7439
      self.op.nics = []
7440
    if not hasattr(self.op, 'disks'):
7441
      self.op.disks = []
7442
    if not hasattr(self.op, 'beparams'):
7443
      self.op.beparams = {}
7444
    if not hasattr(self.op, 'hvparams'):
7445
      self.op.hvparams = {}
7446
    self.op.force = getattr(self.op, "force", False)
7447
    if not (self.op.nics or self.op.disks or
7448
            self.op.hvparams or self.op.beparams):
7449
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7450

    
7451
    if self.op.hvparams:
7452
      _CheckGlobalHvParams(self.op.hvparams)
7453

    
7454
    # Disk validation
7455
    disk_addremove = 0
7456
    for disk_op, disk_dict in self.op.disks:
7457
      if disk_op == constants.DDM_REMOVE:
7458
        disk_addremove += 1
7459
        continue
7460
      elif disk_op == constants.DDM_ADD:
7461
        disk_addremove += 1
7462
      else:
7463
        if not isinstance(disk_op, int):
7464
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7465
        if not isinstance(disk_dict, dict):
7466
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7467
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7468

    
7469
      if disk_op == constants.DDM_ADD:
7470
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7471
        if mode not in constants.DISK_ACCESS_SET:
7472
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7473
                                     errors.ECODE_INVAL)
7474
        size = disk_dict.get('size', None)
7475
        if size is None:
7476
          raise errors.OpPrereqError("Required disk parameter size missing",
7477
                                     errors.ECODE_INVAL)
7478
        try:
7479
          size = int(size)
7480
        except ValueError, err:
7481
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7482
                                     str(err), errors.ECODE_INVAL)
7483
        disk_dict['size'] = size
7484
      else:
7485
        # modification of disk
7486
        if 'size' in disk_dict:
7487
          raise errors.OpPrereqError("Disk size change not possible, use"
7488
                                     " grow-disk", errors.ECODE_INVAL)
7489

    
7490
    if disk_addremove > 1:
7491
      raise errors.OpPrereqError("Only one disk add or remove operation"
7492
                                 " supported at a time", errors.ECODE_INVAL)
7493

    
7494
    # NIC validation
7495
    nic_addremove = 0
7496
    for nic_op, nic_dict in self.op.nics:
7497
      if nic_op == constants.DDM_REMOVE:
7498
        nic_addremove += 1
7499
        continue
7500
      elif nic_op == constants.DDM_ADD:
7501
        nic_addremove += 1
7502
      else:
7503
        if not isinstance(nic_op, int):
7504
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7505
        if not isinstance(nic_dict, dict):
7506
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7507
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7508

    
7509
      # nic_dict should be a dict
7510
      nic_ip = nic_dict.get('ip', None)
7511
      if nic_ip is not None:
7512
        if nic_ip.lower() == constants.VALUE_NONE:
7513
          nic_dict['ip'] = None
7514
        else:
7515
          if not utils.IsValidIP(nic_ip):
7516
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7517
                                       errors.ECODE_INVAL)
7518

    
7519
      nic_bridge = nic_dict.get('bridge', None)
7520
      nic_link = nic_dict.get('link', None)
7521
      if nic_bridge and nic_link:
7522
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7523
                                   " at the same time", errors.ECODE_INVAL)
7524
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7525
        nic_dict['bridge'] = None
7526
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7527
        nic_dict['link'] = None
7528

    
7529
      if nic_op == constants.DDM_ADD:
7530
        nic_mac = nic_dict.get('mac', None)
7531
        if nic_mac is None:
7532
          nic_dict['mac'] = constants.VALUE_AUTO
7533

    
7534
      if 'mac' in nic_dict:
7535
        nic_mac = nic_dict['mac']
7536
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7537
          if not utils.IsValidMac(nic_mac):
7538
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac,
7539
                                       errors.ECODE_INVAL)
7540
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7541
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7542
                                     " modifying an existing nic",
7543
                                     errors.ECODE_INVAL)
7544

    
7545
    if nic_addremove > 1:
7546
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7547
                                 " supported at a time", errors.ECODE_INVAL)
7548

    
7549
  def ExpandNames(self):
    """Lock the instance; its node locks are recalculated later."""
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE
7553

    
7554
  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()
7557

    
7558
  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The node list (used for both returned lists) holds the master node
    followed by all nodes of the instance. New memory/vcpus values and
    the NIC list as it will look after the change are passed as overrides
    to the instance environment builder.

    @return: tuple of (environment dict, node list, node list)

    """
    args = dict()
    for be_key, arg_name in ((constants.BE_MEMORY, 'memory'),
                             (constants.BE_VCPUS, 'vcpus')):
      if be_key in self.be_new:
        args[arg_name] = self.be_new[be_key]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      hook_nics = args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        # values given in the opcode win over the configured ones
        changes = nic_override.get(idx, {})
        ip = changes.get('ip', nic.ip)
        mac = changes.get('mac', nic.mac)
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        hook_nics.append((ip, mac, nicparams[constants.NIC_MODE],
                          nicparams[constants.NIC_LINK]))
      if constants.DDM_ADD in nic_override:
        added = nic_override[constants.DDM_ADD]
        ip = added.get('ip', None)
        mac = added['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        hook_nics.append((ip, mac, nicparams[constants.NIC_MODE],
                          nicparams[constants.NIC_LINK]))
      elif constants.DDM_REMOVE in nic_override:
        # removal always affects the last NIC
        del hook_nics[-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    nl = [self.cfg.GetMasterNode()]
    nl.extend(self.instance.all_nodes)
    return env, nl, nl
7608

    
7609
  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Compute the new parameter dicts after applying an update.

    @type old_params: dict
    @param old_params: the current parameters; not modified, a deep copy
                       is updated instead
    @type update_dict: dict
    @param update_dict: new values per key; the special value
                        constants.VALUE_DEFAULT removes the key so that
                        the default applies again
    @type default_values: dict
    @param default_values: defaults used to fill the resulting dict
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    new_params = copy.deepcopy(old_params)
    for name, value in update_dict.iteritems():
      if value != constants.VALUE_DEFAULT:
        new_params[name] = value
      else:
        # resetting a key that is not set is not an error
        new_params.pop(name, None)
    utils.ForceDictType(new_params, parameter_types)
    filled_params = objects.FillDict(default_values, new_params)
    return (new_params, filled_params)
7641

    
7642
  def CheckPrereq(self):
    """Check prerequisites.

    Computes the new hypervisor, backend and NIC parameters and checks
    them, together with the requested disk changes, against the current
    state of the instance and its nodes. Non-fatal problems are collected
    in C{self.warn} so that Exec can report them.

    Sets C{self.force}, C{self.instance}, C{self.cluster}, C{self.hv_new},
    C{self.hv_inst}, C{self.be_new}, C{self.be_inst}, C{self.warn},
    C{self.nic_pnew} and C{self.nic_pinst}.

    @raise errors.OpPrereqError: if one of the requested changes is not
        valid or cannot be applied

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    # Memory check; be_new is always bound here, as a memory change
    # implies that self.op.beparams is not empty
    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        # problems on the secondaries only produce warnings
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      # a 'bridge' value is stored as the NIC link parameter
      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance",
                                     errors.ECODE_INVAL)
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg), errors.ECODE_ENVIRON)
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.", errors.ECODE_STATE)

      # the limit applies to the number of disks (not of NICs)
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)
7850

    
7851
  def Exec(self, feedback_fn):
7852
    """Modifies an instance.
7853

7854
    All parameters take effect only at the next restart of the instance.
7855

7856
    """
7857
    # Process here the warnings from CheckPrereq, as we don't have a
7858
    # feedback_fn there.
7859
    for warn in self.warn:
7860
      feedback_fn("WARNING: %s" % warn)
7861

    
7862
    result = []
7863
    instance = self.instance
7864
    # disk changes
7865
    for disk_op, disk_dict in self.op.disks:
7866
      if disk_op == constants.DDM_REMOVE:
7867
        # remove the last disk
7868
        device = instance.disks.pop()
7869
        device_idx = len(instance.disks)
7870
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7871
          self.cfg.SetDiskID(disk, node)
7872
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7873
          if msg:
7874
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7875
                            " continuing anyway", device_idx, node, msg)
7876
        result.append(("disk/%d" % device_idx, "remove"))
7877
      elif disk_op == constants.DDM_ADD:
7878
        # add a new disk
7879
        if instance.disk_template == constants.DT_FILE:
7880
          file_driver, file_path = instance.disks[0].logical_id
7881
          file_path = os.path.dirname(file_path)
7882
        else:
7883
          file_driver = file_path = None
7884
        disk_idx_base = len(instance.disks)
7885
        new_disk = _GenerateDiskTemplate(self,
7886
                                         instance.disk_template,
7887
                                         instance.name, instance.primary_node,
7888
                                         instance.secondary_nodes,
7889
                                         [disk_dict],
7890
                                         file_path,
7891
                                         file_driver,
7892
                                         disk_idx_base)[0]
7893
        instance.disks.append(new_disk)
7894
        info = _GetInstanceInfoText(instance)
7895

    
7896
        logging.info("Creating volume %s for instance %s",
7897
                     new_disk.iv_name, instance.name)
7898
        # Note: this needs to be kept in sync with _CreateDisks
7899
        #HARDCODE
7900
        for node in instance.all_nodes:
7901
          f_create = node == instance.primary_node
7902
          try:
7903
            _CreateBlockDev(self, node, instance, new_disk,
7904
                            f_create, info, f_create)
7905
          except errors.OpExecError, err:
7906
            self.LogWarning("Failed to create volume %s (%s) on"
7907
                            " node %s: %s",
7908
                            new_disk.iv_name, new_disk, node, err)
7909
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7910
                       (new_disk.size, new_disk.mode)))
7911
      else:
7912
        # change a given disk
7913
        instance.disks[disk_op].mode = disk_dict['mode']
7914
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7915
    # NIC changes
7916
    for nic_op, nic_dict in self.op.nics:
7917
      if nic_op == constants.DDM_REMOVE:
7918
        # remove the last nic
7919
        del instance.nics[-1]
7920
        result.append(("nic.%d" % len(instance.nics), "remove"))
7921
      elif nic_op == constants.DDM_ADD:
7922
        # mac and bridge should be set, by now
7923
        mac = nic_dict['mac']
7924
        ip = nic_dict.get('ip', None)
7925
        nicparams = self.nic_pinst[constants.DDM_ADD]
7926
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7927
        instance.nics.append(new_nic)
7928
        result.append(("nic.%d" % (len(instance.nics) - 1),
7929
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7930
                       (new_nic.mac, new_nic.ip,
7931
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7932
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7933
                       )))
7934
      else:
7935
        for key in 'mac', 'ip':
7936
          if key in nic_dict:
7937
            setattr(instance.nics[nic_op], key, nic_dict[key])
7938
        if nic_op in self.nic_pinst:
7939
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
7940
        for key, val in nic_dict.iteritems():
7941
          result.append(("nic.%s/%d" % (key, nic_op), val))
7942

    
7943
    # hvparams changes
7944
    if self.op.hvparams:
7945
      instance.hvparams = self.hv_inst
7946
      for key, val in self.op.hvparams.iteritems():
7947
        result.append(("hv/%s" % key, val))
7948

    
7949
    # beparams changes
7950
    if self.op.beparams:
7951
      instance.beparams = self.be_inst
7952
      for key, val in self.op.beparams.iteritems():
7953
        result.append(("be/%s" % key, val))
7954

    
7955
    self.cfg.Update(instance, feedback_fn)
7956

    
7957
    return result
7958

    
7959

    
7960
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the (shared) node locks needed by the query.

    All nodes are locked if the opcode names none, otherwise only the
    ones returned by _GetWantedNodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    The only thing done here is remembering the nodes whose locks
    have been acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes for which the RPC call failed are mapped
        to False instead.

    """
    exports = {}
    for node, nres in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
      else:
        exports[node] = nres.payload

    return exports
8000

    
8001

    
8002
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode; when it is missing
    the default value is used.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and all the nodes."""
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know
    # where the previous export might be, and in this LU we search for it
    # and remove it from its current node. In the future we could fix
    # this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    hook_env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    # the same node list is returned twice
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    expanded_target = self.cfg.ExpandNodeName(self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(expanded_target)

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node,
                                 errors.ECODE_NOENT)
    target_name = self.dst_node.name
    _CheckNodeOnline(self, target_name)
    _CheckNodeNotDrained(self, target_name)

    # instance disk type verification
    for inst_disk in self.instance.disks:
      if inst_disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: function used to report the progress
    @rtype: tuple
    @return: a pair made of the status of the finalize step and of the
        list of the per-disk export statuses

    """
    inst = self.instance
    target = self.dst_node
    source = inst.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % inst.name)
      shutdown_res = self.rpc.call_instance_shutdown(source, inst,
                                                     self.shutdown_timeout)
      shutdown_res.Raise("Could not shutdown instance %s on"
                         " node %s" % (inst.name, source))

    vgname = self.cfg.GetVGName()

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for inst_disk in inst.disks:
      self.cfg.SetDiskID(inst_disk, source)

    # Activate the instance disks if we're exporting a stopped instance
    activate_disks = not inst.admin_up
    if activate_disks:
      feedback_fn("Activating disks for %s" % inst.name)
      _StartInstanceDisks(self, inst, None)

    # one entry per disk: the snapshot device, or False if it failed
    snap_disks = []
    # per-disk results
    dresults = []
    try:
      try:
        for idx, inst_disk in enumerate(inst.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, source))

          # the payload will be a snapshot of an lvm leaf of the disk we
          # passed
          snap_res = self.rpc.call_blockdev_snapshot(source, inst_disk)
          snap_msg = snap_res.fail_msg
          if snap_msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, source, snap_msg)
            snap_disks.append(False)
            continue
          snap_id = (vgname, snap_res.payload)
          snap_disks.append(objects.Disk(dev_type=constants.LD_LV,
                                         size=inst_disk.size,
                                         logical_id=snap_id,
                                         physical_id=snap_id,
                                         iv_name=inst_disk.iv_name))

      finally:
        # an instance which was shut down for the export and is marked
        # as up is started again, whatever happened to the snapshots
        if self.op.shutdown and inst.admin_up:
          feedback_fn("Starting instance %s" % inst.name)
          start_res = self.rpc.call_instance_start(source, inst, None, None)
          start_msg = start_res.fail_msg
          if start_msg:
            _ShutdownInstanceDisks(self, inst)
            raise errors.OpExecError("Could not start instance: %s" %
                                     start_msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, source, target.name))
        if not dev:
          # there is no snapshot to export for this disk
          dresults.append(False)
          continue

        export_res = self.rpc.call_snapshot_export(source, dev, target.name,
                                                   inst, cluster_name, idx)
        export_msg = export_res.fail_msg
        if export_msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, source, target.name,
                          export_msg)
        dresults.append(not export_msg)

        # the snapshot is removed even if its export failed
        remove_msg = self.rpc.call_blockdev_remove(source, dev).fail_msg
        if remove_msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, source, remove_msg)

      feedback_fn("Finalizing export on %s" % target.name)
      fin_msg = self.rpc.call_finalize_export(target.name, inst,
                                              snap_disks).fail_msg
      if fin_msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", inst.name, target.name, fin_msg)
      fin_resu = not fin_msg

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % inst.name)
        _ShutdownInstanceDisks(self, inst)

    other_nodes = self.cfg.GetNodeList()
    other_nodes.remove(target.name)

    # on one-node clusters the node list will be empty after the removal;
    # if we proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = inst.name
    if other_nodes:
      feedback_fn("Removing old exports for instance %s" % iname)
      for node, nres in self.rpc.call_export_list(other_nodes).items():
        if nres.fail_msg:
          continue
        if iname not in nres.payload:
          continue
        old_msg = self.rpc.call_export_remove(node, iname).fail_msg
        if old_msg:
          self.LogWarning("Could not remove older export for instance %s"
                          " on node %s: %s", iname, node, old_msg)
    return fin_resu, dresults
8199

    
8200

    
8201
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks: all the nodes and nothing else."""
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Remove any export.

    @param feedback_fn: function used to tell the user that no export
        was found for a name which could not be expanded

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not expanded_name
    if fqdn_warn:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded_name

    found = False
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    for node, nres in self.rpc.call_export_list(locked_nodes).items():
      query_msg = nres.fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_msg)
        continue
      if instance_name not in nres.payload:
        continue
      found = True
      remove_msg = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
8252

    
8253

    
8254
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    """Expand the name of the tagged object and declare its lock.

    For node and instance tags the name in the opcode is replaced by
    the expanded one, which is also the only lock requested.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = self.cfg.ExpandNodeName(self.op.name)
      if full_name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,), errors.ECODE_NOENT)
      lock_level = locking.LEVEL_NODE
    elif kind == constants.TAG_INSTANCE:
      full_name = self.cfg.ExpandInstanceName(self.op.name)
      if full_name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,), errors.ECODE_NOENT)
      lock_level = locking.LEVEL_INSTANCE
    else:
      # nothing is expanded or locked here for the other kinds
      return
    self.op.name = full_name
    self.needed_locks[lock_level] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the object the tags belong to and stores it in
    C{self.target}; an unknown tag kind is rejected.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
    self.target = target
8291

    
8292

    
8293
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of C{self.target}, converted to a list

    """
    current_tags = self.target.GetTags()
    return list(current_tags)
8305

    
8306

    
8307
class LUSearchTags(NoHooksLU):
8308
  """Searches the tags for a given pattern.
8309

8310
  """
8311
  _OP_REQP = ["pattern"]
8312
  REQ_BGL = False
8313

    
8314
  def ExpandNames(self):
8315
    self.needed_locks = {}
8316

    
8317
  def CheckPrereq(self):
8318
    """Check prerequisites.
8319

8320
    This checks the pattern passed for validity by compiling it.
8321

8322
    """
8323
    try:
8324
      self.re = re.compile(self.op.pattern)
8325
    except re.error, err:
8326
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8327
                                 (self.op.pattern, err), errors.ECODE_INVAL)
8328

    
8329
  def Exec(self, feedback_fn):
8330
    """Returns the tag list.
8331

8332
    """
8333
    cfg = self.cfg
8334
    tgts = [("/cluster", cfg.GetClusterInfo())]
8335
    ilist = cfg.GetAllInstancesInfo().values()
8336
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8337
    nlist = cfg.GetAllNodesInfo().values()
8338
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8339
    results = []
8340
    for path, target in tgts:
8341
      for tag in target.GetTags():
8342
        if self.re.search(tag):
8343
          results.append((path, tag))
8344
    return results
8345

    
8346

    
8347
class LUAddTags(TagsLU):
8348
  """Sets a tag on a given object.
8349

8350
  """
8351
  _OP_REQP = ["kind", "name", "tags"]
8352
  REQ_BGL = False
8353

    
8354
  def CheckPrereq(self):
8355
    """Check prerequisites.
8356

8357
    This checks the type and length of the tag name and value.
8358

8359
    """
8360
    TagsLU.CheckPrereq(self)
8361
    for tag in self.op.tags:
8362
      objects.TaggableObject.ValidateTag(tag)
8363

    
8364
  def Exec(self, feedback_fn):
8365
    """Sets the tag.
8366

8367
    """
8368
    try:
8369
      for tag in self.op.tags:
8370
        self.target.AddTag(tag)
8371
    except errors.TagError, err:
8372
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
8373
    self.cfg.Update(self.target, feedback_fn)
8374

    
8375

    
8376
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    validate = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate(old_tag)

    # the tags to be removed which the object does not have
    missing = frozenset(self.op.tags) - self.target.GetTags()
    if missing:
      quoted = sorted(["'%s'" % name for name in missing])
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(quoted)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    The target object is saved in the configuration once all the tags
    have been removed from it.

    """
    target = self.target
    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)
    self.cfg.Update(target, feedback_fn)
8408

    
8409

    
8410
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted_nodes
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    The delay on the master, if requested, is done before the one on
    the nodes; a failure of either of them raises an error.

    """
    duration = self.op.duration
    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      delay_results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for node, nres in delay_results.items():
        nres.Raise("Failure during rpc call to node %s" % node)
8450

    
8451

    
8452
class IAllocator(object):
8453
  """IAllocator framework.
8454

8455
  An IAllocator instance has four sets of attributes:
8456
    - cfg that is needed to query the cluster
8457
    - input data (all members of the _KEYS class attribute are required)
8458
    - four buffer attributes (in|out_data|text), that represent the
8459
      input (to the external script) in text and data structure format,
8460
      and the output from it, again in two formats
8461
    - the result variables from the script (success, info, nodes) for
8462
      easy usage
8463

8464
  """
8465
  # pylint: disable-msg=R0902
8466
  # lots of instance attributes
8467
  _ALLO_KEYS = [
8468
    "mem_size", "disks", "disk_template",
8469
    "os", "tags", "nics", "vcpus", "hypervisor",
8470
    ]
8471
  _RELO_KEYS = [
8472
    "relocate_from",
8473
    ]
8474

    
8475
  def __init__(self, cfg, rpc, mode, name, **kwargs):
    """Initialize the allocator and build its input data.

    @param cfg: stored as C{self.cfg}
    @param rpc: stored as C{self.rpc}
    @param mode: the allocator mode, which selects the set of keyword
        arguments that must be passed
    @param name: stored as C{self.name}
    @param kwargs: the input fields for the given mode; exactly the
        keys of the mode's key list must be present
    @raise errors.ProgrammerError: if the mode is unknown or if the
        keyword arguments do not match the key list of the mode

    """
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = None
    self.out_text = None
    self.in_data = None
    self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = None
    self.disks = None
    self.disk_template = None
    self.os = None
    self.tags = None
    self.nics = None
    self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = None
    self.info = None
    self.nodes = None

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      wanted_keys = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      wanted_keys = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    # no unknown parameter is accepted...
    for param in kwargs:
      if param not in wanted_keys:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % param)
      setattr(self, param, kwargs[param])
    # ...and none of the expected ones can be left out
    for param in wanted_keys:
      if param not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % param)
    self._BuildInputData()
8508

    
8509
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation: the
    cluster-level values plus one entry for each node and for each
    instance. The result is stored in C{self.in_data}.

    @raise errors.OpExecError: if a node which is neither offline nor
        drained doesn't return one of the expected attributes, or
        returns a non-integer value for it

    """
    cfg = self.cfg
    cluster = cfg.GetClusterInfo()
    # cluster data
    in_data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster.GetTags()),
      "enabled_hypervisors": list(cluster.enabled_hypervisors),
      # we don't have job IDs
      }
    # every instance, paired with the result of FillBE for it
    inst_list = [(inst, cluster.FillBE(inst))
                 for inst in cfg.GetAllInstancesInfo().values()]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = self.rpc.call_all_instances_info(
      node_list, cluster.enabled_hypervisors)

    # what every queried node has to return, as an integer
    required_attrs = ['memory_total', 'memory_free', 'memory_dom0',
                      'vg_size', 'vg_free', 'cpu_total']
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      # the RPC results are only used for online, non-drained nodes
      if not ninfo.offline and not ninfo.drained:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in required_attrs:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          attr_value = remote_info[attr]
          if not isinstance(attr_value, int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, attr_value))

        # compute memory used by primary instances
        i_p_mem = 0
        i_p_up_mem = 0
        for inst, beparams in inst_list:
          if inst.primary_node != nname:
            continue
          inst_mem = beparams[constants.BE_MEMORY]
          i_p_mem += inst_mem
          reported = node_iinfo[nname].payload
          if inst.name in reported:
            used_mem = int(reported[inst.name]['memory'])
          else:
            used_mem = 0
          # the part of the configured memory which the instance is not
          # using is subtracted from the free memory of the node
          remote_info['memory_free'] -= max(0, inst_mem - used_mem)

          if inst.admin_up:
            i_p_up_mem += inst_mem

        # then add the dynamic values
        pnr.update({
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          })

      node_results[nname] = pnr
    in_data["nodes"] = node_results

    # instance data
    instance_data = {}
    for inst, beparams in inst_list:
      nic_data = []
      for nic in inst.nics:
        filled = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                  nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled[constants.NIC_MODE],
          "link": filled[constants.NIC_LINK],
          }
        # for bridged NICs the link is also given as the bridge
        if nic_dict["mode"] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = nic_dict["link"]
        nic_data.append(nic_dict)

      disk_data = [{"size": dsk.size, "mode": dsk.mode}
                   for dsk in inst.disks]
      instance_data[inst.name] = {
        "tags": list(inst.GetTags()),
        "admin_up": inst.admin_up,
        "vcpus": beparams[constants.BE_VCPUS],
        "memory": beparams[constants.BE_MEMORY],
        "os": inst.os,
        "nodes": [inst.primary_node] + list(inst.secondary_nodes),
        "nics": nic_data,
        "disks": disk_data,
        "disk_template": inst.disk_template,
        "hypervisor": inst.hypervisor,
        "disk_space_total": _ComputeDiskSize(inst.disk_template, disk_data),
        }

    in_data["instances"] = instance_data

    self.in_data = in_data
8635

    
8636
  def _AddNewInstance(self):
    """Attach an "allocate" request to the allocator input data.

    The request is stored under the "request" key of self.in_data, so
    the cluster data must already be present there (_BuildInputData
    calls _ComputeClusterData before this method).  As a side effect,
    self.required_nodes is set to 2 when the disk template is listed in
    constants.DTS_NET_MIRROR and to 1 otherwise.

    The instance attributes are used as-is; checking the completeness
    of the opcode must have been done before calling this.

    """
    alloc_input = self.in_data

    total_space = _ComputeDiskSize(self.disk_template, self.disks)

    # network-mirrored templates are given two nodes, all others one
    uses_net_mirror = self.disk_template in constants.DTS_NET_MIRROR
    if uses_net_mirror:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    alloc_input["request"] = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": total_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
8668

    
8669
  def _AddRelocateInstance(self):
    """Attach a "relocate" request to the allocator input data.

    The instance named by self.name is looked up in the configuration;
    it must use a disk template from constants.DTS_NET_MIRROR and have
    exactly one secondary node.  On success self.required_nodes is set
    to 1 and the request is stored under the "request" key of
    self.in_data, which must already hold the cluster data
    (_BuildInputData calls _ComputeClusterData before this method).

    An unknown instance is reported as errors.ProgrammerError, the
    other two conditions as errors.OpPrereqError.

    """
    inst = self.cfg.GetInstanceInfo(self.name)
    if inst is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    is_mirrored = inst.disk_template in constants.DTS_NET_MIRROR
    if not is_mirrored:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    secondary_count = len(inst.secondary_nodes)
    if secondary_count != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1

    # only the size of each disk is handed to the disk space computation
    size_specs = []
    for dsk in inst.disks:
      size_specs.append({'size': dsk.size})
    total_space = _ComputeDiskSize(inst.disk_template, size_specs)

    self.in_data["request"] = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": total_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
8704

    
8705
  def _BuildInputData(self):
    """Assemble the complete allocator input.

    The cluster data is computed first, then the mode-specific request
    is added (an allocation request for
    constants.IALLOCATOR_MODE_ALLOC, a relocation request for any other
    mode).  The resulting self.in_data structure is finally serialized
    into self.in_text.

    """
    self._ComputeClusterData()

    # pick the method which adds the "request" entry for our mode
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      add_request = self._AddNewInstance
    else:
      add_request = self._AddRelocateInstance
    add_request()

    self.in_text = serializer.Dump(self.in_data)
8717

    
8718
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and store its output.

    The runner is invoked with the master node (as returned by
    self.cfg.GetMasterNode), the allocator name and self.in_text; the
    payload of its result is saved in self.out_text.  Nothing is
    returned.

    name is the allocator name passed to the runner.  If validate is
    true, the output is additionally checked via _ValidateResult.
    call_fn is the callable used to run the allocator; when None,
    self.rpc.call_iallocator_runner is used.

    """
    runner = call_fn
    if runner is None:
      runner = self.rpc.call_iallocator_runner

    master_node = self.cfg.GetMasterNode()
    rpc_result = runner(master_node, name, self.in_text)
    rpc_result.Raise("Failure while running the iallocator script")

    self.out_text = rpc_result.payload
    if not validate:
      return
    self._ValidateResult()
8731

    
8732
  def _ValidateResult(self):
8733
    """Process the allocator results.
8734

8735
    This will process and if successful save the result in
8736
    self.out_data and the other parameters.
8737

8738
    """
8739
    try:
8740
      rdict = serializer.Load(self.out_text)
8741
    except Exception, err:
8742
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8743

    
8744
    if not isinstance(rdict, dict):
8745
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
8746

    
8747
    for key in "success", "info", "nodes":
8748
      if key not in rdict:
8749
        raise errors.OpExecError("Can't parse iallocator results:"
8750
                                 " missing key '%s'" % key)
8751
      setattr(self, key, rdict[key])
8752

    
8753
    if not isinstance(rdict["nodes"], list):
8754
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8755
                               " is not a list")
8756
    self.out_data = rdict
8757

    
8758

    
8759
class LUTestAllocator(NoHooksLU):
  """Logical unit exercising the instance allocator code.

  Depending on the requested direction, this LU either returns the
  input text built for an allocator or runs the named allocator and
  returns its raw, unvalidated output.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test mode and
    direction.

    In allocation mode all instance attributes must be present, the
    name must not expand to an existing instance and the 'nics' and
    'disks' parameters must be well-formed lists of dicts; a missing or
    None hypervisor is replaced by self.cfg.GetHypervisorType().  In
    relocation mode the name is expanded to an existing instance, whose
    secondary nodes are saved in self.relocate_from.  For the "out"
    direction an allocator name is required.

    All failures are reported as errors.OpPrereqError.

    """
    op = self.op

    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ("name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"):
        if not hasattr(op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)

      existing = self.cfg.ExpandInstanceName(op.name)
      if existing is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   existing, errors.ECODE_EXISTS)

      if not isinstance(op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for nic in op.nics:
        nic_ok = (isinstance(nic, dict) and
                  "mac" in nic and
                  "ip" in nic and
                  "bridge" in nic)
        if not nic_ok:
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)

      if not isinstance(op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for dsk in op.disks:
        dsk_ok = (isinstance(dsk, dict) and
                  "size" in dsk and
                  isinstance(dsk["size"], int) and
                  "mode" in dsk and
                  dsk["mode"] in ['r', 'w'])
        if not dsk_ok:
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)

      has_hypervisor = hasattr(op, "hypervisor")
      if not has_hypervisor or op.hypervisor is None:
        op.hypervisor = self.cfg.GetHypervisorType()

    elif op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      full_name = self.cfg.ExpandInstanceName(op.name)
      if full_name is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   op.name, errors.ECODE_NOENT)
      op.name = full_name
      self.relocate_from = self.cfg.GetInstanceInfo(full_name).secondary_nodes

    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 op.mode, errors.ECODE_INVAL)

    if op.direction == constants.IALLOCATOR_DIR_OUT:
      has_allocator = hasattr(op, "allocator")
      if not has_allocator or op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    An IAllocator object is built from the opcode parameters (in
    relocation mode also from self.relocate_from, as set by
    CheckPrereq).  For the "in" direction its input text is returned;
    otherwise the allocator named in the opcode is run without
    validation and its output text is returned.

    feedback_fn is not used.

    """
    op = self.op

    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial_args = {
        "mode": op.mode,
        "name": op.name,
        "mem_size": op.mem_size,
        "disks": op.disks,
        "disk_template": op.disk_template,
        "os": op.os,
        "tags": op.tags,
        "nics": op.nics,
        "vcpus": op.vcpus,
        "hypervisor": op.hypervisor,
        }
    else:
      ial_args = {
        "mode": op.mode,
        "name": op.name,
        "relocate_from": list(self.relocate_from),
        }
    ial = IAllocator(self.cfg, self.rpc, **ial_args)

    if op.direction == constants.IALLOCATOR_DIR_IN:
      return ial.in_text

    ial.Run(op.allocator, validate=False)
    return ial.out_text