Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 47a72f18

History | View | Annotate | Download (309.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36

    
37
from ganeti import ssh
38
from ganeti import utils
39
from ganeti import errors
40
from ganeti import hypervisor
41
from ganeti import locking
42
from ganeti import constants
43
from ganeti import objects
44
from ganeti import serializer
45
from ganeti import ssconf
46

    
47

    
48
class LogicalUnit(object):
  """Common ancestor of every logical unit (LU).

  A concrete LU has to:
    - provide ExpandNames
    - provide CheckPrereq (not needed when tasklets are used)
    - provide Exec (not needed when tasklets are used)
    - provide BuildHooksEnv
    - override HPATH and HTYPE
    - optionally override its run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  All commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the
      caller in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Set up the common LU state and validate the opcode.

    Every attribute listed in _OP_REQP must exist on the opcode with a
    value other than None. Once that is verified, CheckArguments is invoked
    so that derived classes can run their own syntactic checks.

    @param processor: processor object; it supplies the LogWarning, LogInfo
        and LogStep callables
    @param op: the opcode handled by this LU
    @param context: context object; its cfg attribute is kept as self.cfg
    @param rpc: RPC runner, stored as self.rpc
    @raise errors.OpPrereqError: if a required opcode parameter is missing

    """
    self.proc = processor
    self.op = op
    self.context = context
    self.cfg = context.cfg
    self.rpc = rpc

    # Dicts through which the locking needs are declared to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # Lazily created by the "ssh" property
    self.__ssh = None

    # Logging shortcuts
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103

    # Dry-run support
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for required in self._OP_REQP:
      if getattr(op, required, None) is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   required, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Return the SshRunner object, creating it on first use.

    """
    runner = self.__ssh
    if not runner:
      runner = self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return runner

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Verify the syntactic validity of the opcode arguments.

    This hook is meant for simple, purely syntactic validation of the
    opcode parameters, without any cluster-related checks. The same could
    be done in ExpandNames and/or CheckPrereq, but keeping it separate is
    better because:

      - ExpandNames stays a purely lock-related function
      - CheckPrereq only runs after the locks have been acquired (and
        possibly waited for)

    The method may modify self.op so that later methods no longer need to
    worry about missing parameters. The default implementation does
    nothing.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    Called before the opcode starts executing; it should bring all opcode
    parameters to their canonical form (e.g. a short node name must be
    fully expanded once this method has completed successfully), so that
    locking, hooks, logging, etc. can work correctly.

    Implementations must also fill in self.needed_locks: a dict with lock
    levels as keys and lists of needed lock names as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    To share locks at a level (instead of acquiring them exclusively) set
    a true value (usually 1) for that level in self.share_locks. Locks are
    not shared by default.

    A list of tasklets may also be defined here; they are then executed in
    order instead of the usual LU-level CheckPrereq and Exec functions, if
    those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: if the LU does not require the BGL and has
        not overridden this method

    """
    # Implementing this method is mandatory only for concurrent LUs, so
    # that old LUs don't need to be changed all at the same time.
    if not self.REQ_BGL:
      raise NotImplementedError
    # Exclusive LUs don't need locks.
    self.needed_locks = {}

  def DeclareLocks(self, level):
    """Declare the LU locking needs for one level.

    Most LUs can declare all their locking needs in ExpandNames, but
    sometimes some locks can only be computed after others have been
    acquired. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and
    allows such calculations. It may modify self.needed_locks; by default
    it does nothing.

    It is only called if something is already set in self.needed_locks for
    the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check the prerequisites for this LU.

    The method should verify that everything needed for executing the LU
    is in place. It can do internode communication, but it should be
    idempotent - no cluster or system changes are allowed.

    errors.OpPrereqError should be raised when something is not fulfilled;
    the return value is ignored.

    The opcode parameters should also be brought to their canonical form
    here, if ExpandNames has not already done so.

    The default implementation runs CheckPrereq of every tasklet, in
    order.

    @raise NotImplementedError: if no tasklets are defined and the method
        has not been overridden

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (idx, tl) in enumerate(self.tasklets):
      logging.debug("Checking prerequisites for tasklet %s/%s",
                    idx + 1, len(self.tasklets))
      tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This is where the actual work is done. errors.OpExecError should be
    raised for failures that are somewhat dealt with in code, or expected.

    The default implementation runs Exec of every tasklet, in order.

    @param feedback_fn: feedback function, handed on to each tasklet
    @raise NotImplementedError: if no tasklets are defined and the method
        has not been overridden

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (idx, tl) in enumerate(self.tasklets):
      logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
      tl.Exec(feedback_fn)

  def BuildHooksEnv(self):
    """Build the hooks environment for this LU.

    The result should be a three-element tuple made of: a dict with the
    environment used for running the specific hook for this LU, a list of
    node names on which the hook should run before the execution, and a
    list of node names on which the hook should run after the execution.

    The dict keys must not carry the 'GANETI_' prefix, as this is handled
    by the hooks runner, which will also add further keys. An LU without
    any environment should return an empty dict (and not None).

    "No nodes" should be returned as an empty list (and not None).

    Note that this function is not called if the HPATH of the LU class is
    None.

    @raise NotImplementedError: always, unless overridden

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Tell the LU about the results of its hooks.

    Called each time a hooks phase has been executed, so that the Logical
    Unit can alter its result based on the hooks' results. The default
    implementation does nothing and hands the previous result back
    unchanged; any LU may override it if it wants to use the local cluster
    hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The API must be kept, hence the unused arguments and the "could be
    # a function" warning are ignored
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Expand the instance name of the opcode and declare its lock.

    Many LUs working on an instance receive its name in
    self.op.instance_name and need to expand it and then declare the
    expanded name for locking. This helper does both and stores the
    expanded name back into self.op.instance_name. self.needed_locks is
    initialized as a dict if that has not happened yet.

    @raise errors.OpPrereqError: if the instance name cannot be expanded

    """
    if self.needed_locks is not None:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    else:
      self.needed_locks = {}

    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if full_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)

    self.needed_locks[locking.LEVEL_INSTANCE] = full_name
    self.op.instance_name = full_name

  def _LockInstancesNodes(self, primary_only=False):
    """Declare the nodes of the locked instances for locking.

    To be called after one or more instances have been locked, in order to
    lock their nodes. It fills self.needed_locks[locking.LEVEL_NODE] with
    the primary (and, unless primary_only is set, secondary) nodes of the
    instances listed in self.acquired_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set; that entry is
    removed once the node list has been computed.

    In the future it may grow parameters to just lock some instance's
    nodes, or to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    node_names = []
    for inst_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      inst = self.context.cfg.GetInstanceInfo(inst_name)
      node_names.append(inst.primary_node)
      if not primary_only:
        node_names.extend(inst.secondary_nodes)

    # The recalculation mode decides whether the computed nodes replace or
    # extend the already declared ones; other values leave them untouched
    mode = self.recalculate_locks[locking.LEVEL_NODE]
    if mode == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = node_names
    elif mode == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(node_names)

    del self.recalculate_locks[locking.LEVEL_NODE]
354

    
355

    
356
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This method must never be called for LUs without hooks, so it
    unconditionally fails.

    @raise AssertionError: always

    """
    # An explicit raise (instead of "assert False") keeps this guard active
    # even when Python runs with -O, which strips assert statements
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
373

    
374

    
375
class Tasklet:
  """Base class for tasklets.

  A tasklet is a subcomponent of an LU. An LU may be built entirely from
  tasklets, or it may mix legacy code with tasklets. Tasklets know nothing
  about locks; locking has to be handled by the LU.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Remember the owning LU and set up shortcuts to its helpers.

    @param lu: the logical unit this tasklet belongs to

    """
    self.lu = lu

    # Shortcuts to the configuration and RPC runner of the LU
    self.rpc = lu.rpc
    self.cfg = lu.cfg

  def CheckPrereq(self):
    """Check the prerequisites for this tasklet.

    The method should verify that everything needed for executing the
    tasklet is in place. It can do internode communication, but it should
    be idempotent - no cluster or system changes are allowed.

    errors.OpPrereqError should be raised when something is not fulfilled;
    the return value is ignored.

    All parameters should also be brought to their canonical form here, if
    that hasn't been done before.

    @raise NotImplementedError: always, unless overridden

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This is where the actual work is done. errors.OpExecError should be
    raised for failures that are somewhat dealt with in code, or expected.

    @param feedback_fn: feedback function passed in by the caller
    @raise NotImplementedError: always, unless overridden

    """
    raise NotImplementedError
419

    
420

    
421
def _GetWantedNodes(lu, nodes):
  """Expand and validate a list of node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names to expand
  @rtype: list
  @return: the expanded node names, sorted with utils.NiceSort
  @raise errors.OpPrereqError: if the nodes parameter is not a list, or if
      one of the names cannot be expanded
  @raise errors.ProgrammerError: if the nodes list is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if len(nodes) == 0:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  expanded = []
  for short_name in nodes:
    full_name = lu.cfg.ExpandNodeName(short_name)
    if full_name is None:
      raise errors.OpPrereqError("No such node name '%s'" % short_name,
                                 errors.ECODE_NOENT)
    expanded.append(full_name)

  return utils.NiceSort(expanded)
450

    
451

    
452
def _GetWantedInstances(lu, instances):
  """Expand and validate a list of instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names; an empty list selects all
      instances known to the configuration
  @rtype: list
  @return: for a non-empty argument, the expanded names in the order they
      were given; otherwise all instance names, sorted with utils.NiceSort
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if not instances:
    # Nothing requested explicitly: fall back to every known instance
    return utils.NiceSort(lu.cfg.GetInstanceList())

  expanded = []
  for short_name in instances:
    full_name = lu.cfg.ExpandInstanceName(short_name)
    if full_name is None:
      raise errors.OpPrereqError("No such instance name '%s'" % short_name,
                                 errors.ECODE_NOENT)
    expanded.append(full_name)
  return expanded
482

    
483

    
484
def _CheckOutputFields(static, dynamic, selected):
  """Verify that every selected field is a known one.

  The static and dynamic field sets are merged, and the selection is
  checked against the result.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields selected by the user
  @raise errors.OpPrereqError: if any selected field matches neither set

  """
  known = utils.FieldSet()
  for field_set in (static, dynamic):
    known.Extend(field_set)

  unknown = known.NonMatching(selected)
  if not unknown:
    return

  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(unknown), errors.ECODE_INVAL)
501

    
502

    
503
def _CheckBooleanOpField(op, name):
504
  """Validates boolean opcode parameters.
505

506
  This will ensure that an opcode parameter is either a boolean value,
507
  or None (but that it always exists).
508

509
  """
510
  val = getattr(op, name, None)
511
  if not (val is None or isinstance(val, bool)):
512
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
513
                               (name, str(val)), errors.ECODE_INVAL)
514
  setattr(op, name, val)
515

    
516

    
517
def _CheckGlobalHvParams(params):
  """Verify that no global hypervisor parameter is being customised.

  Global hypervisor parameters (L{constants.HVC_GLOBALS}) must not be
  given customised values at instance level.

  @param params: the hypervisor parameter names to check
  @raise errors.OpPrereqError: if any of the given parameters is global

  """
  overridden = constants.HVC_GLOBALS.intersection(params)
  if not overridden:
    return

  raise errors.OpPrereqError("The following hypervisor parameters are global"
                             " and cannot be customized at instance level,"
                             " please modify them at cluster level: %s" %
                             utils.CommaJoin(overridden), errors.ECODE_INVAL)
530

    
531

    
532
def _CheckNodeOnline(lu, node):
533
  """Ensure that a given node is online.
534

535
  @param lu: the LU on behalf of which we make the check
536
  @param node: the node to check
537
  @raise errors.OpPrereqError: if the node is offline
538

539
  """
540
  if lu.cfg.GetNodeInfo(node).offline:
541
    raise errors.OpPrereqError("Can't use offline node %s" % node,
542
                               errors.ECODE_INVAL)
543

    
544

    
545
def _CheckNodeNotDrained(lu, node):
546
  """Ensure that a given node is not drained.
547

548
  @param lu: the LU on behalf of which we make the check
549
  @param node: the node to check
550
  @raise errors.OpPrereqError: if the node is drained
551

552
  """
553
  if lu.cfg.GetNodeInfo(node).drained:
554
    raise errors.OpPrereqError("Can't use drained node %s" % node,
555
                               errors.ECODE_INVAL)
556

    
557

    
558
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
559
                          memory, vcpus, nics, disk_template, disks,
560
                          bep, hvp, hypervisor_name):
561
  """Builds instance related env variables for hooks
562

563
  This builds the hook environment from individual variables.
564

565
  @type name: string
566
  @param name: the name of the instance
567
  @type primary_node: string
568
  @param primary_node: the name of the instance's primary node
569
  @type secondary_nodes: list
570
  @param secondary_nodes: list of secondary nodes as strings
571
  @type os_type: string
572
  @param os_type: the name of the instance's OS
573
  @type status: boolean
574
  @param status: the should_run status of the instance
575
  @type memory: string
576
  @param memory: the memory size of the instance
577
  @type vcpus: string
578
  @param vcpus: the count of VCPUs the instance has
579
  @type nics: list
580
  @param nics: list of tuples (ip, mac, mode, link) representing
581
      the NICs the instance has
582
  @type disk_template: string
583
  @param disk_template: the disk template of the instance
584
  @type disks: list
585
  @param disks: the list of (size, mode) pairs
586
  @type bep: dict
587
  @param bep: the backend parameters for the instance
588
  @type hvp: dict
589
  @param hvp: the hypervisor parameters for the instance
590
  @type hypervisor_name: string
591
  @param hypervisor_name: the hypervisor for the instance
592
  @rtype: dict
593
  @return: the hook environment for this instance
594

595
  """
596
  if status:
597
    str_status = "up"
598
  else:
599
    str_status = "down"
600
  env = {
601
    "OP_TARGET": name,
602
    "INSTANCE_NAME": name,
603
    "INSTANCE_PRIMARY": primary_node,
604
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
605
    "INSTANCE_OS_TYPE": os_type,
606
    "INSTANCE_STATUS": str_status,
607
    "INSTANCE_MEMORY": memory,
608
    "INSTANCE_VCPUS": vcpus,
609
    "INSTANCE_DISK_TEMPLATE": disk_template,
610
    "INSTANCE_HYPERVISOR": hypervisor_name,
611
  }
612

    
613
  if nics:
614
    nic_count = len(nics)
615
    for idx, (ip, mac, mode, link) in enumerate(nics):
616
      if ip is None:
617
        ip = ""
618
      env["INSTANCE_NIC%d_IP" % idx] = ip
619
      env["INSTANCE_NIC%d_MAC" % idx] = mac
620
      env["INSTANCE_NIC%d_MODE" % idx] = mode
621
      env["INSTANCE_NIC%d_LINK" % idx] = link
622
      if mode == constants.NIC_MODE_BRIDGED:
623
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
624
  else:
625
    nic_count = 0
626

    
627
  env["INSTANCE_NIC_COUNT"] = nic_count
628

    
629
  if disks:
630
    disk_count = len(disks)
631
    for idx, (size, mode) in enumerate(disks):
632
      env["INSTANCE_DISK%d_SIZE" % idx] = size
633
      env["INSTANCE_DISK%d_MODE" % idx] = mode
634
  else:
635
    disk_count = 0
636

    
637
  env["INSTANCE_DISK_COUNT"] = disk_count
638

    
639
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
640
    for key, value in source.items():
641
      env["INSTANCE_%s_%s" % (kind, key)] = value
642

    
643
  return env
644

    
645

    
646
def _NICListToTuple(lu, nics):
  """Convert NIC objects into a list of nic information tuples.

  The result can be passed to _BuildInstanceHookEnv or used as a return
  value in LUQueryInstanceData. The mode and link of each NIC are taken
  from its own parameters, filled up with the cluster default NIC
  parameters.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: list of (ip, mac, mode, link) tuples, one per NIC

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]

  converted = []
  for nic in nics:
    ip, mac = nic.ip, nic.mac
    params = objects.FillDict(cluster_defaults, nic.nicparams)
    converted.append((ip, mac,
                      params[constants.NIC_MODE],
                      params[constants.NIC_LINK]))
  return converted
668

    
669

    
670
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Build the instance-related hook environment from an instance object.

  The arguments for L{_BuildInstanceHookEnv} are derived from the instance
  and from the cluster-filled backend and hypervisor parameters.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  be_params = cluster.FillBE(instance)
  hv_params = cluster.FillHV(instance)

  kwargs = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_up,
    memory=be_params[constants.BE_MEMORY],
    vcpus=be_params[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=be_params,
    hvp=hv_params,
    hypervisor_name=instance.hypervisor,
    )

  # Caller-supplied values take precedence over the computed ones
  if override:
    kwargs.update(override)

  return _BuildInstanceHookEnv(**kwargs) # pylint: disable-msg=W0142
706

    
707

    
708
def _AdjustCandidatePool(lu, exceptions):
709
  """Adjust the candidate pool after node operations.
710

711
  """
712
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
713
  if mod_list:
714
    lu.LogInfo("Promoted nodes to master candidate role: %s",
715
               utils.CommaJoin(node.name for node in mod_list))
716
    for name in mod_list:
717
      lu.context.ReaddNode(name)
718
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
719
  if mc_now > mc_max:
720
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
721
               (mc_now, mc_max))
722

    
723

    
724
def _DecideSelfPromotion(lu, exceptions=None):
725
  """Decide whether I should promote myself as a master candidate.
726

727
  """
728
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
729
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
730
  # the new node will increase mc_max with one, so:
731
  mc_should = min(mc_should + 1, cp_size)
732
  return mc_now < mc_should
733

    
734

    
735
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters select bridged mode are considered. If
  there are none, no RPC call is made.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist
  @param profile: key of the cluster NIC parameters used to fill in the
      per-NIC parameters

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[profile]

  bridges = []
  for nic in target_nics:
    filled = objects.FillDict(cluster_defaults, nic.nicparams)
    if filled[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(filled[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
749

    
750

    
751
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; the instance's primary node is used
      if this is None

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
758

    
759

    
760
def _CheckOSVariant(os_obj, name):
761
  """Check whether an OS name conforms to the os variants specification.
762

763
  @type os_obj: L{objects.OS}
764
  @param os_obj: OS object to check
765
  @type name: string
766
  @param name: OS name passed by the user, to check for validity
767

768
  """
769
  if not os_obj.supported_variants:
770
    return
771
  try:
772
    variant = name.split("+", 1)[1]
773
  except IndexError:
774
    raise errors.OpPrereqError("OS name must include a variant",
775
                               errors.ECODE_INVAL)
776

    
777
  if variant not in os_obj.supported_variants:
778
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
779

    
780

    
781
def _GetNodeInstancesInner(cfg, fn):
782
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
783

    
784

    
785
def _GetNodeInstances(cfg, node_name):
  """Return all instances using a node, as primary or secondary.

  @param cfg: configuration object providing the instance information
  @param node_name: name of the node to look for
  @rtype: list
  @return: instance objects having the node among their all_nodes

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
791

    
792

    
793
def _GetNodePrimaryInstances(cfg, node_name):
  """Return the instances having a node as their primary node.

  @param cfg: configuration object providing the instance information
  @param node_name: name of the node to look for
  @rtype: list
  @return: instance objects whose primary_node equals the node name

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
799

    
800

    
801
def _GetNodeSecondaryInstances(cfg, node_name):
  """Return the instances having a node as one of their secondaries.

  @param cfg: configuration object providing the instance information
  @param node_name: name of the node to look for
  @rtype: list
  @return: instance objects listing the node in their secondary_nodes

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
807

    
808

    
809
def _GetStorageTypeArgs(cfg, storage_type):
  """Return the arguments for a storage type.

  @param cfg: configuration object providing GetFileStorageDir
  @param storage_type: the storage type being handled
  @rtype: list
  @return: for file storage, a one-element list holding the list of
      storage directories; an empty list for any other storage type

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  return [[cfg.GetFileStorageDir()]]
819

    
820

    
821
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Find the disks of an instance reported as faulty on a node.

  The disk IDs are set for the given node, then the mirror status of all
  instance disks is fetched from that node via RPC.

  @param cfg: configuration object, used to set the disk IDs
  @param rpc: RPC runner used for the mirror status call
  @param instance: the instance whose disks are examined
  @param node_name: the node to query
  @param prereq: passed on to the Raise method of the RPC result
  @rtype: list
  @return: indices of the disks whose local disk status is
      L{constants.LDS_FAULTY}

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Entries without a status are skipped
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
836

    
837

    
838
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself performs no work; it only supplies the hooks environment.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of the environment (holding only the cluster name as
        OP_TARGET), an empty pre-execution node list, and a post-execution
        node list containing just the master node

    """
    cluster_name = self.cfg.GetClusterName()
    master_node = self.cfg.GetMasterNode()
    return ({"OP_TARGET": cluster_name}, [], [master_node])

  def CheckPrereq(self):
    """No prerequisites to check.

    @return: always True

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: unused
    @return: always True

    """
    return True
865

    
866

    
867
class LUDestroyCluster(LogicalUnit):
  """Logical unit tearing down the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the cluster name is exported (as C{OP_TARGET}); both node lists
    of the returned tuple are empty.

    """
    return {"OP_TARGET": self.cfg.GetClusterName()}, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    The cluster can only be destroyed when it is empty: the master must
    be the single node left and no instance may be defined anymore.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master_name = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetNodeList()
    only_master_left = (len(all_nodes) == 1 and all_nodes[0] == master_name)
    if not only_master_left:
      extra_nodes = len(all_nodes) - 1
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % extra_nodes,
                                 errors.ECODE_INVAL)

    all_instances = self.cfg.GetInstanceList()
    if all_instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(all_instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master, disables the master role and, if
    the cluster manages the ssh setup, backs up the ssh key files of the
    Ganeti user.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master_name = self.cfg.GetMasterNode()
    cluster_info = self.cfg.GetClusterInfo()
    manage_ssh = cluster_info.modify_ssh_setup

    # Run post hooks on master node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [master_name])
    except:
      # hook failures must never prevent the destruction, hence the
      # catch-all handler
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master_name)

    stop_result = self.rpc.call_node_stop_master(master_name, False)
    stop_result.Raise("Could not disable the master role")

    if manage_ssh:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      for key_file in (priv_key, pub_key):
        utils.CreateBackup(key_file)

    return master_name
927

    
928

    
929
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  All problems found are reported through the feedback function; every
  failed check which is not a mere warning marks the verification as
  failed (see L{_ErrorIf}).

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  # the kind of item an error refers to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error codes, as (item type, error name) tuples
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # name of the keyword argument selecting the severity, and its values
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    """Declare the needed locks.

    All node and instance locks are requested, and the share flag is set
    to 1 for every locking level.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: one of the E* class attributes, i.e. an
        (item type, error name) tuple
    @param item: name of the object the message is about, or None
    @param msg: the message, interpolated with C{args} if any are given
    @keyword code: the severity (L{ETYPE_ERROR} if not given)

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    The condition is also considered True when the opcode asks for
    simulated errors. The remaining arguments are passed to L{_Error}.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and minimum size (constants.MIN_VG_SIZE)
      - checks config file checksum
      - checks ssh and tcp connectivity to other nodes
      - checks the hypervisor, DRBD minors, node setup and PV names

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the useddrbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    _ErrorIf(constants.RELEASE_VERSION != remote_version[1],
             self.ENODEVERSION, node,
             "software version mismatch: master %s, node %s",
             constants.RELEASE_VERSION, remote_version[1],
             code=self.ETYPE_WARNING)

    # checks vg existence and minimum size
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      node_is_mc = nodeinfo.master_candidate
      for file_name in file_list:
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        # every entry returned describes a failed connection
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        # every entry returned describes a failed connection
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        # a hypervisor reports None when its verification succeeded
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)

    # the node setup check returns the list of problems found, so a
    # non-empty list is an error
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node, that an instance marked as up runs
    on its primary node and that it runs on no other node.

    @param instance: the instance name
    @param instanceconfig: the configuration object of the instance
    @param node_vol_is: dictionary of node name to the volumes found on
        that node
    @param node_instance: dictionary of node name to the instances found
        running on that node
    @param n_offline: the names of the offline nodes

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               instance not in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if node != node_current:
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    Every volume found on a node which is not expected there is reported
    as unknown.

    @param node_vol_should: dictionary of node name to the volumes
        expected on that node
    @param node_vol_is: dictionary of node name to the volumes found on
        that node

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    @param instancelist: the names of the configured instances
    @param node_instance: dictionary of node name to the instances found
        running on that node

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    @param node_info: dictionary of node name to the per-node data
        computed by Exec (the 'mfree' and 'sinst-by-pnode' keys are used)
    @param instance_cfg: dictionary of instance name to its configuration
        object

    """
    # the cluster object does not depend on the node/instance examined
    cluster = self.cfg.GetClusterInfo()
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = cluster.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory on to accommodate"
                      " failovers should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    @raise errors.OpPrereqError: if a check to be skipped is not one of
        the optional checks

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    The environment holds the cluster tags and, for each node, its tags.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    @param feedback_fn: function used to report progress and problems
    @rtype: boolean
    @return: True if no check has failed, False otherwise

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        # a string result is an error message from the node
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist)")
      if test:
        continue

      node_instance[node] = idata

      # node_info; a dedicated name is used so the node list being
      # iterated is not rebound
      hv_info = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(hv_info, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        # without a valid time there is nothing to compare; report the
        # problem and handle the node like one whose clock diverges
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %0.1fs from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
        node_info[node] = {
          "mfree": int(hv_info['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError, TypeError):
        # TypeError covers values (e.g. None) which int() cannot convert
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes",
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results; for phases other than the post one the
        previous result is returned unchanged

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase != constants.HOOKS_PHASE_POST:
      return lu_result

    # Used to change hooks' output to proper indentation
    indent_re = re.compile('^', re.M)
    feedback_fn("* Hooks Results")
    assert hooks_results, "invalid result from hooks"

    for node_name in hooks_results:
      res = hooks_results[node_name]
      msg = res.fail_msg
      # failures to contact offline nodes are not errors
      test = msg and not res.offline
      self._ErrorIf(test, self.ENODEHOOKS, node_name,
                    "Communication failure in hooks execution: %s", msg)
      if test:
        # override manually lu_result here as _ErrorIf only
        # overrides self.bad
        lu_result = 1
        continue
      for script, hkr, output in res.payload:
        test = hkr == constants.HKR_FAIL
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Script %s failed, output:", script)
        if test:
          output = indent_re.sub('      ', output)
          feedback_fn("%s" % output)
          lu_result = 1

    return lu_result
1621

    
1622

    
1623
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, everything in shared mode.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict([(level, 1) for level in locking.LEVELS])

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    failed_nodes = {}
    inactive_instances = []
    missing_lvs = {}
    # the three containers are filled in place below
    result = (failed_nodes, inactive_instances, missing_lvs)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # maps (node, volume) to the owning instance; only instances that
    # are marked up and use a network-mirrored template are considered
    lv_owner = {}
    for inst in instances:
      if not (inst.admin_up and
              inst.disk_template in constants.DTS_NET_MIRROR):
        continue
      lvs_by_node = {}
      inst.MapLVsByNode(lvs_by_node)
      for node, vol_list in lvs_by_node.iteritems():
        for vol in vol_list:
          lv_owner[(node, vol)] = inst

    if not lv_owner:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        failed_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every volume reported by the node is dropped from the map, so
        # only volumes nobody reported remain at the end
        inst = lv_owner.pop((node, lv_name), None)
        if inst is None or lv_online:
          continue
        if inst.name not in inactive_instances:
          inactive_instances.append(inst.name)

    # any leftover items in the map are missing LVs, group them by
    # instance name
    for key, inst in lv_owner.iteritems():
      missing_lvs.setdefault(inst.name, []).append(key)

    return result
1704

    
1705

    
1706
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare the locks.

    With an empty instance list all nodes and instances are locked;
    otherwise only the named instances are locked and their nodes are
    computed later, in L{DeclareLocks}. All locks are shared.

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    else:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name,
                                     errors.ECODE_NOENT)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.share_locks = dict([(level, 1) for level in locking.LEVELS])

  def DeclareLocks(self, level):
    """Lock the primary nodes of explicitly requested instances.

    """
    if level != locking.LEVEL_NODE or self.wanted_names is None:
      return
    self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list was given: work on every locked instance
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(iname)
                             for iname in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: True if any size was corrected, False otherwise

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @return: list of (instance name, disk index, new size) tuples, one
        per correction that was written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_entries = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_entries.append((instance, idx, disk))

    changed = []
    for node, entries in per_node_disks.items():
      # the query is done on copies of the disk objects
      disk_copies = [entry[2].Copy() for entry in entries]
      for dsk in disk_copies:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(entries):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for (instance, idx, disk), reported in zip(entries, result.data):
        if reported is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(reported, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # the reported value is shifted down by 20 bits before being
        # compared with the recorded size
        actual = reported >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
1827

    
1828

    
1829
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: the environment plus two node lists: the master node only,
        and all nodes of the cluster

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master_node = self.cfg.GetMasterNode()
    return env, [master_node], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved and replaced in the opcode by the resolved
    name; the resolved IP is remembered for L{Exec}.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    new_ip = hostname.ip
    self.ip = new_ip

    name_changed = new_name != self.cfg.GetClusterName()
    ip_changed = new_ip != self.cfg.GetMasterIP()
    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    # a new IP must not answer on the node daemon port
    if ip_changed and utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the configuration and the known hosts
    file are updated and the latter is copied to the other nodes; the
    master role is started again in any case.

    """
    new_cluster_name = self.op.name
    new_master_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_cluster_name
      cluster.master_ip = new_master_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      if master in node_list:
        node_list.remove(master)
      upload_result = self.rpc.call_upload_file(node_list,
                                                constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in upload_result.iteritems():
        msg = to_result.fail_msg
        if not msg:
          continue
        # a failed copy is only reported, it does not abort the rename
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      msg = start_result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
1911

    
1912

    
1913
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  # the children are looked at first, then the disk itself
  for chdisk in disk.children or []:
    if _RecursiveCheckIfLVMBased(chdisk):
      return True
  return disk.dev_type == constants.LD_LV
1927

    
1928

    
1929
class LUSetClusterParams(LogicalUnit):
1930
  """Change the parameters of the cluster.
1931

1932
  """
1933
  HPATH = "cluster-modify"
1934
  HTYPE = constants.HTYPE_CLUSTER
1935
  _OP_REQP = []
1936
  REQ_BGL = False
1937

    
1938
  def CheckArguments(self):
1939
    """Check parameters
1940

1941
    """
1942
    if not hasattr(self.op, "candidate_pool_size"):
1943
      self.op.candidate_pool_size = None
1944
    if self.op.candidate_pool_size is not None:
1945
      try:
1946
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1947
      except (ValueError, TypeError), err:
1948
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1949
                                   str(err), errors.ECODE_INVAL)
1950
      if self.op.candidate_pool_size < 1:
1951
        raise errors.OpPrereqError("At least one master candidate needed",
1952
                                   errors.ECODE_INVAL)
1953

    
1954
  def ExpandNames(self):
1955
    # FIXME: in the future maybe other cluster params won't require checking on
1956
    # all nodes to be modified.
1957
    self.needed_locks = {
1958
      locking.LEVEL_NODE: locking.ALL_SET,
1959
    }
1960
    self.share_locks[locking.LEVEL_NODE] = 1
1961

    
1962
  def BuildHooksEnv(self):
1963
    """Build hooks env.
1964

1965
    """
1966
    env = {
1967
      "OP_TARGET": self.cfg.GetClusterName(),
1968
      "NEW_VG_NAME": self.op.vg_name,
1969
      }
1970
    mn = self.cfg.GetMasterNode()
1971
    return env, [mn], [mn]
1972

    
1973
  def CheckPrereq(self):
1974
    """Check prerequisites.
1975

1976
    This checks whether the given params don't conflict and
1977
    if the given volume group is valid.
1978

1979
    """
1980
    if self.op.vg_name is not None and not self.op.vg_name:
1981
      instances = self.cfg.GetAllInstancesInfo().values()
1982
      for inst in instances:
1983
        for disk in inst.disks:
1984
          if _RecursiveCheckIfLVMBased(disk):
1985
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1986
                                       " lvm-based instances exist",
1987
                                       errors.ECODE_INVAL)
1988

    
1989
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1990

    
1991
    # if vg_name not None, checks given volume group on all nodes
1992
    if self.op.vg_name:
1993
      vglist = self.rpc.call_vg_list(node_list)
1994
      for node in node_list:
1995
        msg = vglist[node].fail_msg
1996
        if msg:
1997
          # ignoring down node
1998
          self.LogWarning("Error while gathering data on node %s"
1999
                          " (ignoring node): %s", node, msg)
2000
          continue
2001
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2002
                                              self.op.vg_name,
2003
                                              constants.MIN_VG_SIZE)
2004
        if vgstatus:
2005
          raise errors.OpPrereqError("Error on node '%s': %s" %
2006
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2007

    
2008
    self.cluster = cluster = self.cfg.GetClusterInfo()
2009
    # validate params changes
2010
    if self.op.beparams:
2011
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2012
      self.new_beparams = objects.FillDict(
2013
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2014

    
2015
    if self.op.nicparams:
2016
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2017
      self.new_nicparams = objects.FillDict(
2018
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2019
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2020
      nic_errors = []
2021

    
2022
      # check all instances for consistency
2023
      for instance in self.cfg.GetAllInstancesInfo().values():
2024
        for nic_idx, nic in enumerate(instance.nics):
2025
          params_copy = copy.deepcopy(nic.nicparams)
2026
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2027

    
2028
          # check parameter syntax
2029
          try:
2030
            objects.NIC.CheckParameterSyntax(params_filled)
2031
          except errors.ConfigurationError, err:
2032
            nic_errors.append("Instance %s, nic/%d: %s" %
2033
                              (instance.name, nic_idx, err))
2034

    
2035
          # if we're moving instances to routed, check that they have an ip
2036
          target_mode = params_filled[constants.NIC_MODE]
2037
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2038
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2039
                              (instance.name, nic_idx))
2040
      if nic_errors:
2041
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2042
                                   "\n".join(nic_errors))
2043

    
2044
    # hypervisor list/parameters
2045
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2046
    if self.op.hvparams:
2047
      if not isinstance(self.op.hvparams, dict):
2048
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2049
                                   errors.ECODE_INVAL)
2050
      for hv_name, hv_dict in self.op.hvparams.items():
2051
        if hv_name not in self.new_hvparams:
2052
          self.new_hvparams[hv_name] = hv_dict
2053
        else:
2054
          self.new_hvparams[hv_name].update(hv_dict)
2055

    
2056
    if self.op.enabled_hypervisors is not None:
2057
      self.hv_list = self.op.enabled_hypervisors
2058
      if not self.hv_list:
2059
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2060
                                   " least one member",
2061
                                   errors.ECODE_INVAL)
2062
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2063
      if invalid_hvs:
2064
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2065
                                   " entries: %s" %
2066
                                   utils.CommaJoin(invalid_hvs),
2067
                                   errors.ECODE_INVAL)
2068
    else:
2069
      self.hv_list = cluster.enabled_hypervisors
2070

    
2071
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2072
      # either the enabled list has changed, or the parameters have, validate
2073
      for hv_name, hv_params in self.new_hvparams.items():
2074
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2075
            (self.op.enabled_hypervisors and
2076
             hv_name in self.op.enabled_hypervisors)):
2077
          # either this is a new hypervisor, or its parameters have changed
2078
          hv_class = hypervisor.GetHypervisor(hv_name)
2079
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2080
          hv_class.CheckParameterSyntax(hv_params)
2081
          _CheckHVParams(self, node_list, hv_name, hv_params)
2082

    
2083
  def Exec(self, feedback_fn):
2084
    """Change the parameters of the cluster.
2085

2086
    """
2087
    if self.op.vg_name is not None:
2088
      new_volume = self.op.vg_name
2089
      if not new_volume:
2090
        new_volume = None
2091
      if new_volume != self.cfg.GetVGName():
2092
        self.cfg.SetVGName(new_volume)
2093
      else:
2094
        feedback_fn("Cluster LVM configuration already in desired"
2095
                    " state, not changing")
2096
    if self.op.hvparams:
2097
      self.cluster.hvparams = self.new_hvparams
2098
    if self.op.enabled_hypervisors is not None:
2099
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2100
    if self.op.beparams:
2101
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2102
    if self.op.nicparams:
2103
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2104

    
2105
    if self.op.candidate_pool_size is not None:
2106
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2107
      # we need to update the pool size here, otherwise the save will fail
2108
      _AdjustCandidatePool(self, [])
2109

    
2110
    self.cfg.Update(self.cluster, feedback_fn)
2111

    
2112

    
2113
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes: everything except the master itself
  master_name = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()).name
  dist_nodes = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  try:
    dist_nodes.remove(master_name)
  except ValueError:
    # the master was not among the targets
    pass

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.HMAC_CLUSTER_KEY,
                   ])

  # every enabled hypervisor may contribute files of its own
  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload, skipping files which do not exist
  for fname in dist_files:
    if not os.path.exists(fname):
      continue
    upload_result = lu.rpc.call_upload_file(dist_nodes, fname)
    for to_node, to_result in upload_result.items():
      msg = to_result.fail_msg
      if not msg:
        continue
      # a failed copy is only reported
      lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                         (fname, to_node, msg))
2155

    
2156

    
2157
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The cluster object is saved again, without changes, and afterwards
    the ancillary files are copied.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2183

    
2184

    
2185
def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  @param lu: the calling logical unit
  @param instance: the instance whose disks are polled on its primary
      node
  @param oneshot: if true, report the status found by a single poll
      instead of waiting for the sync to finish (a degraded status is
      still re-checked a few times)
  @return: True if no disk was found degraded (or the instance has no
      disks), False otherwise
  @raise errors.RemoteError: if the node could not be queried ten times
      in a row

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  rpc_failures = 0
  degraded_checks_left = 10 # in seconds, as we sleep 1 second each time
  while True:
    # NOTE(review): this ends up holding the estimate of the last device
    # that reported one, not the largest estimate - confirm this is meant
    sleep_time = 0
    done = True
    cumul_degraded = False
    reply = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = reply.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      rpc_failures += 1
      if rpc_failures >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue

    # a successful call resets the consecutive failure counter
    rpc_failures = 0
    for idx, mstat in enumerate(reply.payload):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, instance.disks[idx].iv_name)
        continue

      if mstat.sync_percent is None:
        # not syncing: the device only matters if it is degraded
        if mstat.is_degraded:
          cumul_degraded = True
        continue

      # the device is still syncing, report its progress
      done = False
      if mstat.estimated_time is None:
        rem_time = "no time estimate"
      else:
        rem_time = "%d estimated seconds remaining" % mstat.estimated_time
        sleep_time = mstat.estimated_time
      lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                      (instance.disks[idx].iv_name, mstat.sync_percent,
                       rem_time))

    finished = done or oneshot

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if finished and cumul_degraded and degraded_checks_left > 0:
      logging.info("Degraded disks found, %d retries left",
                   degraded_checks_left)
      degraded_checks_left -= 1
      time.sleep(1)
      continue

    if finished:
      break

    time.sleep(min(60, sleep_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
2256

    
2257

    
2258
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @param lu: the calling logical unit
  @param dev: the disk to check; its children are checked as well
  @param node: the node on which the check is done
  @param on_primary: whether the node is the primary one; on other
      nodes the device itself is only queried if it says it is
      assembled on secondaries
  @return: True if the device and all its children passed the test

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      consistent = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = rstats.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not rstats.payload.is_degraded

  # NOTE(review): ldisk is not passed down, so children are always
  # tested via is_degraded - confirm this is intended
  for child in dev.children or []:
    if not consistent:
      # once a failure is known the remaining children are not queried
      break
    consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
2290

    
2291

    
2292
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the query; no locks are requested.

    @raise errors.OpPrereqError: if specific OS names were given, as
        selective queries are not supported

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @rtype: list
    @return: one row per OS, each row holding the values of the
        requested output fields, in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        # an OS is valid only if the first entry on every node is valid;
        # the variants are those common to all nodes
        valid = True
        variants = None
        for osl in os_data.values():
          # force a real boolean, otherwise an empty list or the raw
          # status value would end up in the "valid" field
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2405

    
2406

    
2407
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is left out of both returned node lists; a
    failed node would otherwise make the removal impossible. The post
    hook for that node is triggered explicitly from Exec.

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    hook_nodes = self.cfg.GetNodeList()
    try:
      hook_nodes.remove(target)
    except ValueError:
      # the target is not in the node list, nothing to filter out
      pass
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - no instance lists it among its nodes

    Any errors are signaled by raising errors.OpPrereqError. On success
    the expanded node name is stored back in the opcode and the node
    object is kept in self.node.

    """
    full_name = self.cfg.ExpandNodeName(self.op.node_name)
    node = self.cfg.GetNodeInfo(full_name)
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
                                 errors.ECODE_NOENT)

    instance_list = self.cfg.GetInstanceList()

    if self.cfg.GetMasterNode() == node.name:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      inst_info = self.cfg.GetInstanceInfo(instance_name)
      if node.name not in inst_info.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first." % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The candidate pool is adjusted and the node dropped from the
    context first; afterwards the post hooks are run on the node and it
    is asked to leave the cluster. Failures in these last two steps are
    only logged as warnings.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    leave_result = self.rpc.call_node_leave_cluster(node.name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)
2491

    
2492

    
2493
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly as attributes of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields answered from the data returned by the node_info RPC
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  # fields answered without querying the nodes
  _FIELDS_STATIC = utils.FieldSet(*([
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS))

  def ExpandNames(self):
    """Validate the requested fields and compute the needed locks.

    Node locks (shared) are only requested when at least one non-static
    field is selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing to do: a non-empty node list has already been validated by
    _GetWantedNodes, and an empty one needs no validation.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    Returns one list per node (nodes in NiceSort order), each holding
    the values of self.op.output_fields in the requested order.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      nodenames = all_info.keys()
    else:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      # (output field, key in the RPC payload) for integer-like values
      int_fields = [
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        ("cnodes", 'cpu_nodes'),
        ("csockets", 'cpu_sockets'),
        ]
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        reply = node_data[name]
        if reply.fail_msg or not reply.payload:
          # failed or empty answer: no live data for this node
          live_data[name] = {}
          continue
        payload = reply.payload
        node_live = {"bootid": payload.get('bootid', None)}
        for (out_field, payload_key) in int_fields:
          node_live[out_field] = utils.TryConvert(int,
                                                  payload.get(payload_key,
                                                              None))
        live_data[name] = node_live
    else:
      # a single empty dict is shared by all nodes; it is only read
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # only walk the instances when an instance-related field is wanted
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          value = getattr(node, field)
        elif field == "pinst_list":
          value = list(node_to_primary[node.name])
        elif field == "sinst_list":
          value = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          value = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          value = len(node_to_secondary[node.name])
        elif field == "pip":
          value = node.primary_ip
        elif field == "sip":
          value = node.secondary_ip
        elif field == "tags":
          value = list(node.GetTags())
        elif field == "master":
          value = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          value = live_data[node.name].get(field, None)
        elif field == "role":
          # first matching state wins: master, candidate, drained,
          # offline, regular
          if node.name == master_node:
            value = "M"
          elif node.master_candidate:
            value = "C"
          elif node.drained:
            value = "D"
          elif node.offline:
            value = "O"
          else:
            value = "R"
        else:
          raise errors.ParameterError(field)
        row.append(value)
      output.append(row)

    return output
2652

    
2653

    
2654
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the requested fields and request shared node locks.

    All nodes are locked when the opcode names none.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the locked nodes as the nodes to query.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Returns one list per volume, holding the stringified values of
    self.op.output_fields. Offline nodes are skipped silently, nodes
    whose RPC failed are skipped with a warning.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    # per instance: the result of MapLVsByNode(), indexed by node below
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      fail_msg = nresult.fail_msg
      if fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        fail_msg)
        continue

      # sorted() works on a copy, the RPC payload is left untouched
      for vol in sorted(nresult.payload, key=lambda entry: entry['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # first instance owning the volume on this node, '-' if none
            val = '-'
            for inst in ilist:
              inst_lvs = lv_by_node[inst]
              if node in inst_lvs and vol['name'] in inst_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
2738

    
2739

    
2740
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    """Validate storage type and fields, request shared node locks.

    All nodes are locked when the opcode names none.

    """
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This defaults the optional "name" opcode attribute to None and
    records the locked nodes as the nodes to query.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Returns one list per storage unit, holding the values of
    self.op.output_fields; nodes are visited in NiceSort order and the
    units of a node are ordered by name. Offline nodes are skipped
    silently, nodes whose RPC failed are skipped with a warning.

    """
    # Always get name to sort by
    fields = list(self.op.output_fields)
    if constants.SF_NAME not in fields:
      fields.insert(0, constants.SF_NAME)

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in fields if name not in lu_only]

    # position of each requested field in the rows returned by the RPC
    field_idx = {}
    for (idx, name) in enumerate(fields):
      field_idx[name] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      fail_msg = nresult.fail_msg
      if fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        fail_msg)
        continue

      # index the returned rows by storage unit name
      rows = {}
      for entry in nresult.payload:
        rows[entry[name_idx]] = entry

      for unit_name in utils.NiceSort(rows.keys()):
        row = rows[unit_name]

        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)

        result.append(out)

    return result
2835

    
2836

    
2837
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    Raises errors.OpPrereqError for an unknown node or storage type.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = expanded

    if self.op.storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" %
                                 self.op.storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock of the target node.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the storage type can be modified at all and that
    every requested change touches a modifiable field of that type.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    not_allowed = set(self.op.changes.keys()) - modifiable
    if not not_allowed:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (storage_type, list(not_allowed)),
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    A failure of the RPC call is raised through the result's Raise.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    result = self.rpc.call_storage_modify(op.node_name,
                                          op.storage_type, st_args,
                                          op.name, op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (op.name, op.node_name))
2892

    
2893

    
2894
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds the current cluster nodes, the second one
    the same nodes plus the node being added.

    """
    op = self.op
    env = {
      "OP_TARGET": op.node_name,
      "NODE_NAME": op.node_name,
      "NODE_PIP": op.primary_ip,
      "NODE_SIP": op.secondary_ip,
      }
    current_nodes = self.cfg.GetNodeList()
    return env, current_nodes, current_nodes + [op.node_name, ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is resolvable
     - it is not already in the config (or, for a re-add, that it is and
       still has the same IP addresses)
     - its IP addresses don't clash with those of another node
     - its parameters (single/dual homed) matches the cluster
     - it is reachable on the node daemon port

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    readd = self.op.readd

    dns_data = utils.GetHostInfo(self.op.node_name)
    node = dns_data.name

    primary_ip = dns_data.ip
    self.op.primary_ip = primary_ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      # no secondary ip given: fall back to the primary one
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    new_ips = (primary_ip, secondary_ip)
    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if readd and node == existing_node_name:
        # the node being re-added must keep its old addresses
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      if (existing_node.primary_ip in new_ips or
          existing_node.secondary_ip in new_ips):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no private ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a private ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    exceptions = []
    if readd:
      exceptions.append(node)

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The node is checked for protocol version, optionally given the ssh
    keys and an /etc/hosts entry, verified from the master, and finally
    added (or re-added) to the cluster context.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = False # pylint: disable-msg=W0201
      new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != result.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, result.payload)

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]
      # file contents, in the order expected by the node_add RPC
      keyarray = [utils.ReadFile(path) for path in keyfiles]

      result = self.rpc.call_node_add(node, *keyarray)
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      vresult = result[verifier]
      vresult.Raise("Cannot communicate with node %s" % verifier)
      failures = result[verifier].payload[constants.NV_NODELIST]
      if not failures:
        continue
      # report every failure before aborting
      for failed in failures:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, failures[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not self.op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      result = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
3112

    
3113

    
3114
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The modifiable flags are master_candidate, offline and drained; each
  of them is None in the opcode when it should not be changed.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested flag changes.

    Raises errors.OpPrereqError if the node is unknown, if no
    modification was passed, or if more than one flag is set to True.

    """
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      # an unknown node is a "no such entity" error, as in the other
      # node LUs of this module
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock of the node being modified.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the modified node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that:
     - the node is not the master (its flags can't be changed here)
     - demoting/offlining/draining a master candidate leaves enough
       candidates, unless the operation is forced and allowed
     - an offline or drained node is not promoted to master candidate

    When the node is being de-offlined or un-drained, this also decides
    whether it should be promoted to master candidate.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)

    # The flags are tri-state (None/True/False), hence the explicit
    # comparisons against True and False instead of truth tests.
    # Boolean value that tells us whether we're offlining or draining the node
    offline_or_drain = self.op.offline == True or self.op.drained == True
    deoffline_or_drain = self.op.offline == False or self.op.drained == False

    if (node.master_candidate and
        (self.op.master_candidate == False or offline_or_drain)):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
      if mc_now <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, mc_now-1))
        # Only allow forcing the operation if it's an offline/drain operation,
        # and we could not possibly promote more nodes.
        # FIXME: this can still lead to issues if in any way another node which
        # could be promoted appears in the meantime.
        if self.op.force and offline_or_drain and mc_should == mc_max:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (deoffline_or_drain and not offline_or_drain and not
        self.op.master_candidate == True):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    Returns a list of (parameter, new value or explanation) pairs, one
    per change applied to the node, including the automatic ones.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        # an offline node is neither master candidate nor drained
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        # a drained node is neither master candidate nor offline
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3267

    
3268

    
3269
class LUPowercycleNode(NoHooksLU):
  """Logical unit that power-cycles a single node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    The expanded name is stored back into C{self.op.node_name}.

    @raise errors.OpPrereqError: if the name cannot be expanded, or if
        the node is the master and the force parameter is not set

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = expanded

    targets_master = (expanded == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort action that must not wait for other
    jobs, so no locks are requested at all.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Ask the node to powercycle itself.

    @return: the payload of the powercycle RPC result

    """
    hv_name = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_name)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
3312

    
3313

    
3314
class LUQueryClusterInfo(NoHooksLU):
  """Logical unit returning a summary of the cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks needed by this LU: none.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: version constants, the local architecture and the main
        settings of the cluster object, keyed by name

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # only export the parameters of hypervisors that are enabled
    enabled_hvparams = {}
    for hv_name in enabled_hvs:
      enabled_hvparams[hv_name] = cluster.hvparams[hv_name]

    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": enabled_hvparams,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }
3361

    
3362

    
3363
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  The supported fields are listed in C{_FIELDS_STATIC}; there are no
  dynamic fields.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Request no locks and validate the selected output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Collect the requested configuration values.

    @rtype: list
    @return: one value per entry of C{self.op.output_fields}, in the
        same order as the requested fields
    @raise errors.ParameterError: for a field name not handled here

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        # collected like every other field: returning directly from here
        # would drop the values gathered so far and hand back a bare
        # value instead of the per-field list
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
3404

    
3405

    
3406
class LUActivateInstanceDisks(NoHooksLU):
  """Logical unit activating the disks of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the (already locked) instance, verifies that its primary
    node is online and defaults the optional C{ignore_size} opcode
    attribute to False.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    assembled, device_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if assembled:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
3446

    
3447

    
3448
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3449
                           ignore_size=False):
3450
  """Prepare the block devices for an instance.
3451

3452
  This sets up the block devices on all nodes.
3453

3454
  @type lu: L{LogicalUnit}
3455
  @param lu: the logical unit on whose behalf we execute
3456
  @type instance: L{objects.Instance}
3457
  @param instance: the instance for whose disks we assemble
3458
  @type ignore_secondaries: boolean
3459
  @param ignore_secondaries: if true, errors on secondary nodes
3460
      won't result in an error return from the function
3461
  @type ignore_size: boolean
3462
  @param ignore_size: if true, the current known size of the disk
3463
      will not be used during the disk activation, useful for cases
3464
      when the size is wrong
3465
  @return: False if the operation failed, otherwise a list of
3466
      (host, instance_visible_name, node_visible_name)
3467
      with the mapping from node devices to instance devices
3468

3469
  """
3470
  device_info = []
3471
  disks_ok = True
3472
  iname = instance.name
3473
  # With the two passes mechanism we try to reduce the window of
3474
  # opportunity for the race condition of switching DRBD to primary
3475
  # before handshaking occured, but we do not eliminate it
3476

    
3477
  # The proper fix would be to wait (with some limits) until the
3478
  # connection has been made and drbd transitions from WFConnection
3479
  # into any other network-connected state (Connected, SyncTarget,
3480
  # SyncSource, etc.)
3481

    
3482
  # 1st pass, assemble on all nodes in secondary mode
3483
  for inst_disk in instance.disks:
3484
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3485
      if ignore_size:
3486
        node_disk = node_disk.Copy()
3487
        node_disk.UnsetSize()
3488
      lu.cfg.SetDiskID(node_disk, node)
3489
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3490
      msg = result.fail_msg
3491
      if msg:
3492
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3493
                           " (is_primary=False, pass=1): %s",
3494
                           inst_disk.iv_name, node, msg)
3495
        if not ignore_secondaries:
3496
          disks_ok = False
3497

    
3498
  # FIXME: race condition on drbd migration to primary
3499

    
3500
  # 2nd pass, do only the primary node
3501
  for inst_disk in instance.disks:
3502
    dev_path = None
3503

    
3504
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3505
      if node != instance.primary_node:
3506
        continue
3507
      if ignore_size:
3508
        node_disk = node_disk.Copy()
3509
        node_disk.UnsetSize()
3510
      lu.cfg.SetDiskID(node_disk, node)
3511
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3512
      msg = result.fail_msg
3513
      if msg:
3514
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3515
                           " (is_primary=True, pass=2): %s",
3516
                           inst_disk.iv_name, node, msg)
3517
        disks_ok = False
3518
      else:
3519
        dev_path = result.payload
3520

    
3521
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3522

    
3523
  # leave the disks configured for the primary node
3524
  # this is a workaround that would be fixed better by
3525
  # improving the logical/physical id handling
3526
  for disk in instance.disks:
3527
    lu.cfg.SetDiskID(disk, instance.primary_node)
3528

    
3529
  return disks_ok, device_info
3530

    
3531

    
3532
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled with C{force} used as the
  C{ignore_secondaries} flag. On failure they are shut down again and
  an error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries}; when it is None no
      '--force' hint is logged on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  assembled, _ = _AssembleInstanceDisks(lu, instance,
                                        ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  # the hint only makes sense when force was explicitly given as false
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3545

    
3546

    
3547
class LUDeactivateInstanceDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the (already locked) instance from the configuration.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks, via L{_SafeShutdownInstanceDisks}.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3579

    
3580

    
3581
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance, unless it is running.

  The primary node is asked for its list of instances; only if the
  given instance is not in that list L{_ShutdownInstanceDisks} is
  called.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @raise errors.OpExecError: if the instance is running

  """
  primary = instance.primary_node
  listing = lu.rpc.call_instance_list([primary], [instance.hypervisor])
  running = listing[primary]
  running.Raise("Can't contact node %s" % primary)

  if instance.name in running.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)
3597

    
3598

    
3599
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3600
  """Shutdown block devices of an instance.
3601

3602
  This does the shutdown on all nodes of the instance.
3603

3604
  If the ignore_primary is false, errors on the primary node are
3605
  ignored.
3606

3607
  """
3608
  all_result = True
3609
  for disk in instance.disks:
3610
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3611
      lu.cfg.SetDiskID(top_disk, node)
3612
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3613
      msg = result.fail_msg
3614
      if msg:
3615
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3616
                      disk.iv_name, node, msg)
3617
        if not ignore_primary or node != instance.primary_node:
3618
          all_result = False
3619
  return all_result
3620

    
3621

    
3622
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for its information and the reported
  'memory_free' value is compared with the requested amount. An
  OpPrereqError is raised when the node cannot be queried, when the
  reported value is not an integer, or when it is lower than the
  requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  all_info = lu.rpc.call_node_info([node], lu.cfg.GetVGName(),
                                   hypervisor_name)
  node_result = all_info[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get('memory_free')
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available),
                               errors.ECODE_ENVIRON)

  if requested <= available:
    return

  raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                             " needed %s MiB, available %s MiB" %
                             (node, reason, requested, available),
                             errors.ECODE_NORES)
3657

    
3658

    
3659
class LUStartupInstance(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes)

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list = node_list + list(self.instance.all_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, validates the optional one-off beparams and
    hvparams overrides, and checks that the primary node is online,
    that the instance bridges exist and, if the instance is not
    already running, that the primary node has enough free memory.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    beparams = getattr(self.op, "beparams", {})
    self.beparams = beparams
    if beparams:
      if not isinstance(beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = beparams

    # extra hvparams
    hvparams = getattr(self.op, "hvparams", {})
    self.hvparams = hvparams
    if hvparams:
      if not isinstance(hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
      # cluster defaults, then instance settings, then the overrides
      merged_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      merged_hvp.update(hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor,
                     merged_hvp)
      self.op.hvparams = hvparams

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    instance_state = self.rpc.call_instance_info(instance.primary_node,
                                                 instance.name,
                                                 instance.hypervisor)
    instance_state.Raise("Error checking node %s" % instance.primary_node,
                         prereq=True, ecode=errors.ECODE_ENVIRON)
    already_running = bool(instance_state.payload)
    if not already_running:
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           filled_be[constants.BE_MEMORY],
                           instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first; then its
    disks are started and the start RPC is sent to the primary node.
    If that RPC fails the disks are shut down again.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance,
                                                self.hvparams, self.beparams)
    start_err = start_result.fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % start_err)
3759

    
3760

    
3761
class LURebootInstance(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Reads the optional C{shutdown_timeout} opcode attribute, falling
    back to the default shutdown timeout.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Validate the reboot type and lock the instance.

    @raise errors.ParameterError: for an unknown reboot type

    """
    valid_types = [constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL]
    if self.op.reboot_type not in valid_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  tuple(valid_types))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes)

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list = node_list + list(self.instance.all_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance and checks that its primary node is online and
    that its bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are delegated to the reboot RPC on the
    primary node. Any other type is handled as a full reboot: the
    instance is shut down, its disks are restarted and the instance
    is started again. In both cases the instance is finally marked
    as up in the configuration.

    @raise errors.OpExecError: if the instance cannot be started again
        during a full reboot

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    kind = self.op.reboot_type
    pnode = instance.primary_node
    timeout = self.shutdown_timeout

    node_side_reboot = kind in [constants.INSTANCE_REBOOT_SOFT,
                                constants.INSTANCE_REBOOT_HARD]
    if node_side_reboot:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, instance,
                                                    kind, timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      stop_result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      start_result = self.rpc.call_instance_start(pnode, instance, None, None)
      start_err = start_result.fail_msg
      if start_err:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % start_err)

    self.cfg.MarkInstanceUp(instance.name)
3849

    
3850

    
3851
class LUShutdownInstance(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Reads the optional C{timeout} opcode attribute, falling back to
    the default shutdown timeout.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes)

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    node_list = [self.cfg.GetMasterNode()]
    node_list = node_list + list(self.instance.all_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance and checks that its primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration before the
    shutdown RPC is sent. A failing RPC is only logged as a warning,
    and the disks are shut down in any case.

    """
    instance = self.instance
    pnode = instance.primary_node
    shutdown_timeout = self.timeout

    self.cfg.MarkInstanceDown(instance.name)

    stop_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                  shutdown_timeout)
    stop_err = stop_result.fail_msg
    if stop_err:
      self.proc.LogWarning("Could not shutdown instance: %s" % stop_err)

    _ShutdownInstanceDisks(self, instance)
3906

    
3907

    
3908
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes)

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    If a new OS type was requested, it is also checked against the
    primary node (including the OS variant, unless C{force_variant}
    is set).

    @raise errors.OpPrereqError: if the instance is diskless, marked
        up or running, or if the primary node or the requested OS
        cannot be validated

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name,
                                 errors.ECODE_STATE)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node),
                                 errors.ECODE_STATE)

    # both attributes are optional in the opcode
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # report the instance's primary node; this opcode has no
        # "pnode" attribute, so it must not be read here
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node, errors.ECODE_NOENT)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name),
                   prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(result.payload, self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was requested it is saved in the configuration
    first. The disks are then started, the OS create scripts are run
    on the primary node, and the disks are shut down again whether or
    not the installation succeeded.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
3997

    
3998

    
3999
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit recreating the disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The C{disks} parameter must be a list of non-negative integers.

    @raise errors.OpPrereqError: if the parameter is malformed

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if isinstance(item, int) and item >= 0:
        continue
      raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                 str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes)

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    node_list = [self.cfg.GetMasterNode()]
    node_list = node_list + list(self.instance.all_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    and that every requested disk index exists. An empty disk list
    selects all the disks of the instance.

    @raise errors.OpPrereqError: if the instance is diskless, marked
        up or running, or if a disk index is out of range

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name, errors.ECODE_STATE)

    instance_state = self.rpc.call_instance_info(instance.primary_node,
                                                 instance.name,
                                                 instance.hypervisor)
    instance_state.Raise("Error checking node %s" % instance.primary_node,
                         prereq=True, ecode=errors.ECODE_ENVIRON)
    if instance_state.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node), errors.ECODE_STATE)

    if self.op.disks:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      self.op.disks = range(len(instance.disks))

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Disks whose index was not requested are passed to L{_CreateDisks}
    as the ones to skip.

    """
    # indices which have not been passed in
    to_skip = [idx for idx, _ in enumerate(self.instance.disks)
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4081

    
4082

    
4083
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  The instance has to be marked down and must not be running; see
  L{CheckPrereq} for the full list of checks.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is the per-instance hook environment extended with
    INSTANCE_NEW_NAME. The node list (returned twice) is the master node
    followed by all nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that the (resolved) new name is not already an instance of the
    cluster and, unless the opcode carries a true C{ignore_ip}, that the
    IP of the new name does not answer a TCP ping on
    C{constants.DEFAULT_NODED_PORT}.

    As side effects, C{self.instance} is set and C{self.op.new_name} is
    replaced by the resolved name.

    """
    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(full_name)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)
    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name, errors.ECODE_STATE)

    # the configuration says "down"; make sure the node agrees
    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name, pnode),
                                 errors.ECODE_STATE)
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)
    new_name = name_info.name
    self.op.new_name = new_name

    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if getattr(self.op, "ignore_ip", False):
      return

    if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (name_info.ip, new_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are switched to the new name
    first; for file-based instances the storage directory is renamed on
    the primary node afterwards, and finally the OS rename script is run
    with the instance disks activated. A failure of the OS script is only
    logged as a warning.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      # remember the directory before the configuration is changed
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, old_name)
    lock_manager.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                         old_file_storage_dir,
                                                         new_file_storage_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (inst.primary_node, old_file_storage_dir,
                        new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(inst.primary_node,
                                                        inst, old_name)
      failure = script_result.fail_msg
      if failure:
        self.proc.LogWarning("Could not run OS rename script for instance %s"
                             " on node %s"
                             " (but the instance has been renamed in Ganeti):"
                             " %s" %
                             (inst.name, inst.primary_node, failure))
    finally:
      # the disks were only needed for the OS script
      _ShutdownInstanceDisks(self, inst)
4185

    
4186

    
4187
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  The instance is shut down, its disks are removed and it is then dropped
  from the cluster configuration.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    C{shutdown_timeout} is optional in the opcode; when absent,
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} is used.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance; node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is the per-instance hook environment plus
    SHUTDOWN_TIMEOUT; the node list (returned twice) holds only the
    master node.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    node_list = [self.cfg.GetMasterNode()]
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    Failures to shut down the instance or to remove its disks abort the
    operation with L{errors.OpExecError}, unless C{ignore_failures} is
    set in the opcode, in which case they are only reported through
    C{feedback_fn}.

    """
    instance = self.instance
    pnode = instance.primary_node
    logging.info("Shutting down instance %s on node %s",
                 instance.name, pnode)

    shutdown_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                      self.shutdown_timeout)
    failure = shutdown_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, pnode, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    # record the instance lock for removal as well
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4264

    
4265

    
4266
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  Fields listed in C{_FIELDS_STATIC} are answered from the configuration
  alone; the ones in C{_FIELDS_DYNAMIC} need live data from the nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  # fields which are read directly as attributes of the instance object
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  # the regular expression entries describe indexed/aggregated fields such
  # as "disk.size/0", "nic.macs" or "disk.count"
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  def ExpandNames(self):
    """Validate the requested fields and compute the needed locks.

    Locks are only requested when at least one selected field is not
    static (i.e. the nodes have to be queried) and the opcode asks for
    locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    for lock_level in (locking.LEVEL_INSTANCE, locking.LEVEL_NODE):
      self.share_locks[lock_level] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedInstances(self, self.op.names)

    # any non-static field means the nodes must be contacted
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if not self.do_locking:
      return

    self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks, but only when locking is in use.

    """
    if self.do_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing needs to be verified for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    @return: a list with one entry per instance; each entry is the list
        of values of the requested fields, in the order given by
        C{self.op.output_fields}

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.do_locking:
      known_names = self.acquired_locks[locking.LEVEL_INSTANCE]
    else:
      known_names = all_info.keys()

    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      instance_names = utils.NiceSort(known_names)
    else:
      # caller did specify names, so we must keep the ordering
      missing = set(self.wanted).difference(known_names)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset(inst.primary_node for inst in instance_list)
    hv_list = list(set(inst.hypervisor for inst in instance_list))

    bad_nodes = []
    off_nodes = []
    if not self.do_node_query:
      live_data = dict([(iname, {}) for iname in instance_names])
    else:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for node_name in nodes:
        node_result = node_data[node_name]
        if node_result.offline:
          # offline nodes will be in both lists
          off_nodes.append(node_name)
        if node_result.fail_msg:
          bad_nodes.append(node_name)
        elif node_result.payload:
          live_data.update(node_result.payload)
        # else no instance is alive

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      row = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      # per-NIC parameters with the cluster defaults filled in
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      pnode = instance.primary_node
      has_nics = bool(instance.nics)
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        # candidate parameter names, only meaningful for "hv/"/"be/" fields
        hv_key = field[len(HVPREFIX):]
        be_key = field[len(BEPREFIX):]
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = pnode
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if pnode in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if pnode in off_nodes:
            val = "ERROR_nodeoffline"
          elif pnode in bad_nodes:
            val = "ERROR_nodedown"
          elif live_data.get(instance.name):
            # running; is it supposed to?
            if instance.admin_up:
              val = "running"
            else:
              val = "ERROR_up"
          elif instance.admin_up:
            val = "ERROR_down"
          else:
            val = "ADMIN_down"
        elif field == "oper_ram":
          if pnode in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field in ("ip", "mac"):
          # attributes of the first NIC, if any
          if has_nics:
            val = getattr(instance.nics[0], field)
          else:
            val = None
        elif field == "nic_mode":
          if has_nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if has_nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (has_nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field in ("sda_size", "sdb_size"):
          # "sda" -> disk 0, "sdb" -> disk 1
          disk_idx = ord(field[2]) - ord("a")
          try:
            val = instance.FindDisk(disk_idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{"size": disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              hv_key in constants.HVS_PARAMETERS and
              hv_key not in constants.HVC_GLOBALS):
          val = i_hv.get(hv_key, None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              be_key in constants.BES_PARAMETERS):
          val = i_be.get(be_key, None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups[0] == "disk":
            disk_param = st_groups[1]
            if disk_param == "count":
              val = len(instance.disks)
            elif disk_param == "sizes":
              val = [disk.size for disk in instance.disks]
            elif disk_param == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            nic_param = st_groups[1]
            if nic_param == "count":
              val = len(instance.nics)
            elif nic_param == "macs":
              val = [nic.mac for nic in instance.nics]
            elif nic_param == "ips":
              val = [nic.ip for nic in instance.nics]
            elif nic_param == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif nic_param == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif nic_param == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              elif nic_param == "mac":
                val = instance.nics[nic_idx].mac
              elif nic_param == "ip":
                val = instance.nics[nic_idx].ip
              elif nic_param == "mode":
                val = i_nicp[nic_idx][constants.NIC_MODE]
              elif nic_param == "link":
                val = i_nicp[nic_idx][constants.NIC_LINK]
              elif nic_param == "bridge":
                nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                if nic_mode == constants.NIC_MODE_BRIDGED:
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                else:
                  val = None
              else:
                assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        row.append(val)
      output.append(row)

    return output
4547

    
4548

    
4549
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  The instance is stopped on its primary node and, if it is marked up,
  started again on its first secondary node.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    C{shutdown_timeout} is optional in the opcode; when absent,
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} is used.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance; node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice) is the master node followed by the
    secondary nodes of the instance.

    """
    hook_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.secondary_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its disk
    template is network mirrored, and that the first secondary node is
    online, not drained, has the needed bridges and (if the instance is
    marked up) enough free memory.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if not instance.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if not instance.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, dev, target_node, False)
        if not consistent and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    shutdown_result = self.rpc.call_instance_shutdown(source_node, instance,
                                                      self.shutdown_timeout)
    failure = shutdown_result.fail_msg
    if failure:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, failure))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, source_node, source_node, failure)

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if not instance.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 instance.name, target_node)

    disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_result = self.rpc.call_instance_start(target_node, instance,
                                                None, None)
    failure = start_result.fail_msg
    if failure:
      # do not leave the disks active if the instance could not be started
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, target_node, failure))
4692

    
4693

    
4694
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is delegated to a
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()

    # node locks are filled in by DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The instance is taken from the tasklet. The node list (returned
    twice) is the master node followed by the secondary nodes of the
    instance.

    """
    instance = self._migrater.instance
    hook_env = _BuildInstanceHookEnvByObject(self, instance)
    hook_env["MIGRATE_LIVE"] = self.op.live
    hook_env["MIGRATE_CLEANUP"] = self.op.cleanup
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(instance.secondary_nodes)
    return hook_env, node_list, node_list
4733

    
4734

    
4735
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down, new disks are created on the target node,
  the data is copied over and the old disks are removed; the instance is
  started on the target node if it is marked up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    C{shutdown_timeout} is optional in the opcode; when absent,
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} is used.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and the target node.

    The target node name is expanded and stored back in the opcode; the
    primary node of the instance is appended to the node locks later, in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                  self.op.target_node, errors.ECODE_NOENT)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the primary node of the instance to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice) is the master node, the primary node
    of the instance and the target node.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already on the target node, that all its disks are plain LVs or
    files, and that the target node is online, not drained, has the
    needed bridges and (if the instance is marked up) enough free memory.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only simple (non-layered) disks can be copied
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # ignore_consistency is not among the required parameters of this
      # LU, so it may be missing from the opcode; treat that as "not set"
      # instead of failing with an AttributeError
      if getattr(self.op, "ignore_consistency", False):
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      # the first failure stops the copy; clean up the new disks and abort
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance could not be started
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
4918

    
4919

    
4920
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance that has
  the given node as its primary node; the tasklets do the actual work.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, declare the locks and build the tasklets.

    @raise errors.OpPrereqError: if the node name cannot be expanded

    """
    # Remember the name as given: the expansion result is None for an
    # unknown node (see the check below) and the error message must show
    # what was actually requested, not 'None'
    given_name = self.op.node_name
    self.op.node_name = self.cfg.ExpandNodeName(given_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % given_name,
                                 errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      # The last argument is the tasklet's "cleanup" flag
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    """Recompute the node locks once the node level is reached.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The only node named in the result is the master node; the same
    single-element list is returned for both node lists.

    @return: tuple of (environment dict, node list, node list)

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
4973

    
4974

    
4975
class TLMigrateInstance(Tasklet):
  """Tasklet migrating a single DRBD8 instance to its secondary node.

  Besides the regular migration the tasklet has a "cleanup" mode, which
  tries to recover the disk and configuration state after a previously
  failed migration.

  """
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    @param lu: the logical unit on whose behalf we execute
    @param instance_name: the name of the instance to migrate
    @param live: passed unchanged to the instance_migrate RPC call
    @param cleanup: if true, L{Exec} runs the cleanup procedure instead
        of a migration

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, uses the drbd8 disk
    template, has a secondary node with enough free memory and the needed
    bridges and, unless in cleanup mode, that the target node is not
    drained and the instance is reported as migratable.

    On success the instance object is stored in C{self.instance}.

    @raise errors.OpPrereqError: if the instance is unknown or does not
        use the drbd8 disk template
    @raise errors.ConfigurationError: if a drbd8 instance has no
        secondary node

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name, errors.ECODE_NOENT)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call: the drbd_wait_sync RPC is
    repeated every two seconds until all nodes report being done, and the
    lowest reported percentage is shown as progress.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    @param node: the node on which the instance's disks are closed

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    The disconnect is requested on all nodes in C{self.all_nodes}; a
    failure on any of them raises.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @param multimaster: whether the disks are attached in dual-master
        (true) or single-master (false) mode

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    @raise errors.OpExecError: if the instance is found running on both
        nodes or on neither of them

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    Failures while reverting are only logged as a warning, so that the
    caller can still report the original migration error.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError as err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    This calls the finalize_migration RPC on the target node with a
    success flag of False; a failure of the call is only logged.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    @raise errors.OpExecError: if a disk is not consistent on the target
        node or if any of the migration steps fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # Kept on self as well, since _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    # NOTE(review): the reason for the fixed 10 second pauses before and
    # after the migrate call is not visible in this code - confirm before
    # changing them
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    This stores the feedback function, the source and target nodes and
    their secondary IPs on the tasklet, then runs either the cleanup or
    the migration procedure, depending on C{self.cleanup}.

    @param feedback_fn: function used to send feedback to the caller

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
5306

    
5307

    
5308
def _CreateBlockDev(lu, node, instance, device, force_create,
5309
                    info, force_open):
5310
  """Create a tree of block devices on a given node.
5311

5312
  If this device type has to be created on secondaries, create it and
5313
  all its children.
5314

5315
  If not, just recurse to children keeping the same 'force' value.
5316

5317
  @param lu: the lu on whose behalf we execute
5318
  @param node: the node on which to create the device
5319
  @type instance: L{objects.Instance}
5320
  @param instance: the instance which owns the device
5321
  @type device: L{objects.Disk}
5322
  @param device: the device to create
5323
  @type force_create: boolean
5324
  @param force_create: whether to force creation of this device; this
5325
      will be change to True whenever we find a device which has
5326
      CreateOnSecondary() attribute
5327
  @param info: the extra 'metadata' we should attach to the device
5328
      (this will be represented as a LVM tag)
5329
  @type force_open: boolean
5330
  @param force_open: this parameter will be passes to the
5331
      L{backend.BlockdevCreate} function where it specifies
5332
      whether we run on primary or not, and it affects both
5333
      the child assembly and the device own Open() execution
5334

5335
  """
5336
  if device.CreateOnSecondary():
5337
    force_create = True
5338

    
5339
  if device.children:
5340
    for child in device.children:
5341
      _CreateBlockDev(lu, node, instance, child, force_create,
5342
                      info, force_open)
5343

    
5344
  if not force_create:
5345
    return
5346

    
5347
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5348

    
5349

    
5350
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5351
  """Create a single block device on a given node.
5352

5353
  This will not recurse over children of the device, so they must be
5354
  created in advance.
5355

5356
  @param lu: the lu on whose behalf we execute
5357
  @param node: the node on which to create the device
5358
  @type instance: L{objects.Instance}
5359
  @param instance: the instance which owns the device
5360
  @type device: L{objects.Disk}
5361
  @param device: the device to create
5362
  @param info: the extra 'metadata' we should attach to the device
5363
      (this will be represented as a LVM tag)
5364
  @type force_open: boolean
5365
  @param force_open: this parameter will be passes to the
5366
      L{backend.BlockdevCreate} function where it specifies
5367
      whether we run on primary or not, and it affects both
5368
      the child assembly and the device own Open() execution
5369

5370
  """
5371
  lu.cfg.SetDiskID(device, node)
5372
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5373
                                       instance.name, force_open, info)
5374
  result.Raise("Can't create block device %s on"
5375
               " node %s for instance %s" % (device, node, instance.name))
5376
  if device.physical_id is None:
5377
    device.physical_id = result.payload
5378

    
5379

    
5380
def _GenerateUniqueNames(lu, exts):
5381
  """Generate a suitable LV name.
5382

5383
  This will generate a logical volume name for the given instance.
5384

5385
  """
5386
  results = []
5387
  for val in exts:
5388
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5389
    results.append("%s%s" % (new_id, val))
5390
  return results
5391

    
5392

    
5393
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  The children are two logical volumes in the cluster's volume group:
  a data volume of the requested size and a 128-sized metadata volume.

  @param lu: the logical unit on whose behalf we execute
  @param primary: the primary node of the device
  @param secondary: the secondary node of the device
  @param size: the size of the data volume and of the drbd device
  @param names: sequence whose first two items are the names of the
      data and of the metadata volume
  @param iv_name: the instance-visible name of the drbd device
  @param p_minor: the drbd minor on the primary node
  @param s_minor: the drbd minor on the secondary node
  @rtype: L{objects.Disk}
  @return: the drbd8 disk object

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
5412

    
5413

    
5414
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template (one of the constants.DT_*
      values handled below)
  @param instance_name: the name of the instance; used when allocating
      drbd minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; must be empty for the
      plain and file templates and have exactly one entry for drbd8
  @param disk_info: list of dicts with "size" and "mode" keys, one per
      disk
  @param file_storage_dir: directory of the disk files (file template)
  @param file_driver: the file driver (file template)
  @param base_index: index of the first generated disk, used in the
      volume names and in the "disk/N" instance-visible names
  @rtype: list of L{objects.Disk}
  @return: the generated disks (empty for the diskless template)
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Minors are requested as (primary, secondary) pairs, one pair per
    # disk, hence the idx*2 / idx*2+1 indexing below
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # Two volume names per disk: data first, then metadata
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
5478

    
5479

    
5480
def _GetInstanceInfoText(instance):
5481
  """Compute that text that should be added to the disk's metadata.
5482

5483
  """
5484
  return "originstname+%s" % instance.name
5485

    
5486

    
5487
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  The function does not return a value. A failure to create the file
  storage directory is reported via the C{Raise} method of the RPC
  result; the block devices themselves are created by
  L{_CreateBlockDev}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    # With an explicit target, the disks are created on that node only
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # Creation and opening are forced only on the (overridden) primary
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5530

    
5531

    
5532
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # Only the top-level device on the given node is removed
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Report the node the removal was actually attempted on, which is
      # not the primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5579

    
5580

    
5581
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template (one of the constants.DT_*
      values listed below)
  @param disks: list of dicts, each with a "size" key
  @return: the required size, or None for the diskless and file
      templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
5599

    
5600

    
5601
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5602
  """Hypervisor parameter validation.
5603

5604
  This function abstract the hypervisor parameter validation to be
5605
  used in both instance create and instance modify.
5606

5607
  @type lu: L{LogicalUnit}
5608
  @param lu: the logical unit for which we check
5609
  @type nodenames: list
5610
  @param nodenames: the list of nodes on which we should check
5611
  @type hvname: string
5612
  @param hvname: the name of the hypervisor we should use
5613
  @type hvparams: dict
5614
  @param hvparams: the parameters which we need to check
5615
  @raise errors.OpPrereqError: if the parameters are not valid
5616

5617
  """
5618
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5619
                                                  hvname,
5620
                                                  hvparams)
5621
  for node in nodenames:
5622
    info = hvinfo[node]
5623
    if info.offline:
5624
      continue
5625
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5626

    
5627

    
5628
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  The opcode's C{mode} selects between creating the instance from
  scratch (C{constants.INSTANCE_CREATE}) and importing it from an
  existing export (C{constants.INSTANCE_IMPORT}).

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    Defaults C{name_check} to True when the opcode does not carry it.

    @raise errors.OpPrereqError: if an IP check is requested while the
        name check is disabled

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    @param node: the node name as given by the user
    @return: the name as returned by C{cfg.ExpandNodeName}
    @raise errors.OpPrereqError: if the configuration does not know
        the node

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node, errors.ECODE_NOENT)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    Besides computing C{needed_locks}/C{add_locks}, this validates the
    opcode parameters and stores the pre-built state used by the later
    phases: C{hv_full}, C{be_full}, C{check_ip}, C{nics} and C{disks}.

    @raise errors.OpPrereqError: on any invalid opcode parameter

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name",
                                 errors.ECODE_INVAL)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    if self.op.name_check:
      hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = instance_name = hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = hostname1.ip
    else:
      instance_name = self.op.instance_name
      self.check_ip = None

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        # hostname1 is only bound when name_check is set, which the
        # check just above guarantees
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac, errors.ECODE_INVAL)
        else:
          try:
            self.cfg.ReserveMAC(mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % mac,
                                       errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      # only the explicitly requested values are stored on the NIC; the
      # cluster defaults are merged in just for the syntax check
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      self.disks.append({"size": size, "mode": mode})

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    # The directory is joined below the cluster file storage dir in Exec,
    # so an absolute path (which os.path.join would let override the
    # cluster dir) is the invalid case here
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must not be"
                                 " absolute", errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # the export will be searched for on all nodes in CheckPrereq
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success C{self.op.pnode} is set to the first returned node and,
    when the allocator required two nodes, C{self.op.snode} to the
    second one.

    @raise errors.OpPrereqError: if the allocator run fails or returns
        an unexpected number of nodes

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    It reads C{self.secondaries} (and C{self.src_images} for imports),
    which are assigned in L{CheckPrereq}.

    @return: tuple of (environment dict, node list, node list); the same
        node list is returned twice

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    For imports the export is located and parsed first; then the IP
    ping check, MAC generation, the optional allocator run and the
    node, disk space, hypervisor parameter, OS, bridge and memory checks
    are done. Sets C{self.pnode}, C{self.secondaries},
    C{self.dry_run_result} and, for imports, C{self.src_images}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        # no source node given: use the first locked node which lists
        # an export under the given relative path
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path, errors.ECODE_INVAL)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        # NOTE(review): an error code is passed to ProgrammerError here,
        # which suggests OpPrereqError was intended; left unchanged so
        # the raised exception type stays the same - confirm
        raise errors.ProgrammerError("Corrupted export config",
                                     errors.ECODE_ENVIRON)

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if int(ei_version) != constants.EXPORT_VERSION:
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION),
                                   errors.ECODE_ENVIRON)

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          # keep the list positional: False marks a disk without a dump
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          # Only NICs with an index below the export's nic_count can have
          # a 'nicN_mac' entry (indices are assumed to be zero-based, like
          # the disk_count/diskN_dump pair above); the comparison must
          # therefore be strict
          if nic.mac == constants.VALUE_AUTO and idx < exp_nic_count:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node, errors.ECODE_ENVIRON)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size),
                                     errors.ECODE_NORES)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name),
                 prereq=True, ecode=errors.ECODE_INVAL)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    @param feedback_fn: callable used to report progress messages
    @return: the list of all nodes of the new instance
    @raise errors.OpExecError: if disk creation fails or the disks are
        found degraded after creation

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance is always added as down; admin_up is only flipped
    # further below, right before the instance is started
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        # release the minors even if the disk removal itself fails
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # the source node lock is kept, as the import still reads from it
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          # an import failure is only logged, the creation goes on
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
6261

    
6262

    
6263
class LUConnectConsole(NoHooksLU):
  """Compute the command needed to reach an instance's console.

  The result of this logical unit is not an action on the instance
  but the command line which has to be run on the master node in
  order to connect to the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance is looked up in the configuration and its primary
    node is checked for being online.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Build the console command for the instance.

    @param feedback_fn: not used by this logical unit
    @return: the value of C{self.ssh.BuildCmd} for the hypervisor's
        console command, to be run as root on the primary node
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    all_results = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = all_results[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if not (inst.name in running.payload):
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster_info = self.cfg.GetClusterInfo()
    # The filled parameters travel next to the instance object instead of
    # being written into it, so that the defaults are not saved in the
    # instance itself.
    filled_hv = cluster_info.FillHV(inst)
    filled_be = cluster_info.FillBE(inst)
    cmd = hv_class.GetShellCommandForConsole(inst, filled_hv, filled_be)

    # wrap the console command into an ssh command line
    return self.ssh.BuildCmd(pnode, "root", cmd, batch=True, tty=True)
6314

    
6315

    
6316
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The work itself is done by a single L{TLReplaceDisks} tasklet which is
  created in L{ExpandNames}.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replacement tasklet.

    """
    self._ExpandAndLockInstance()

    op = self.op

    if op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif op.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    else:
      expanded = self.cfg.ExpandNodeName(op.remote_node)
      if expanded is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   op.remote_node, errors.ECODE_NOENT)

      op.remote_node = expanded

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [expanded]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node, op.disks)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks unless all nodes are locked.

    """
    if level != locking.LEVEL_NODE:
      return

    # With ALL_SET every node is covered already, nothing to add
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return

    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master, the instance's primary node and,
    if one was given, the new secondary node.

    @return: tuple of (environment dict, node list, the same node list)

    """
    inst = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return env, hook_nodes, hook_nodes
6392

    
6393

    
6394
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  One L{TLReplaceDisks} tasklet (in "change secondary" mode) is created
  for every instance returned by _GetNodeSecondaryInstances for the node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Expand node names, declare locks and create the tasklets.

    @raise errors.OpPrereqError: if the evacuated node or the new
        secondary node is not known, or if neither an iallocator nor a
        new secondary node was given

    """
    # Expand into a local first: the error message must show the name the
    # user passed, not the (None) result of the failed expansion.
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node, errors.ECODE_NOENT)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks unless all nodes are locked.

    """
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master node and, if one was given, the new
    secondary node.

    @return: tuple of (environment dict, node list, the same node list)

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)
6486

    
6487

    
6488
class TLReplaceDisks(Tasklet):
6489
  """Replaces disks for an instance.
6490

6491
  Note: Locking is not within the scope of this class.
6492

6493
  """
6494
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Store the replacement request; no lookups are done here.

    @param lu: the logical unit this tasklet belongs to
    @param instance_name: name of the instance whose disks are replaced
    @param mode: the disk replacement mode (constants.REPLACE_DISK_*)
    @param iallocator_name: iallocator used to select the new secondary
        node, or None
    @param remote_node: name of the new secondary node, or None
    @param disks: indices of the disks to be replaced

    """
    Tasklet.__init__(self, lu)

    # Parameters, kept exactly as passed in
    (self.instance_name, self.mode, self.iallocator_name, self.remote_node,
     self.disks) = (instance_name, mode, iallocator_name, remote_node, disks)

    # Runtime data, filled in by CheckPrereq
    self.instance = self.new_node = self.target_node = None
    self.other_node = self.remote_node_info = self.node_secondary_ip = None
6515

    
6516
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    Validates the combination of replacement mode, new node and
    iallocator: when changing the secondary exactly one of the latter
    two must be given, in every other mode neither of them.

    @raise errors.OpPrereqError: if the combination is not valid

    """
    have_node = remote_node is not None
    have_iallocator = iallocator is not None

    if mode != constants.REPLACE_DISK_CHG:
      # Not replacing the secondary
      if have_node or have_iallocator:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " only be used when changing the"
                                   " secondary node", errors.ECODE_INVAL)
      return

    if not (have_node or have_iallocator):
      raise errors.OpPrereqError("When changing the secondary either an"
                                 " iallocator script must be used or the"
                                 " new node given", errors.ECODE_INVAL)

    if have_node and have_iallocator:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)
6537

    
6538
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    @param lu: logical unit providing cfg, rpc and LogInfo
    @param iallocator_name: name of the iallocator to run
    @param instance_name: name of the instance to be relocated
    @param relocate_from: passed to the IAllocator as relocate_from
    @return: the first node returned by the iallocator
    @raise errors.OpPrereqError: if the iallocator run was not successful
        or returned an unexpected number of nodes

    """
    allocator = IAllocator(lu.cfg, lu.rpc,
                           mode=constants.IALLOCATOR_MODE_RELOC,
                           name=instance_name,
                           relocate_from=relocate_from)
    allocator.Run(iallocator_name)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, allocator.info),
                                 errors.ECODE_NORES)

    num_returned = len(allocator.nodes)
    if num_returned != allocator.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  num_returned, allocator.required_nodes),
                                 errors.ECODE_FAULT)

    new_secondary = allocator.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, new_secondary)

    return new_secondary
6568

    
6569
  def _FindFaultyDisks(self, node_name):
    """Run _FindFaultyInstanceDisks for this instance on the given node.

    @param node_name: name of the node to examine
    @return: whatever _FindFaultyInstanceDisks returns

    """
    instance = self.instance
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, instance, node_name,
                                    True)
6572

    
6573
  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the instance is DRBD8-based with exactly one secondary
    node, determines the new secondary (directly or via the iallocator),
    and fills in the runtime attributes (target_node, other_node,
    new_node, disks, node_secondary_ip) according to the replacement mode.

    @raise errors.OpPrereqError: if the instance layout, the selected
        node or the requested disks are not acceptable

    """
    instance = self.cfg.GetInstanceInfo(self.instance_name)
    self.instance = instance
    assert instance is not None, ("Cannot retrieve locked instance %s" %
                                  self.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    num_secondaries = len(instance.secondary_nodes)
    if num_secondaries != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 num_secondaries,
                                 errors.ECODE_FAULT)

    primary_node = instance.primary_node
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or computed
    if self.iallocator_name is not None:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)
    else:
      remote_node = self.remote_node

    if remote_node is None:
      self.remote_node_info = None
    else:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    no_disk_selection = (constants.REPLACE_DISK_AUTO,
                         constants.REPLACE_DISK_CHG)
    if self.disks and self.mode in no_disk_selection:
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node, self.other_node = primary_node, secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node, self.other_node = secondary_node, primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty, so there is nothing to replace or to check
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node, self.other_node = primary_node, secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node, self.other_node = secondary_node, primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses of all nodes taking part
    involved = [self.target_node, self.other_node, self.new_node]
    self.node_secondary_ip = dict(
      (node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
      for node_name in involved if node_name is not None)
6689

    
6690
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    Dispatches to the handler changing the secondary node if a new node
    was selected, and to the disk-only handler otherwise. For an instance
    which is not marked up, the disks are activated before and shut down
    again after the replacement.

    @param feedback_fn: feedback callback
    @return: the handler's return value, or None if no disks need
        replacement

    """
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    # Disks of a down instance are only activated for the replacement
    temporarily_active = not self.instance.admin_up

    if temporarily_active:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Without a new node only the disks themselves are replaced
      if self.new_node is None:
        handler = self._ExecDrbd8DiskOnly
      else:
        handler = self._ExecDrbd8Secondary

      return handler(feedback_fn)

    finally:
      # Undo the activation done above, on success and on failure
      if temporarily_active:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6723

    
6724
  def _CheckVolumeGroup(self, nodes):
    """Make sure the configured volume group exists on all given nodes.

    @param nodes: names of the nodes to check
    @raise errors.OpExecError: if the volume groups can't be listed or
        the volume group is missing on a node

    """
    self.lu.LogInfo("Checking volume groups")

    vg_name = self.cfg.GetVGName()

    vg_lists = self.rpc.call_vg_list(nodes)
    if not vg_lists:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node_name in nodes:
      node_result = vg_lists[node_name]
      node_result.Raise("Error checking node %s" % node_name)
      if vg_name in node_result.payload:
        continue
      raise errors.OpExecError("Volume group '%s' not found on node %s" %
                               (vg_name, node_name))
6740

    
6741
  def _CheckDisksExistence(self, nodes):
    """Check that every disk to be replaced is found on the given nodes.

    @param nodes: names of the nodes to check
    @raise errors.OpExecError: if a disk can't be found on a node

    """
    for idx, dev in enumerate(self.instance.disks):
      if idx in self.disks:
        for node_name in nodes:
          self.lu.LogInfo("Checking disk/%d on %s" % (idx, node_name))
          self.cfg.SetDiskID(dev, node_name)

          found = self.rpc.call_blockdev_find(node_name, dev)

          # Both an RPC failure and an empty payload count as "not found"
          failure = found.fail_msg
          if failure or not found.payload:
            raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                     (idx, node_name,
                                      failure or "disk not found"))
6759

    
6760
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Check the consistency of the disks to be replaced on one node.

    @param node_name: name of the node to check
    @param on_primary: passed through to _CheckDiskConsistency
    @param ldisk: passed through to _CheckDiskConsistency
    @raise errors.OpExecError: if _CheckDiskConsistency fails for a disk

    """
    selected = [(idx, dev) for idx, dev in enumerate(self.instance.disks)
                if idx in self.disks]

    for idx, dev in selected:
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      consistent = _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                         ldisk=ldisk)
      if consistent:
        continue

      raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                               " replace disks for instance %s" %
                               (node_name, self.instance.name))
6773

    
6774
  def _CreateNewStorage(self, node_name):
    """Create a new data/meta LV pair for every disk to be replaced.

    @param node_name: name of the node on which the LVs are created
    @return: dict mapping the iv_name of each handled disk to a tuple of
        (disk, its current children, list of the new LV objects)

    """
    vg_name = self.cfg.GetVGName()
    info_text_fn = _GetInstanceInfoText
    result = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      wanted = [".disk%d_data" % idx, ".disk%d_meta" % idx]
      unique = _GenerateUniqueNames(self.lu, wanted)

      data_lv = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_name, unique[0]))
      meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_name, unique[1]))

      created = [data_lv, meta_lv]
      result[dev.iv_name] = (dev, dev.children, created)

      # we pass force_create=True to force the LVM creation
      for lv in created:
        _CreateBlockDev(self.lu, node_name, self.instance, lv, True,
                        info_text_fn(self.instance), False)

    return result
6804

    
6805
  def _CheckDevices(self, node_name, iv_names):
    """Check that all handled devices are found and not degraded.

    @param node_name: name of the node on which the devices are checked
    @param iv_names: dict whose values are tuples starting with the
        device; the keys are only used in error messages
    @raise errors.OpExecError: if a device is not found or is degraded

    """
    for name, entry in iv_names.iteritems():
      dev = entry[0]
      self.cfg.SetDiskID(dev, node_name)

      found = self.rpc.call_blockdev_find(node_name, dev)

      failure = found.fail_msg
      if failure or not found.payload:
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, failure or "disk not found"))

      if found.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6820

    
6821
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the old logical volumes of all handled devices.

    A failed removal only results in a warning.

    @param node_name: name of the node from which the LVs are removed
    @param iv_names: dict whose values are tuples with the old LVs as
        second element; the keys are only used in log messages

    """
    for name, entry in iv_names.iteritems():
      stale_lvs = entry[1]
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in stale_lvs:
        self.cfg.SetDiskID(lv, node_name)

        failure = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if not failure:
          continue
        self.lu.LogWarning("Can't remove old LV: %s" % failure,
                           hint="remove unused LVs manually")
6832

    
6833
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. create new LVs on the target node with unique names, for each
         disk to be replaced

      1. for each disk to be replaced:

        1. detach old LVs from the drbd device
        1. rename old LVs to <name>_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name <name>_replaced-<time_t>)

    Failures are not very well handled.

    @param feedback_fn: feedback callback, handed to the configuration
        update
    @raise errors.OpExecError: if attaching the new LVs fails; other
        failures are raised by the called checks and RPC results

    """
    steps_total = 6

    def _ReplacedName(disk, suffix):
      """Compute the (vg, name) id an old LV gets while being replaced."""
      return (disk.physical_id[0],
              disk.physical_id[1] + "_replaced-%s" % suffix)

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren,
                                    _ReplacedName(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Make the in-memory objects follow the renames done on the node:
      # first the new LVs take over the old ids ...
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # ... then the old LVs get their "_replaced-<time_t>" ids
      for disk in old_lvs:
        disk.logical_id = _ReplacedName(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        # Best-effort rollback: remove the new LVs before giving up
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
6955

    
6956
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    @param feedback_fn: feedback callback, handed to the configuration
        update
    @raise errors.OpExecError: if the primary's drbd devices can't be
        detached from the network; other failures are raised by the
        called checks and helpers

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    # One minor on the new node is requested per instance disk
    minors = self.cfg.AllocateDRBDMinor([self.new_node] *
                                        len(self.instance.disks),
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later network attach further below
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    disconnect_results = \
      self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                        self.node_secondary_ip,
                                        self.instance.disks)
    result = disconnect_results[self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    # Attach failures are only reported as warnings
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
7101

    
7102

    
7103
class LURepairNodeStorage(NoHooksLU):
7104
  """Repairs the volume group on a node.
7105

7106
  """
7107
  _OP_REQP = ["node_name"]
7108
  REQ_BGL = False
7109

    
7110
  def CheckArguments(self):
7111
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
7112
    if node_name is None:
7113
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
7114
                                 errors.ECODE_NOENT)
7115

    
7116
    self.op.node_name = node_name
7117

    
7118
  def ExpandNames(self):
7119
    self.needed_locks = {
7120
      locking.LEVEL_NODE: [self.op.node_name],
7121
      }
7122

    
7123
  def _CheckFaultyDisks(self, instance, node_name):
7124
    """Ensure faulty disks abort the opcode or at least warn."""
7125
    try:
7126
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7127
                                  node_name, True):
7128
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7129
                                   " node '%s'" % (instance.name, node_name),
7130
                                   errors.ECODE_STATE)
7131
    except errors.OpPrereqError, err:
7132
      if self.op.ignore_consistency:
7133
        self.proc.LogWarning(str(err.args[0]))
7134
      else:
7135
        raise
7136

    
7137
  def CheckPrereq(self):
7138
    """Check prerequisites.
7139

7140
    """
7141
    storage_type = self.op.storage_type
7142

    
7143
    if (constants.SO_FIX_CONSISTENCY not in
7144
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7145
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7146
                                 " repaired" % storage_type,
7147
                                 errors.ECODE_INVAL)
7148

    
7149
    # Check whether any instance on this node has faulty disks
7150
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7151
      if not inst.admin_up:
7152
        continue
7153
      check_nodes = set(inst.all_nodes)
7154
      check_nodes.discard(self.op.node_name)
7155
      for inst_node_name in check_nodes:
7156
        self._CheckFaultyDisks(inst, inst_node_name)
7157

    
7158
  def Exec(self, feedback_fn):
7159
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7160
                (self.op.name, self.op.node_name))
7161

    
7162
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7163
    result = self.rpc.call_storage_execute(self.op.node_name,
7164
                                           self.op.storage_type, st_args,
7165
                                           self.op.name,
7166
                                           constants.SO_FIX_CONSISTENCY)
7167
    result.Raise("Failed to repair storage unit '%s' on %s" %
7168
                 (self.op.name, self.op.node_name))
7169

    
7170

    
7171
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  The disk C{self.op.disk} of the instance C{self.op.instance_name} is
  grown by C{self.op.amount} on every node of the instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the nodes of the instance at the node locking level.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    # The disk is grown on every node of the instance (see Exec), so the
    # hooks must run on all of them and not only on the primary node
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template supports growing and that every
    node reports enough free volume group space for the requested amount.

    @raise errors.OpPrereqError: if the disk template cannot be grown or
        a node lacks (or cannot report) the needed free space

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node, errors.ECODE_ENVIRON)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount),
                                   errors.ECODE_NORES)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is sent to each node of the instance in turn; after
    all of them succeeded the new size is recorded in the configuration
    and, if requested, the disk sync is awaited.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
7268

    
7269

    
7270
class LUQueryInstanceData(NoHooksLU):
  """Collect configuration and (optionally) runtime data of instances.

  When C{self.op.static} is set, no runtime information is gathered and
  only configuration data is returned.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare shared locks.

    An empty instance list selects all instances.

    @raise errors.OpPrereqError: if C{instances} is not a list or one of
        the names is unknown

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not requested:
      # no explicit list: lock everything, names are resolved in CheckPrereq
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in requested:
        long_name = self.cfg.ExpandInstanceName(short_name)
        if long_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name,
                                     errors.ECODE_NOENT)
        expanded.append(long_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the nodes of the selected instances at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the final list of instance names (falling back to the
    acquired instance locks when none were given) into instance objects.

    """
    names = self.wanted_names
    if names is None:
      names = self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(item)
                             for item in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Query the status of one block device on one node.

    @return: None for static queries, when no node is given, when the
        node is offline or when no status was returned; otherwise a
        tuple of (dev_path, major, minor, sync_percent, estimated_time,
        is_degraded, ldisk_status)

    """
    if not node or self.op.static:
      return None

    self.cfg.SetDiskID(dev, node)
    result = self.rpc.call_blockdev_find(node, dev)

    # offline nodes are not an error, there is just nothing to report
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = result.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Build the status dictionary of a disk and, recursively, its children.

    @param snode: secondary node to query; replaced by the peer node
        taken from the logical_id when the device is a DRBD one

    """
    if dev.dev_type in constants.LDS_DRBD:
      # the secondary is whichever end of the device is not the primary
      if dev.logical_id[0] != instance.primary_node:
        snode = dev.logical_id[0]
      else:
        snode = dev.logical_id[1]

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = []
    if dev.children:
      children_status = [self._ComputeDiskStatus(instance, snode, sub_dev)
                         for sub_dev in dev.children]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data.

    @return: dictionary mapping each instance name to a dictionary with
        its configuration and, unless the query is static, run state

    """
    cluster = self.cfg.GetClusterInfo()
    result = {}

    for instance in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info = self.rpc.call_instance_info(instance.primary_node,
                                           instance.name,
                                           instance.hypervisor)
        info.Raise("Error checking node %s" % instance.primary_node)
        info = info.payload
        run_state = "down"
        if info and "state" in info:
          run_state = "up"

      admin_state = "down"
      if instance.admin_up:
        admin_state = "up"

      disk_data = [self._ComputeDiskStatus(instance, None, disk)
                   for disk in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": admin_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disk_data,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
7428

    
7429

    
7430
class LUSetInstanceParams(LogicalUnit):
7431
  """Modifies an instance's parameters.
7432

7433
  """
7434
  HPATH = "instance-modify"
7435
  HTYPE = constants.HTYPE_INSTANCE
7436
  _OP_REQP = ["instance_name"]
7437
  REQ_BGL = False
7438

    
7439
  def CheckArguments(self):
7440
    if not hasattr(self.op, 'nics'):
7441
      self.op.nics = []
7442
    if not hasattr(self.op, 'disks'):
7443
      self.op.disks = []
7444
    if not hasattr(self.op, 'beparams'):
7445
      self.op.beparams = {}
7446
    if not hasattr(self.op, 'hvparams'):
7447
      self.op.hvparams = {}
7448
    self.op.force = getattr(self.op, "force", False)
7449
    if not (self.op.nics or self.op.disks or
7450
            self.op.hvparams or self.op.beparams):
7451
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7452

    
7453
    if self.op.hvparams:
7454
      _CheckGlobalHvParams(self.op.hvparams)
7455

    
7456
    # Disk validation
7457
    disk_addremove = 0
7458
    for disk_op, disk_dict in self.op.disks:
7459
      if disk_op == constants.DDM_REMOVE:
7460
        disk_addremove += 1
7461
        continue
7462
      elif disk_op == constants.DDM_ADD:
7463
        disk_addremove += 1
7464
      else:
7465
        if not isinstance(disk_op, int):
7466
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7467
        if not isinstance(disk_dict, dict):
7468
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7469
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7470

    
7471
      if disk_op == constants.DDM_ADD:
7472
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7473
        if mode not in constants.DISK_ACCESS_SET:
7474
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7475
                                     errors.ECODE_INVAL)
7476
        size = disk_dict.get('size', None)
7477
        if size is None:
7478
          raise errors.OpPrereqError("Required disk parameter size missing",
7479
                                     errors.ECODE_INVAL)
7480
        try:
7481
          size = int(size)
7482
        except ValueError, err:
7483
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7484
                                     str(err), errors.ECODE_INVAL)
7485
        disk_dict['size'] = size
7486
      else:
7487
        # modification of disk
7488
        if 'size' in disk_dict:
7489
          raise errors.OpPrereqError("Disk size change not possible, use"
7490
                                     " grow-disk", errors.ECODE_INVAL)
7491

    
7492
    if disk_addremove > 1:
7493
      raise errors.OpPrereqError("Only one disk add or remove operation"
7494
                                 " supported at a time", errors.ECODE_INVAL)
7495

    
7496
    # NIC validation
7497
    nic_addremove = 0
7498
    for nic_op, nic_dict in self.op.nics:
7499
      if nic_op == constants.DDM_REMOVE:
7500
        nic_addremove += 1
7501
        continue
7502
      elif nic_op == constants.DDM_ADD:
7503
        nic_addremove += 1
7504
      else:
7505
        if not isinstance(nic_op, int):
7506
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7507
        if not isinstance(nic_dict, dict):
7508
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7509
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7510

    
7511
      # nic_dict should be a dict
7512
      nic_ip = nic_dict.get('ip', None)
7513
      if nic_ip is not None:
7514
        if nic_ip.lower() == constants.VALUE_NONE:
7515
          nic_dict['ip'] = None
7516
        else:
7517
          if not utils.IsValidIP(nic_ip):
7518
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7519
                                       errors.ECODE_INVAL)
7520

    
7521
      nic_bridge = nic_dict.get('bridge', None)
7522
      nic_link = nic_dict.get('link', None)
7523
      if nic_bridge and nic_link:
7524
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7525
                                   " at the same time", errors.ECODE_INVAL)
7526
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7527
        nic_dict['bridge'] = None
7528
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7529
        nic_dict['link'] = None
7530

    
7531
      if nic_op == constants.DDM_ADD:
7532
        nic_mac = nic_dict.get('mac', None)
7533
        if nic_mac is None:
7534
          nic_dict['mac'] = constants.VALUE_AUTO
7535

    
7536
      if 'mac' in nic_dict:
7537
        nic_mac = nic_dict['mac']
7538
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7539
          if not utils.IsValidMac(nic_mac):
7540
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac,
7541
                                       errors.ECODE_INVAL)
7542
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7543
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7544
                                     " modifying an existing nic",
7545
                                     errors.ECODE_INVAL)
7546

    
7547
    if nic_addremove > 1:
7548
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7549
                                 " supported at a time", errors.ECODE_INVAL)
7550

    
7551
  def ExpandNames(self):
    """Lock the instance; its node locks are filled in by DeclareLocks.

    """
    node_level = locking.LEVEL_NODE
    self._ExpandAndLockInstance()
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE
7555

    
7556
  def DeclareLocks(self, level):
    """Lock the nodes of the instance when reaching the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()
7559

    
7560
  def BuildHooksEnv(self):
    """Build hooks env.

    The environment reflects the values the instance will have after the
    modification: new memory/vcpus and the resulting list of NICs
    override the current instance data.

    This runs on the master, primary and secondaries.

    """
    override = {}
    if constants.BE_MEMORY in self.be_new:
      override['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      override['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      hook_nics = override['nics'] = []
      changes = dict(self.op.nics)
      cluster_defaults = self.cluster.nicparams[constants.PP_DEFAULT]

      # existing NICs, with the requested modifications applied
      for idx, nic in enumerate(self.instance.nics):
        nic_change = changes.get(idx, {})
        ip = nic_change.get('ip', nic.ip)
        mac = nic_change.get('mac', nic.mac)
        if idx in self.nic_pnew:
          filled = self.nic_pnew[idx]
        else:
          filled = objects.FillDict(cluster_defaults, nic.nicparams)
        hook_nics.append((ip, mac, filled[constants.NIC_MODE],
                          filled[constants.NIC_LINK]))

      if constants.DDM_ADD in changes:
        # the NIC being added goes last
        added = changes[constants.DDM_ADD]
        ip = added.get('ip', None)
        mac = added['mac']
        filled = self.nic_pnew[constants.DDM_ADD]
        hook_nics.append((ip, mac, filled[constants.NIC_MODE],
                          filled[constants.NIC_LINK]))
      elif constants.DDM_REMOVE in changes:
        # removal always affects the last NIC
        del hook_nics[-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=override)
    nl = [self.cfg.GetMasterNode()]
    nl.extend(self.instance.all_nodes)
    return env, nl, nl
7610

    
7611
  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Compute the parameters resulting from applying an update.

    The old parameters are not modified; the update is applied to a deep
    copy of them.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    updated = copy.deepcopy(old_params)
    for name, value in update_dict.iteritems():
      if value != constants.VALUE_DEFAULT:
        updated[name] = value
      else:
        # resetting to the default means dropping the explicit value,
        # which is fine even if there was none
        updated.pop(name, None)
    utils.ForceDictType(updated, parameter_types)
    return (updated, objects.FillDict(default_values, updated))
7643

    
7644
  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the (locked) instance and the cluster configuration and
    validates the requested hypervisor, backend, NIC and disk changes
    against them.

    The results are stored on the LU: C{hv_new}/C{hv_inst} and
    C{be_new}/C{be_inst} (filled and unfilled parameter dicts),
    C{nic_pnew}/C{nic_pinst} (the same for NIC parameters, keyed by the
    NIC operation) and C{warn}, a list of non-fatal messages.

    @raise errors.OpPrereqError: if a requested change is invalid or
        cannot be satisfied

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    # memory check; be_new is always bound here, as a memory change
    # implies non-empty beparams
    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode,  msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      # 'bridge' is accepted as another way of specifying the NIC link
      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance",
                                     errors.ECODE_INVAL)
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg), errors.ECODE_ENVIRON)
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.", errors.ECODE_STATE)

      # the disk limit must be checked against the number of disks (the
      # number of NICs is unrelated to it)
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          # the highest valid index is one less than the number of disks
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)
7852

    
7853
  def Exec(self, feedback_fn):
7854
    """Modifies an instance.
7855

7856
    All parameters take effect only at the next restart of the instance.
7857

7858
    """
7859
    # Process here the warnings from CheckPrereq, as we don't have a
7860
    # feedback_fn there.
7861
    for warn in self.warn:
7862
      feedback_fn("WARNING: %s" % warn)
7863

    
7864
    result = []
7865
    instance = self.instance
7866
    # disk changes
7867
    for disk_op, disk_dict in self.op.disks:
7868
      if disk_op == constants.DDM_REMOVE:
7869
        # remove the last disk
7870
        device = instance.disks.pop()
7871
        device_idx = len(instance.disks)
7872
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7873
          self.cfg.SetDiskID(disk, node)
7874
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7875
          if msg:
7876
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7877
                            " continuing anyway", device_idx, node, msg)
7878
        result.append(("disk/%d" % device_idx, "remove"))
7879
      elif disk_op == constants.DDM_ADD:
7880
        # add a new disk
7881
        if instance.disk_template == constants.DT_FILE:
7882
          file_driver, file_path = instance.disks[0].logical_id
7883
          file_path = os.path.dirname(file_path)
7884
        else:
7885
          file_driver = file_path = None
7886
        disk_idx_base = len(instance.disks)
7887
        new_disk = _GenerateDiskTemplate(self,
7888
                                         instance.disk_template,
7889
                                         instance.name, instance.primary_node,
7890
                                         instance.secondary_nodes,
7891
                                         [disk_dict],
7892
                                         file_path,
7893
                                         file_driver,
7894
                                         disk_idx_base)[0]
7895
        instance.disks.append(new_disk)
7896
        info = _GetInstanceInfoText(instance)
7897

    
7898
        logging.info("Creating volume %s for instance %s",
7899
                     new_disk.iv_name, instance.name)
7900
        # Note: this needs to be kept in sync with _CreateDisks
7901
        #HARDCODE
7902
        for node in instance.all_nodes:
7903
          f_create = node == instance.primary_node
7904
          try:
7905
            _CreateBlockDev(self, node, instance, new_disk,
7906
                            f_create, info, f_create)
7907
          except errors.OpExecError, err:
7908
            self.LogWarning("Failed to create volume %s (%s) on"
7909
                            " node %s: %s",
7910
                            new_disk.iv_name, new_disk, node, err)
7911
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7912
                       (new_disk.size, new_disk.mode)))
7913
      else:
7914
        # change a given disk
7915
        instance.disks[disk_op].mode = disk_dict['mode']
7916
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7917
    # NIC changes
7918
    for nic_op, nic_dict in self.op.nics:
7919
      if nic_op == constants.DDM_REMOVE:
7920
        # remove the last nic
7921
        del instance.nics[-1]
7922
        result.append(("nic.%d" % len(instance.nics), "remove"))
7923
      elif nic_op == constants.DDM_ADD:
7924
        # mac and bridge should be set, by now
7925
        mac = nic_dict['mac']
7926
        ip = nic_dict.get('ip', None)
7927
        nicparams = self.nic_pinst[constants.DDM_ADD]
7928
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7929
        instance.nics.append(new_nic)
7930
        result.append(("nic.%d" % (len(instance.nics) - 1),
7931
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7932
                       (new_nic.mac, new_nic.ip,
7933
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7934
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7935
                       )))
7936
      else:
7937
        for key in 'mac', 'ip':
7938
          if key in nic_dict:
7939
            setattr(instance.nics[nic_op], key, nic_dict[key])
7940
        if nic_op in self.nic_pinst:
7941
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
7942
        for key, val in nic_dict.iteritems():
7943
          result.append(("nic.%s/%d" % (key, nic_op), val))
7944

    
7945
    # hvparams changes
7946
    if self.op.hvparams:
7947
      instance.hvparams = self.hv_inst
7948
      for key, val in self.op.hvparams.iteritems():
7949
        result.append(("hv/%s" % key, val))
7950

    
7951
    # beparams changes
7952
    if self.op.beparams:
7953
      instance.beparams = self.be_inst
7954
      for key, val in self.op.beparams.iteritems():
7955
        result.append(("be/%s" % key, val))
7956

    
7957
    self.cfg.Update(instance, feedback_fn)
7958

    
7959
    return result
7960

    
7961

    
7962
class LUQueryExports(NoHooksLU):
  """Query the list of exports present on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the (shared) node locks needed by the query.

    An empty node list in the opcode means all nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    The nodes to query are the ones whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is the payload returned by that node, or
        False if the call to the node failed.

    """
    answers = self.rpc.call_export_list(self.nodes)
    exports = {}
    for node_name in answers:
      answer = answers[node_name]
      if answer.fail_msg:
        # failed nodes are reported with False instead of a list
        exports[node_name] = False
      else:
        exports[node_name] = answer.payload

    return exports
8002

    
8003

    
8004
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode; when it is missing
    the default from the constants module is used.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and all the nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know
    # where the previous export might be, and in this LU we search for it
    # and remove it from its current node. In the future we could fix
    # this by:
    #  - making a tasklet to search (share-lock all), then create the new
    #    one, then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @rtype: tuple
    @return: the environment dictionary, followed by the node list twice
        (the same list object is returned in both positions)

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, that the
    involved nodes are usable and that no disk is file-based.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    full_target = self.cfg.ExpandNodeName(self.op.target_node)
    target = self.cfg.GetNodeInfo(full_target)
    self.dst_node = target

    if target is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node,
                                 errors.ECODE_NOENT)
    _CheckNodeOnline(self, target.name)
    _CheckNodeNotDrained(self, target.name)

    # instance disk type verification
    for disk in instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The disks are snapshotted on the primary node, the snapshots are
    exported to the target node and removed again, the export is
    finalized and, at the end, exports of the same instance found on
    other nodes are removed.

    @rtype: tuple
    @return: (finalize status, list of per-disk export statuses), all
        of them booleans

    """
    inst = self.instance
    target = self.dst_node
    source = inst.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % inst.name)
      shutdown_res = self.rpc.call_instance_shutdown(source, inst,
                                                     self.shutdown_timeout)
      shutdown_res.Raise("Could not shutdown instance %s on"
                         " node %s" % (inst.name, source))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in inst.disks:
      self.cfg.SetDiskID(disk, source)

    # an instance that is administratively down needs its disks
    # activated for the duration of the export
    activate_disks = (not inst.admin_up)

    if activate_disks:
      feedback_fn("Activating disks for %s" % inst.name)
      _StartInstanceDisks(self, inst, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(inst.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, source))

          # the payload will be a snapshot of an lvm leaf of the disk we
          # passed
          snap_res = self.rpc.call_blockdev_snapshot(source, disk)
          snap_err = snap_res.fail_msg
          if not snap_err:
            snap_id = (vgname, snap_res.payload)
            snap_disks.append(objects.Disk(dev_type=constants.LD_LV,
                                           size=disk.size,
                                           logical_id=snap_id,
                                           physical_id=snap_id,
                                           iv_name=disk.iv_name))
          else:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, source, snap_err)
            # placeholder, so that list positions match disk indices
            snap_disks.append(False)

      finally:
        # restart the instance if we were the ones stopping it
        if self.op.shutdown and inst.admin_up:
          feedback_fn("Starting instance %s" % inst.name)
          start_res = self.rpc.call_instance_start(source, inst, None, None)
          start_err = start_res.fail_msg
          if start_err:
            _ShutdownInstanceDisks(self, inst)
            raise errors.OpExecError("Could not start instance: %s" %
                                     start_err)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, source, target.name))
        if not dev:
          # the snapshot of this disk failed earlier
          dresults.append(False)
          continue

        export_res = self.rpc.call_snapshot_export(source, dev, target.name,
                                                   inst, cluster_name, idx)
        export_err = export_res.fail_msg
        if export_err:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, source, target.name,
                          export_err)
        dresults.append(not export_err)

        # the snapshot is removed whether or not the export worked
        remove_err = self.rpc.call_blockdev_remove(source, dev).fail_msg
        if remove_err:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, source, remove_err)

      feedback_fn("Finalizing export on %s" % target.name)
      fin_res = self.rpc.call_finalize_export(target.name, inst,
                                              snap_disks)
      fin_err = fin_res.fail_msg
      if fin_err:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", inst.name, target.name, fin_err)
      fin_resu = not fin_err

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % inst.name)
        _ShutdownInstanceDisks(self, inst)

    other_nodes = self.cfg.GetNodeList()
    other_nodes.remove(target.name)

    # on one-node clusters the list will be empty after the removal;
    # if we proceeded the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = inst.name
    if other_nodes:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(other_nodes)
      for node in exportlist:
        node_exports = exportlist[node]
        if node_exports.fail_msg:
          continue
        if iname not in node_exports.payload:
          continue
        old_err = self.rpc.call_export_remove(node, iname).fail_msg
        if old_err:
          self.LogWarning("Could not remove older export for instance %s"
                          " on node %s: %s", iname, node, old_err)
    return fin_resu, dresults
8201

    
8202

    
8203
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock all the nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is verified here.

    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its exports, and the export of the
    instance is removed from each node that lists it.

    """
    expanded = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not expanded
    if fqdn_warn:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded

    exportlist = self.rpc.call_export_list(
      self.acquired_locks[locking.LEVEL_NODE])
    found = False
    for node in exportlist:
      node_exports = exportlist[node]
      query_err = node_exports.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue
      if instance_name not in node_exports.payload:
        continue
      found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
8254

    
8255

    
8256
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    """Expand the object name and declare the lock on it.

    Only node and instance tags need a name expansion and a lock; for
    any other kind no lock is declared here.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      expand_fn = self.cfg.ExpandNodeName
      error_fmt = "Invalid node name (%s)"
      lock_level = locking.LEVEL_NODE
    elif kind == constants.TAG_INSTANCE:
      expand_fn = self.cfg.ExpandInstanceName
      error_fmt = "Invalid instance name (%s)"
      lock_level = locking.LEVEL_INSTANCE
    else:
      return

    full_name = expand_fn(self.op.name)
    if full_name is None:
      raise errors.OpPrereqError(error_fmt % (self.op.name,),
                                 errors.ECODE_NOENT)
    self.op.name = full_name
    self.needed_locks[lock_level] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the object the tags belong to and stores it in
    self.target; an unknown tag kind is rejected.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
    self.target = target
8293

    
8294

    
8295
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object, copied into a new list

    """
    tags = self.target.GetTags()
    return list(tags)
8307

    
8308

    
8309
class LUSearchTags(NoHooksLU):
8310
  """Searches the tags for a given pattern.
8311

8312
  """
8313
  _OP_REQP = ["pattern"]
8314
  REQ_BGL = False
8315

    
8316
  def ExpandNames(self):
8317
    self.needed_locks = {}
8318

    
8319
  def CheckPrereq(self):
8320
    """Check prerequisites.
8321

8322
    This checks the pattern passed for validity by compiling it.
8323

8324
    """
8325
    try:
8326
      self.re = re.compile(self.op.pattern)
8327
    except re.error, err:
8328
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8329
                                 (self.op.pattern, err), errors.ECODE_INVAL)
8330

    
8331
  def Exec(self, feedback_fn):
8332
    """Returns the tag list.
8333

8334
    """
8335
    cfg = self.cfg
8336
    tgts = [("/cluster", cfg.GetClusterInfo())]
8337
    ilist = cfg.GetAllInstancesInfo().values()
8338
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8339
    nlist = cfg.GetAllNodesInfo().values()
8340
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8341
    results = []
8342
    for path, target in tgts:
8343
      for tag in target.GetTags():
8344
        if self.re.search(tag):
8345
          results.append((path, tag))
8346
    return results
8347

    
8348

    
8349
class LUAddTags(TagsLU):
8350
  """Sets a tag on a given object.
8351

8352
  """
8353
  _OP_REQP = ["kind", "name", "tags"]
8354
  REQ_BGL = False
8355

    
8356
  def CheckPrereq(self):
8357
    """Check prerequisites.
8358

8359
    This checks the type and length of the tag name and value.
8360

8361
    """
8362
    TagsLU.CheckPrereq(self)
8363
    for tag in self.op.tags:
8364
      objects.TaggableObject.ValidateTag(tag)
8365

    
8366
  def Exec(self, feedback_fn):
8367
    """Sets the tag.
8368

8369
    """
8370
    try:
8371
      for tag in self.op.tags:
8372
        self.target.AddTag(tag)
8373
    except errors.TagError, err:
8374
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
8375
    self.cfg.Update(self.target, feedback_fn)
8376

    
8377

    
8378
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This validates the tags and checks that the target object has all
    of them.

    """
    TagsLU.CheckPrereq(self)
    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)
    # tags requested for removal that the object does not carry
    missing = frozenset(self.op.tags) - self.target.GetTags()
    if missing:
      quoted = sorted(["'%s'" % tag for tag in missing])
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(quoted)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object and save it.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(target, feedback_fn)
8410

    
8411

    
8412
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any, and locks those nodes.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    expanded = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = expanded
    self.needed_locks[locking.LEVEL_NODE] = expanded

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    The master delay, if requested, is done before the node delay.

    """
    duration = self.op.duration
    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      answers = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for node, answer in answers.items():
        answer.Raise("Failure during rpc call to node %s" % node)
8452

    
8453

    
8454
class IAllocator(object):
8455
  """IAllocator framework.
8456

8457
  An IAllocator instance has four sets of attributes:
8458
    - cfg that is needed to query the cluster
8459
    - input data (all members of the _KEYS class attribute are required)
8460
    - four buffer attributes (in|out_data|text), that represent the
8461
      input (to the external script) in text and data structure format,
8462
      and the output from it, again in two formats
8463
    - the result variables from the script (success, info, nodes) for
8464
      easy usage
8465

8466
  """
8467
  # pylint: disable-msg=R0902
8468
  # lots of instance attributes
8469
  _ALLO_KEYS = [
8470
    "mem_size", "disks", "disk_template",
8471
    "os", "tags", "nics", "vcpus", "hypervisor",
8472
    ]
8473
  _RELO_KEYS = [
8474
    "relocate_from",
8475
    ]
8476

    
8477
  def __init__(self, cfg, rpc, mode, name, **kwargs):
    """Store the request parameters and build the input data.

    @param cfg: the configuration object, kept in self.cfg
    @param rpc: the RPC runner, kept in self.rpc
    @param mode: the allocator mode; it selects which keyword
        arguments are required (_ALLO_KEYS or _RELO_KEYS)
    @param name: the instance name, kept in self.name
    @param kwargs: the mode-specific input fields; exactly the keys of
        the selected key set must be given
    @raise errors.ProgrammerError: for an unknown mode, or an unknown
        or missing input field

    """
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = None
    self.out_text = None
    self.in_data = None
    self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = None
    self.disks = None
    self.disk_template = None
    self.os = None
    self.tags = None
    self.nics = None
    self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = None
    self.info = None
    self.nodes = None

    if mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    # every passed field must be known for this mode...
    for key, value in kwargs.items():
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, value)
    # ...and every field of this mode must have been passed
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()
8510

    
8511
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation:
    cluster-wide values, one entry per node and one entry per instance.
    The resulting dictionary is stored in self.in_data.

    @raise errors.OpExecError: if a node which is neither offline nor
        drained does not return one of the expected attributes, or
        returns a non-integer value for it

    """
    cfg = self.cfg
    cluster = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster.GetTags()),
      "enabled_hypervisors": list(cluster.enabled_hypervisors),
      # we don't have job IDs
      }
    all_instances = cfg.GetAllInstancesInfo().values()
    # pairs of (instance object, filled backend parameters)
    inst_pairs = [(inst, cluster.FillBE(inst)) for inst in all_instances]

    # node data
    node_names = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_names, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_names,
                                       cluster.enabled_hypervisors)

    # attributes each usable node has to report, as integers
    needed_attrs = ['memory_total', 'memory_free', 'memory_dom0',
                    'vg_size', 'vg_free', 'cpu_total']

    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      # the dynamic values are added only for usable nodes
      if not ninfo.offline and not ninfo.drained:
        nresult.Raise("Can't get data for node %s" % nname)
        node_instances = node_iinfo[nname]
        node_instances.Raise("Can't get node instance info from node %s" %
                             nname)
        remote_info = nresult.payload

        for attr in needed_attrs:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          value = remote_info[attr]
          if not isinstance(value, int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, value))

        # compute memory used by primary instances
        pri_mem = 0
        pri_up_mem = 0
        for inst, beparams in inst_pairs:
          if inst.primary_node != nname:
            continue
          configured_mem = beparams[constants.BE_MEMORY]
          pri_mem += configured_mem
          running = node_instances.payload
          if inst.name in running:
            used_mem = int(running[inst.name]['memory'])
          else:
            used_mem = 0
          # memory configured but not currently used by the instance is
          # subtracted from the free memory reported by the node
          remote_info['memory_free'] -= max(0, configured_mem - used_mem)

          if inst.admin_up:
            pri_up_mem += configured_mem

        # add the dynamic values to the node entry
        pnr.update({
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": pri_mem,
          "i_pri_up_memory": pri_up_mem,
          })

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for inst, beparams in inst_pairs:
      nic_data = []
      for nic in inst.nics:
        filled_params = objects.FillDict(
            cluster.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_mode = filled_params[constants.NIC_MODE]
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": nic_mode,
          "link": filled_params[constants.NIC_LINK],
          }
        # for bridged NICs the link is also exported as "bridge"
        if nic_mode == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(inst.GetTags()),
        "admin_up": inst.admin_up,
        "vcpus": beparams[constants.BE_VCPUS],
        "memory": beparams[constants.BE_MEMORY],
        "os": inst.os,
        "nodes": [inst.primary_node] + list(inst.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode}
                  for dsk in inst.disks],
        "disk_template": inst.disk_template,
        "hypervisor": inst.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(inst.disk_template,
                                                 pir["disks"])
      instance_data[inst.name] = pir

    data["instances"] = instance_data

    self.in_data = data
8637

    
8638
  def _AddNewInstance(self):
    """Attach an "allocate" request to the allocator input data.

    The request is stored under the "request" key of self.in_data, which
    must already exist (_BuildInputData calls _ComputeClusterData before
    calling this method). As a side effect, self.required_nodes is set
    to 2 for disk templates listed in constants.DTS_NET_MIRROR and to 1
    for all other templates.

    The completeness of the opcode must have been checked by the caller.

    """
    in_data = self.in_data

    total_space = _ComputeDiskSize(self.disk_template, self.disks)

    # mirrored templates are given two nodes, everything else one
    if self.disk_template in constants.DTS_NET_MIRROR:
      node_count = 2
    else:
      node_count = 1
    self.required_nodes = node_count

    in_data["request"] = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": total_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
8670

    
8671
  def _AddRelocateInstance(self):
    """Attach a "relocate" request to the allocator input data.

    The instance named self.name is looked up in the configuration; it
    must use a disk template from constants.DTS_NET_MIRROR and have
    exactly one secondary node. The request is stored under the
    "request" key of self.in_data, which must already exist
    (_BuildInputData calls _ComputeClusterData before calling this
    method), and self.required_nodes is set to 1.

    The completeness of the opcode must have been checked by the caller.

    @raise errors.ProgrammerError: if the instance is not found in the
        configuration
    @raise errors.OpPrereqError: if the instance is not mirrored or does
        not have exactly one secondary node

    """
    inst = self.cfg.GetInstanceInfo(self.name)
    if inst is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    mirrored = inst.disk_template in constants.DTS_NET_MIRROR
    if not mirrored:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    secondary_count = len(inst.secondary_nodes)
    if secondary_count != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1

    # only the sizes are handed to the disk space computation
    sizes = []
    for dsk in inst.disks:
      sizes.append({'size': dsk.size})
    total_space = _ComputeDiskSize(inst.disk_template, sizes)

    self.in_data["request"] = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": total_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
8706

    
8707
  def _BuildInputData(self):
    """Assemble the complete allocator input.

    The cluster data is computed first, then the request matching
    self.mode is added: an allocation request for
    constants.IALLOCATOR_MODE_ALLOC, a relocation request for any other
    mode. The serialized form of self.in_data is finally stored in
    self.in_text.

    """
    self._ComputeClusterData()

    # pick the request builder matching the requested mode
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      add_request = self._AddNewInstance
    else:
      add_request = self._AddRelocateInstance
    add_request()

    self.in_text = serializer.Dump(self.in_data)
8719

    
8720
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and store its output.

    The payload of the call is saved in self.out_text; nothing is
    returned.

    @param name: the allocator name, passed through to the runner
    @param validate: whether to pass the output through _ValidateResult
    @param call_fn: callable invoked as call_fn(master_node, name, in_text);
        when None, self.rpc.call_iallocator_runner is used

    """
    runner = call_fn
    if runner is None:
      runner = self.rpc.call_iallocator_runner

    master = self.cfg.GetMasterNode()
    rpc_result = runner(master, name, self.in_text)
    rpc_result.Raise("Failure while running the iallocator script")

    self.out_text = rpc_result.payload
    if validate:
      self._ValidateResult()
8733

    
8734
  def _ValidateResult(self):
8735
    """Process the allocator results.
8736

8737
    This will process and if successful save the result in
8738
    self.out_data and the other parameters.
8739

8740
    """
8741
    try:
8742
      rdict = serializer.Load(self.out_text)
8743
    except Exception, err:
8744
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8745

    
8746
    if not isinstance(rdict, dict):
8747
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
8748

    
8749
    for key in "success", "info", "nodes":
8750
      if key not in rdict:
8751
        raise errors.OpExecError("Can't parse iallocator results:"
8752
                                 " missing key '%s'" % key)
8753
      setattr(self, key, rdict[key])
8754

    
8755
    if not isinstance(rdict["nodes"], list):
8756
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8757
                               " is not a list")
8758
    self.out_data = rdict
8759

    
8760

    
8761
class LUTestAllocator(NoHooksLU):
  """Logical unit running the allocator tests.

  Depending on the direction given in the opcode, this LU either
  returns the input text generated for an allocator or runs the named
  allocator and returns its unvalidated output.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and
    mode of the test.

    """
    op = self.op

    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._CheckAllocParams()
    elif op.mode == constants.IALLOCATOR_MODE_RELOC:
      self._CheckRelocParams()
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 op.mode, errors.ECODE_INVAL)

    if op.direction == constants.IALLOCATOR_DIR_OUT:
      # running an allocator requires knowing which one to run
      if getattr(op, "allocator", None) is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 op.direction, errors.ECODE_INVAL)

  def _CheckAllocParams(self):
    """Check the opcode parameters of an allocation test.

    All allocation attributes must be present, the instance name must
    not resolve to an existing instance, and the 'nics' and 'disks'
    parameters must be lists of correctly populated dicts. A missing or
    None hypervisor is replaced by the value returned by
    self.cfg.GetHypervisorType().

    """
    op = self.op

    for wanted in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
      if not hasattr(op, wanted):
        raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                   wanted, errors.ECODE_INVAL)

    existing = self.cfg.ExpandInstanceName(op.name)
    if existing is not None:
      raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                 existing, errors.ECODE_EXISTS)

    if not isinstance(op.nics, list):
      raise errors.OpPrereqError("Invalid parameter 'nics'",
                                 errors.ECODE_INVAL)
    for nic in op.nics:
      nic_ok = (isinstance(nic, dict) and
                "mac" in nic and
                "ip" in nic and
                "bridge" in nic)
      if not nic_ok:
        raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                   " parameter", errors.ECODE_INVAL)

    if not isinstance(op.disks, list):
      raise errors.OpPrereqError("Invalid parameter 'disks'",
                                 errors.ECODE_INVAL)
    for dsk in op.disks:
      dsk_ok = (isinstance(dsk, dict) and
                "size" in dsk and
                isinstance(dsk["size"], int) and
                "mode" in dsk and
                dsk["mode"] in ['r', 'w'])
      if not dsk_ok:
        raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                   " parameter", errors.ECODE_INVAL)

    if getattr(op, "hypervisor", None) is None:
      op.hypervisor = self.cfg.GetHypervisorType()

  def _CheckRelocParams(self):
    """Check the opcode parameters of a relocation test.

    The instance name must resolve to an existing instance; the opcode
    name is replaced by the expanded name and the secondary nodes of
    the instance are remembered in self.relocate_from.

    """
    op = self.op

    if not hasattr(op, "name"):
      raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                 errors.ECODE_INVAL)

    full_name = self.cfg.ExpandInstanceName(op.name)
    if full_name is None:
      raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                 op.name, errors.ECODE_NOENT)

    op.name = full_name
    self.relocate_from = self.cfg.GetInstanceInfo(full_name).secondary_nodes

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: not used by this LU
    @return: the allocator input text if the direction is
        constants.IALLOCATOR_DIR_IN, otherwise the output text of the
        allocator named in the opcode (run without validation)

    """
    op = self.op

    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial_args = {
        "mode": op.mode,
        "name": op.name,
        "mem_size": op.mem_size,
        "disks": op.disks,
        "disk_template": op.disk_template,
        "os": op.os,
        "tags": op.tags,
        "nics": op.nics,
        "vcpus": op.vcpus,
        "hypervisor": op.hypervisor,
        }
    else:
      ial_args = {
        "mode": op.mode,
        "name": op.name,
        "relocate_from": list(self.relocate_from),
        }
    ial = IAllocator(self.cfg, self.rpc, **ial_args)

    if op.direction == constants.IALLOCATOR_DIR_IN:
      return ial.in_text

    ial.Run(op.allocator, validate=False)
    return ial.out_text