#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes, an empty list (and not None) should be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None
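

# Illustrative sketch only (not wired to any opcode): a minimal LU following
# the rules from the LogicalUnit docstring. The opcode attribute
# 'instance_name' and the returned value are assumptions made for this
# example, not part of the real opcode definitions.
class _LUExampleQueryInstanceName(NoHooksLU):
  """Example LU that only looks up one instance name (illustration only)."""
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # Expand the instance name and declare a shared lock on it only.
    self._ExpandAndLockInstance()
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def CheckPrereq(self):
    # Nothing to verify beyond the expansion done in ExpandNames.
    pass

  def Exec(self, feedback_fn):
    # Return the canonical instance name to the caller.
    return self.op.instance_name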


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
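

# Illustrative sketch only: a minimal Tasklet subclass as described in the
# Tasklet docstring. The 'node_name' attribute and the feedback message are
# assumptions made for this example; real tasklets carry their own state and
# checks.
class _TaskletExampleNoop(Tasklet):
  """Example tasklet that performs no cluster changes (illustration only)."""
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    # The owning LU must already hold the relevant locks; tasklets only
    # verify preconditions here, e.g. that the node is usable.
    _CheckNodeOnline(self.lu, self.node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do for node %s" % self.node_name)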


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name,
                                 errors.ECODE_NOENT)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name,
                                   errors.ECODE_NOENT)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
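

# Illustrative sketch only, never called: the kind of environment
# _BuildInstanceHookEnv produces. All values below are made-up sample data.
def _ExampleBuildInstanceHookEnvUsage():
  """Example call showing the resulting hook variables (illustration only).

  """
  nics = [("198.51.100.10", "aa:00:00:11:22:33",
           constants.NIC_MODE_BRIDGED, "xen-br0")]
  disks = [(10240, "rw")]
  env = _BuildInstanceHookEnv("inst1.example.tld", "node1.example.tld",
                              ["node2.example.tld"], "debian-image", True,
                              128, 1, nics, constants.DT_DRBD8, disks,
                              {}, {}, constants.HT_XEN_PVM)
  # Among others, env now contains INSTANCE_NAME, INSTANCE_PRIMARY,
  # INSTANCE_NIC_COUNT, INSTANCE_NIC0_BRIDGE and INSTANCE_DISK0_SIZE.
  return env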


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
854
  """Logical unit for destroying the cluster.
855

856
  """
857
  HPATH = "cluster-destroy"
858
  HTYPE = constants.HTYPE_CLUSTER
859
  _OP_REQP = []
860

    
861
  def BuildHooksEnv(self):
862
    """Build hooks env.
863

864
    """
865
    env = {"OP_TARGET": self.cfg.GetClusterName()}
866
    return env, [], []
867

    
868
  def CheckPrereq(self):
869
    """Check prerequisites.
870

871
    This checks whether the cluster is empty.
872

873
    Any errors are signaled by raising errors.OpPrereqError.
874

875
    """
876
    master = self.cfg.GetMasterNode()
877

    
878
    nodelist = self.cfg.GetNodeList()
879
    if len(nodelist) != 1 or nodelist[0] != master:
880
      raise errors.OpPrereqError("There are still %d node(s) in"
881
                                 " this cluster." % (len(nodelist) - 1),
882
                                 errors.ECODE_INVAL)
883
    instancelist = self.cfg.GetInstanceList()
884
    if instancelist:
885
      raise errors.OpPrereqError("There are still %d instance(s) in"
886
                                 " this cluster." % len(instancelist),
887
                                 errors.ECODE_INVAL)
888

    
889
  def Exec(self, feedback_fn):
890
    """Destroys the cluster.
891

892
    """
893
    master = self.cfg.GetMasterNode()
894
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
895

    
896
    # Run post hooks on master node before it's removed
897
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
898
    try:
899
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
900
    except:
901
      self.LogWarning("Errors occurred running hooks on %s" % master)
902

    
903
    result = self.rpc.call_node_stop_master(master, False)
904
    result.Raise("Could not disable the master role")
905

    
906
    if modify_ssh_setup:
907
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
908
      utils.CreateBackup(priv_key)
909
      utils.CreateBackup(pub_key)
910

    
911
    return master
912

    
913

    
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
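
  # Example (illustrative only) of the two output formats produced by _Error
  # for a hypothetical LVM problem on node1:
  #   with op.error_codes:  ERROR:ENODELVM:node:node1:volume group missing
  #   without it:           ERROR: node node1: volume group missing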

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for size, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1160
                      node_instance, n_offline):
1161
    """Verify an instance.
1162

1163
    This function checks to see if the required block devices are
1164
    available on the instance's node.
1165

1166
    """
1167
    _ErrorIf = self._ErrorIf
1168
    node_current = instanceconfig.primary_node
1169

    
1170
    node_vol_should = {}
1171
    instanceconfig.MapLVsByNode(node_vol_should)
1172

    
1173
    for node in node_vol_should:
1174
      if node in n_offline:
1175
        # ignore missing volumes on offline nodes
1176
        continue
1177
      for volume in node_vol_should[node]:
1178
        test = node not in node_vol_is or volume not in node_vol_is[node]
1179
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1180
                 "volume %s missing on node %s", volume, node)
1181

    
1182
    if instanceconfig.admin_up:
1183
      test = ((node_current not in node_instance or
1184
               not instance in node_instance[node_current]) and
1185
              node_current not in n_offline)
1186
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1187
               "instance not running on its primary node %s",
1188
               node_current)
1189

    
1190
    for node in node_instance:
1191
      if (not node == node_current):
1192
        test = instance in node_instance[node]
1193
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1194
                 "instance should not run on node %s", node)
1195

    
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to, should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist)")
      if test:
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(nodeinfo, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

1444
    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
1504
    self._VerifyOrphanVolumes(node_vol_should, node_volume)
1505

    
1506
    feedback_fn("* Verifying remaining instances")
1507
    self._VerifyOrphanInstances(instancelist, node_instance)
1508

    
1509
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1510
      feedback_fn("* Verifying N+1 Memory redundancy")
1511
      self._VerifyNPlusOneMemory(node_info, instance_cfg)
1512

    
1513
    feedback_fn("* Other Notes")
1514
    if i_non_redundant:
1515
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1516
                  % len(i_non_redundant))
1517

    
1518
    if i_non_a_balanced:
1519
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1520
                  % len(i_non_a_balanced))
1521

    
1522
    if n_offline:
1523
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1524

    
1525
    if n_drained:
1526
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1527

    
1528
    return not self.bad
1529

    
1530
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        show_node_header = True
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if test:
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
1578
  """Verifies the cluster disks status.
1579

1580
  """
1581
  _OP_REQP = []
1582
  REQ_BGL = False
1583

    
1584
  def ExpandNames(self):
1585
    self.needed_locks = {
1586
      locking.LEVEL_NODE: locking.ALL_SET,
1587
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1588
    }
1589
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1590

    
1591
  def CheckPrereq(self):
1592
    """Check prerequisites.
1593

1594
    This has no prerequisites.
1595

1596
    """
1597
    pass
1598

    
1599
  def Exec(self, feedback_fn):
1600
    """Verify integrity of cluster disks.
1601

1602
    @rtype: tuple of three items
1603
    @return: a tuple of (dict of node-to-node_error, list of instances
1604
        which need activate-disks, dict of instance: (node, volume) for
1605
        missing volumes)
1606

1607
    """
1608
    result = res_nodes, res_instances, res_missing = {}, [], {}
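    # result layout sketch (hypothetical values):
    #   res_nodes     -> {"node1": "error message from the lv_list call"}
    #   res_instances -> ["instance-needing-activate-disks"]
    #   res_missing   -> {"inst1": [("node1", "lv-name")]}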
1609

    
1610
    vg_name = self.cfg.GetVGName()
1611
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1612
    instances = [self.cfg.GetInstanceInfo(name)
1613
                 for name in self.cfg.GetInstanceList()]
1614

    
1615
    nv_dict = {}
1616
    for inst in instances:
1617
      inst_lvs = {}
1618
      if (not inst.admin_up or
1619
          inst.disk_template not in constants.DTS_NET_MIRROR):
1620
        continue
1621
      inst.MapLVsByNode(inst_lvs)
1622
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
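      # e.g. (hypothetical names) {"inst1": {"node1": ["lv_a", "lv_b"]}}
      # becomes {("node1", "lv_a"): inst1, ("node1", "lv_b"): inst1}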
1623
      for node, vol_list in inst_lvs.iteritems():
1624
        for vol in vol_list:
1625
          nv_dict[(node, vol)] = inst
1626

    
1627
    if not nv_dict:
1628
      return result
1629

    
1630
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1631

    
1632
    for node in nodes:
1633
      # node_volume
1634
      node_res = node_lvs[node]
1635
      if node_res.offline:
1636
        continue
1637
      msg = node_res.fail_msg
1638
      if msg:
1639
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1640
        res_nodes[node] = msg
1641
        continue
1642

    
1643
      lvs = node_res.payload
1644
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1645
        inst = nv_dict.pop((node, lv_name), None)
1646
        if (not lv_online and inst is not None
1647
            and inst.name not in res_instances):
1648
          res_instances.append(inst.name)
1649

    
1650
    # any leftover items in nv_dict are missing LVs, let's arrange the
1651
    # data better
1652
    for key, inst in nv_dict.iteritems():
1653
      if inst.name not in res_missing:
1654
        res_missing[inst.name] = []
1655
      res_missing[inst.name].append(key)
1656

    
1657
    return result
1658

    
1659

    
1660
class LURepairDiskSizes(NoHooksLU):
1661
  """Verifies the cluster disks sizes.
1662

1663
  """
1664
  _OP_REQP = ["instances"]
1665
  REQ_BGL = False
1666

    
1667
  def ExpandNames(self):
1668
    if not isinstance(self.op.instances, list):
1669
      raise errors.OpPrereqError("Invalid argument type 'instances'",
1670
                                 errors.ECODE_INVAL)
1671

    
1672
    if self.op.instances:
1673
      self.wanted_names = []
1674
      for name in self.op.instances:
1675
        full_name = self.cfg.ExpandInstanceName(name)
1676
        if full_name is None:
1677
          raise errors.OpPrereqError("Instance '%s' not known" % name,
1678
                                     errors.ECODE_NOENT)
1679
        self.wanted_names.append(full_name)
1680
      self.needed_locks = {
1681
        locking.LEVEL_NODE: [],
1682
        locking.LEVEL_INSTANCE: self.wanted_names,
1683
        }
1684
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1685
    else:
1686
      self.wanted_names = None
1687
      self.needed_locks = {
1688
        locking.LEVEL_NODE: locking.ALL_SET,
1689
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1690
        }
1691
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1692

    
1693
  def DeclareLocks(self, level):
1694
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1695
      self._LockInstancesNodes(primary_only=True)
1696

    
1697
  def CheckPrereq(self):
1698
    """Check prerequisites.
1699

1700
    This only checks the optional instance list against the existing names.
1701

1702
    """
1703
    if self.wanted_names is None:
1704
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1705

    
1706
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1707
                             in self.wanted_names]
1708

    
1709
  def _EnsureChildSizes(self, disk):
1710
    """Ensure children of the disk have the needed disk size.
1711

1712
    This is valid mainly for DRBD8 and fixes an issue where the
1713
    children have smaller disk size.
1714

1715
    @param disk: an L{ganeti.objects.Disk} object
1716

1717
    """
1718
    if disk.dev_type == constants.LD_DRBD8:
1719
      assert disk.children, "Empty children for DRBD8?"
1720
      fchild = disk.children[0]
1721
      mismatch = fchild.size < disk.size
1722
      if mismatch:
1723
        self.LogInfo("Child disk has size %d, parent %d, fixing",
1724
                     fchild.size, disk.size)
1725
        fchild.size = disk.size
1726

    
1727
      # and we recurse on this child only, not on the metadev
1728
      return self._EnsureChildSizes(fchild) or mismatch
1729
    else:
1730
      return False
1731

    
1732
  def Exec(self, feedback_fn):
1733
    """Verify the size of cluster disks.
1734

1735
    """
1736
    # TODO: check child disks too
1737
    # TODO: check differences in size between primary/secondary nodes
1738
    per_node_disks = {}
1739
    for instance in self.wanted_instances:
1740
      pnode = instance.primary_node
1741
      if pnode not in per_node_disks:
1742
        per_node_disks[pnode] = []
1743
      for idx, disk in enumerate(instance.disks):
1744
        per_node_disks[pnode].append((instance, idx, disk))
1745

    
1746
    changed = []
1747
    for node, dskl in per_node_disks.items():
1748
      newl = [v[2].Copy() for v in dskl]
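      # work on copies so that SetDiskID below does not modify the disk
      # objects held in the configuration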
1749
      for dsk in newl:
1750
        self.cfg.SetDiskID(dsk, node)
1751
      result = self.rpc.call_blockdev_getsizes(node, newl)
1752
      if result.fail_msg:
1753
        self.LogWarning("Failure in blockdev_getsizes call to node"
1754
                        " %s, ignoring", node)
1755
        continue
1756
      if len(result.data) != len(dskl):
1757
        self.LogWarning("Invalid result from node %s, ignoring node results",
1758
                        node)
1759
        continue
1760
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1761
        if size is None:
1762
          self.LogWarning("Disk %d of instance %s did not return size"
1763
                          " information, ignoring", idx, instance.name)
1764
          continue
1765
        if not isinstance(size, (int, long)):
1766
          self.LogWarning("Disk %d of instance %s did not return valid"
1767
                          " size information, ignoring", idx, instance.name)
1768
          continue
1769
        size = size >> 20
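        # the node reports sizes in bytes; shift by 20 bits so we compare in
        # MiB, the unit disk.size is expected to be in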
1770
        if size != disk.size:
1771
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1772
                       " correcting: recorded %d, actual %d", idx,
1773
                       instance.name, disk.size, size)
1774
          disk.size = size
1775
          self.cfg.Update(instance, feedback_fn)
1776
          changed.append((instance.name, idx, size))
1777
        if self._EnsureChildSizes(disk):
1778
          self.cfg.Update(instance, feedback_fn)
1779
          changed.append((instance.name, idx, disk.size))
1780
    return changed
1781

    
1782

    
1783
class LURenameCluster(LogicalUnit):
1784
  """Rename the cluster.
1785

1786
  """
1787
  HPATH = "cluster-rename"
1788
  HTYPE = constants.HTYPE_CLUSTER
1789
  _OP_REQP = ["name"]
1790

    
1791
  def BuildHooksEnv(self):
1792
    """Build hooks env.
1793

1794
    """
1795
    env = {
1796
      "OP_TARGET": self.cfg.GetClusterName(),
1797
      "NEW_NAME": self.op.name,
1798
      }
1799
    mn = self.cfg.GetMasterNode()
1800
    return env, [mn], [mn]
1801

    
1802
  def CheckPrereq(self):
1803
    """Verify that the passed name is a valid one.
1804

1805
    """
1806
    hostname = utils.GetHostInfo(self.op.name)
1807

    
1808
    new_name = hostname.name
1809
    self.ip = new_ip = hostname.ip
1810
    old_name = self.cfg.GetClusterName()
1811
    old_ip = self.cfg.GetMasterIP()
1812
    if new_name == old_name and new_ip == old_ip:
1813
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1814
                                 " cluster has changed",
1815
                                 errors.ECODE_INVAL)
1816
    if new_ip != old_ip:
1817
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1818
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1819
                                   " reachable on the network. Aborting." %
1820
                                   new_ip, errors.ECODE_NOTUNIQUE)
1821

    
1822
    self.op.name = new_name
1823

    
1824
  def Exec(self, feedback_fn):
1825
    """Rename the cluster.
1826

1827
    """
1828
    clustername = self.op.name
1829
    ip = self.ip
1830

    
1831
    # shutdown the master IP
1832
    master = self.cfg.GetMasterNode()
1833
    result = self.rpc.call_node_stop_master(master, False)
1834
    result.Raise("Could not disable the master role")
1835

    
1836
    try:
1837
      cluster = self.cfg.GetClusterInfo()
1838
      cluster.cluster_name = clustername
1839
      cluster.master_ip = ip
1840
      self.cfg.Update(cluster, feedback_fn)
1841

    
1842
      # update the known hosts file
1843
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1844
      node_list = self.cfg.GetNodeList()
1845
      try:
1846
        node_list.remove(master)
1847
      except ValueError:
1848
        pass
1849
      result = self.rpc.call_upload_file(node_list,
1850
                                         constants.SSH_KNOWN_HOSTS_FILE)
1851
      for to_node, to_result in result.iteritems():
1852
        msg = to_result.fail_msg
1853
        if msg:
1854
          msg = ("Copy of file %s to node %s failed: %s" %
1855
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1856
          self.proc.LogWarning(msg)
1857

    
1858
    finally:
1859
      result = self.rpc.call_node_start_master(master, False, False)
1860
      msg = result.fail_msg
1861
      if msg:
1862
        self.LogWarning("Could not re-enable the master role on"
1863
                        " the master, please restart manually: %s", msg)
1864

    
1865

    
1866
def _RecursiveCheckIfLVMBased(disk):
1867
  """Check if the given disk or its children are lvm-based.
1868

1869
  @type disk: L{objects.Disk}
1870
  @param disk: the disk to check
1871
  @rtype: boolean
1872
  @return: boolean indicating whether a LD_LV dev_type was found or not
1873

1874
  """
1875
  if disk.children:
1876
    for chdisk in disk.children:
1877
      if _RecursiveCheckIfLVMBased(chdisk):
1878
        return True
1879
  return disk.dev_type == constants.LD_LV
1880

    
1881

    
1882
class LUSetClusterParams(LogicalUnit):
1883
  """Change the parameters of the cluster.
1884

1885
  """
1886
  HPATH = "cluster-modify"
1887
  HTYPE = constants.HTYPE_CLUSTER
1888
  _OP_REQP = []
1889
  REQ_BGL = False
1890

    
1891
  def CheckArguments(self):
1892
    """Check parameters
1893

1894
    """
1895
    if not hasattr(self.op, "candidate_pool_size"):
1896
      self.op.candidate_pool_size = None
1897
    if self.op.candidate_pool_size is not None:
1898
      try:
1899
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1900
      except (ValueError, TypeError), err:
1901
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1902
                                   str(err), errors.ECODE_INVAL)
1903
      if self.op.candidate_pool_size < 1:
1904
        raise errors.OpPrereqError("At least one master candidate needed",
1905
                                   errors.ECODE_INVAL)
1906

    
1907
  def ExpandNames(self):
1908
    # FIXME: in the future maybe other cluster params won't require checking on
1909
    # all nodes to be modified.
1910
    self.needed_locks = {
1911
      locking.LEVEL_NODE: locking.ALL_SET,
1912
    }
1913
    self.share_locks[locking.LEVEL_NODE] = 1
1914

    
1915
  def BuildHooksEnv(self):
1916
    """Build hooks env.
1917

1918
    """
1919
    env = {
1920
      "OP_TARGET": self.cfg.GetClusterName(),
1921
      "NEW_VG_NAME": self.op.vg_name,
1922
      }
1923
    mn = self.cfg.GetMasterNode()
1924
    return env, [mn], [mn]
1925

    
1926
  def CheckPrereq(self):
1927
    """Check prerequisites.
1928

1929
    This checks whether the given params don't conflict and
1930
    if the given volume group is valid.
1931

1932
    """
1933
    if self.op.vg_name is not None and not self.op.vg_name:
1934
      instances = self.cfg.GetAllInstancesInfo().values()
1935
      for inst in instances:
1936
        for disk in inst.disks:
1937
          if _RecursiveCheckIfLVMBased(disk):
1938
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1939
                                       " lvm-based instances exist",
1940
                                       errors.ECODE_INVAL)
1941

    
1942
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1943

    
1944
    # if vg_name is not None, check the given volume group on all nodes
1945
    if self.op.vg_name:
1946
      vglist = self.rpc.call_vg_list(node_list)
1947
      for node in node_list:
1948
        msg = vglist[node].fail_msg
1949
        if msg:
1950
          # ignoring down node
1951
          self.LogWarning("Error while gathering data on node %s"
1952
                          " (ignoring node): %s", node, msg)
1953
          continue
1954
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1955
                                              self.op.vg_name,
1956
                                              constants.MIN_VG_SIZE)
1957
        if vgstatus:
1958
          raise errors.OpPrereqError("Error on node '%s': %s" %
1959
                                     (node, vgstatus), errors.ECODE_ENVIRON)
1960

    
1961
    self.cluster = cluster = self.cfg.GetClusterInfo()
1962
    # validate params changes
1963
    if self.op.beparams:
1964
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1965
      self.new_beparams = objects.FillDict(
1966
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1967

    
1968
    if self.op.nicparams:
1969
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1970
      self.new_nicparams = objects.FillDict(
1971
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1972
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1973
      nic_errors = []
1974

    
1975
      # check all instances for consistency
1976
      for instance in self.cfg.GetAllInstancesInfo().values():
1977
        for nic_idx, nic in enumerate(instance.nics):
1978
          params_copy = copy.deepcopy(nic.nicparams)
1979
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
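          # params_filled is what this NIC would effectively use after the
          # change: the new cluster defaults overridden by the NIC's own
          # settings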
1980

    
1981
          # check parameter syntax
1982
          try:
1983
            objects.NIC.CheckParameterSyntax(params_filled)
1984
          except errors.ConfigurationError, err:
1985
            nic_errors.append("Instance %s, nic/%d: %s" %
1986
                              (instance.name, nic_idx, err))
1987

    
1988
          # if we're moving instances to routed, check that they have an ip
1989
          target_mode = params_filled[constants.NIC_MODE]
1990
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
1991
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
1992
                              (instance.name, nic_idx))
1993
      if nic_errors:
1994
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
1995
                                   "\n".join(nic_errors))
1996

    
1997
    # hypervisor list/parameters
1998
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
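    # start from a copy of the current hypervisor parameters and merge the
    # requested changes into it below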
1999
    if self.op.hvparams:
2000
      if not isinstance(self.op.hvparams, dict):
2001
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2002
                                   errors.ECODE_INVAL)
2003
      for hv_name, hv_dict in self.op.hvparams.items():
2004
        if hv_name not in self.new_hvparams:
2005
          self.new_hvparams[hv_name] = hv_dict
2006
        else:
2007
          self.new_hvparams[hv_name].update(hv_dict)
2008

    
2009
    if self.op.enabled_hypervisors is not None:
2010
      self.hv_list = self.op.enabled_hypervisors
2011
      if not self.hv_list:
2012
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2013
                                   " least one member",
2014
                                   errors.ECODE_INVAL)
2015
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2016
      if invalid_hvs:
2017
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2018
                                   " entries: %s" %
2019
                                   utils.CommaJoin(invalid_hvs),
2020
                                   errors.ECODE_INVAL)
2021
    else:
2022
      self.hv_list = cluster.enabled_hypervisors
2023

    
2024
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2025
      # either the enabled list has changed, or the parameters have, validate
2026
      for hv_name, hv_params in self.new_hvparams.items():
2027
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2028
            (self.op.enabled_hypervisors and
2029
             hv_name in self.op.enabled_hypervisors)):
2030
          # either this is a new hypervisor, or its parameters have changed
2031
          hv_class = hypervisor.GetHypervisor(hv_name)
2032
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2033
          hv_class.CheckParameterSyntax(hv_params)
2034
          _CheckHVParams(self, node_list, hv_name, hv_params)
2035

    
2036
  def Exec(self, feedback_fn):
2037
    """Change the parameters of the cluster.
2038

2039
    """
2040
    if self.op.vg_name is not None:
2041
      new_volume = self.op.vg_name
2042
      if not new_volume:
2043
        new_volume = None
2044
      if new_volume != self.cfg.GetVGName():
2045
        self.cfg.SetVGName(new_volume)
2046
      else:
2047
        feedback_fn("Cluster LVM configuration already in desired"
2048
                    " state, not changing")
2049
    if self.op.hvparams:
2050
      self.cluster.hvparams = self.new_hvparams
2051
    if self.op.enabled_hypervisors is not None:
2052
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2053
    if self.op.beparams:
2054
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2055
    if self.op.nicparams:
2056
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2057

    
2058
    if self.op.candidate_pool_size is not None:
2059
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2060
      # we need to update the pool size here, otherwise the save will fail
2061
      _AdjustCandidatePool(self, [])
2062

    
2063
    self.cfg.Update(self.cluster, feedback_fn)
2064

    
2065

    
2066
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2067
  """Distribute additional files which are part of the cluster configuration.
2068

2069
  ConfigWriter takes care of distributing the config and ssconf files, but
2070
  there are more files which should be distributed to all nodes. This function
2071
  makes sure those are copied.
2072

2073
  @param lu: calling logical unit
2074
  @param additional_nodes: list of nodes not in the config to distribute to
2075

2076
  """
2077
  # 1. Gather target nodes
2078
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2079
  dist_nodes = lu.cfg.GetNodeList()
2080
  if additional_nodes is not None:
2081
    dist_nodes.extend(additional_nodes)
2082
  if myself.name in dist_nodes:
2083
    dist_nodes.remove(myself.name)
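    # the master node is the source of these files, so it does not need to
    # receive a copy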
2084

    
2085
  # 2. Gather files to distribute
2086
  dist_files = set([constants.ETC_HOSTS,
2087
                    constants.SSH_KNOWN_HOSTS_FILE,
2088
                    constants.RAPI_CERT_FILE,
2089
                    constants.RAPI_USERS_FILE,
2090
                    constants.HMAC_CLUSTER_KEY,
2091
                   ])
2092

    
2093
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2094
  for hv_name in enabled_hypervisors:
2095
    hv_class = hypervisor.GetHypervisor(hv_name)
2096
    dist_files.update(hv_class.GetAncillaryFiles())
2097

    
2098
  # 3. Perform the files upload
2099
  for fname in dist_files:
2100
    if os.path.exists(fname):
2101
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2102
      for to_node, to_result in result.items():
2103
        msg = to_result.fail_msg
2104
        if msg:
2105
          msg = ("Copy of file %s to node %s failed: %s" %
2106
                 (fname, to_node, msg))
2107
          lu.proc.LogWarning(msg)
2108

    
2109

    
2110
class LURedistributeConfig(NoHooksLU):
2111
  """Force the redistribution of cluster configuration.
2112

2113
  This is a very simple LU.
2114

2115
  """
2116
  _OP_REQP = []
2117
  REQ_BGL = False
2118

    
2119
  def ExpandNames(self):
2120
    self.needed_locks = {
2121
      locking.LEVEL_NODE: locking.ALL_SET,
2122
    }
2123
    self.share_locks[locking.LEVEL_NODE] = 1
2124

    
2125
  def CheckPrereq(self):
2126
    """Check prerequisites.
2127

2128
    """
2129

    
2130
  def Exec(self, feedback_fn):
2131
    """Redistribute the configuration.
2132

2133
    """
2134
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2135
    _RedistributeAncillaryFiles(self)
2136

    
2137

    
2138
def _WaitForSync(lu, instance, oneshot=False):
2139
  """Sleep and poll for an instance's disk to sync.
2140

2141
  """
2142
  if not instance.disks:
2143
    return True
2144

    
2145
  if not oneshot:
2146
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2147

    
2148
  node = instance.primary_node
2149

    
2150
  for dev in instance.disks:
2151
    lu.cfg.SetDiskID(dev, node)
2152

    
2153
  # TODO: Convert to utils.Retry
2154

    
2155
  retries = 0
2156
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2157
  while True:
2158
    max_time = 0
2159
    done = True
2160
    cumul_degraded = False
2161
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2162
    msg = rstats.fail_msg
2163
    if msg:
2164
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2165
      retries += 1
2166
      if retries >= 10:
2167
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2168
                                 " aborting." % node)
2169
      time.sleep(6)
2170
      continue
2171
    rstats = rstats.payload
2172
    retries = 0
2173
    for i, mstat in enumerate(rstats):
2174
      if mstat is None:
2175
        lu.LogWarning("Can't compute data for node %s/%s",
2176
                      node, instance.disks[i].iv_name)
2177
        continue
2178

    
2179
      cumul_degraded = (cumul_degraded or
2180
                        (mstat.is_degraded and mstat.sync_percent is None))
2181
      if mstat.sync_percent is not None:
2182
        done = False
2183
        if mstat.estimated_time is not None:
2184
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2185
          max_time = mstat.estimated_time
2186
        else:
2187
          rem_time = "no time estimate"
2188
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2189
                        (instance.disks[i].iv_name, mstat.sync_percent,
2190
                         rem_time))
2191

    
2192
    # if we're done but degraded, let's do a few small retries, to
2193
    # make sure we see a stable and not transient situation; therefore
2194
    # we force restart of the loop
2195
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2196
      logging.info("Degraded disks found, %d retries left", degr_retries)
2197
      degr_retries -= 1
2198
      time.sleep(1)
2199
      continue
2200

    
2201
    if done or oneshot:
2202
      break
2203

    
2204
    time.sleep(min(60, max_time))
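    # sleep at most 60 seconds between polls, even if the estimated sync
    # time is longer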
2205

    
2206
  if done:
2207
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2208
  return not cumul_degraded
2209

    
2210

    
2211
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2212
  """Check that mirrors are not degraded.
2213

2214
  The ldisk parameter, if True, will change the test from the
2215
  is_degraded attribute (which represents overall non-ok status for
2216
  the device(s)) to the ldisk (representing the local storage status).
2217

2218
  """
2219
  lu.cfg.SetDiskID(dev, node)
2220

    
2221
  result = True
2222

    
2223
  if on_primary or dev.AssembleOnSecondary():
2224
    rstats = lu.rpc.call_blockdev_find(node, dev)
2225
    msg = rstats.fail_msg
2226
    if msg:
2227
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2228
      result = False
2229
    elif not rstats.payload:
2230
      lu.LogWarning("Can't find disk on node %s", node)
2231
      result = False
2232
    else:
2233
      if ldisk:
2234
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2235
      else:
2236
        result = result and not rstats.payload.is_degraded
2237

    
2238
  if dev.children:
2239
    for child in dev.children:
2240
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2241

    
2242
  return result
2243

    
2244

    
2245
class LUDiagnoseOS(NoHooksLU):
2246
  """Logical unit for OS diagnose/query.
2247

2248
  """
2249
  _OP_REQP = ["output_fields", "names"]
2250
  REQ_BGL = False
2251
  _FIELDS_STATIC = utils.FieldSet()
2252
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2253
  # Fields that need calculation of global os validity
2254
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2255

    
2256
  def ExpandNames(self):
2257
    if self.op.names:
2258
      raise errors.OpPrereqError("Selective OS query not supported",
2259
                                 errors.ECODE_INVAL)
2260

    
2261
    _CheckOutputFields(static=self._FIELDS_STATIC,
2262
                       dynamic=self._FIELDS_DYNAMIC,
2263
                       selected=self.op.output_fields)
2264

    
2265
    # Lock all nodes, in shared mode
2266
    # Temporary removal of locks, should be reverted later
2267
    # TODO: reintroduce locks when they are lighter-weight
2268
    self.needed_locks = {}
2269
    #self.share_locks[locking.LEVEL_NODE] = 1
2270
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2271

    
2272
  def CheckPrereq(self):
2273
    """Check prerequisites.
2274

2275
    """
2276

    
2277
  @staticmethod
2278
  def _DiagnoseByOS(node_list, rlist):
2279
    """Remaps a per-node return list into an a per-os per-node dictionary
2280

2281
    @param node_list: a list with the names of all nodes
2282
    @param rlist: a map with node names as keys and OS objects as values
2283

2284
    @rtype: dict
2285
    @return: a dictionary with osnames as keys and as value another map, with
2286
        nodes as keys and tuples of (path, status, diagnose, variants)
        as values, eg::
2287

2288
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2289
                                     (/srv/..., False, "invalid api")],
2290
                           "node2": [(/srv/..., True, "")]}
2291
          }
2292

2293
    """
2294
    all_os = {}
2295
    # we build here the list of nodes that didn't fail the RPC (at RPC
2296
    # level), so that nodes with a non-responding node daemon don't
2297
    # make all OSes invalid
2298
    good_nodes = [node_name for node_name in rlist
2299
                  if not rlist[node_name].fail_msg]
2300
    for node_name, nr in rlist.items():
2301
      if nr.fail_msg or not nr.payload:
2302
        continue
2303
      for name, path, status, diagnose, variants in nr.payload:
2304
        if name not in all_os:
2305
          # build a list of nodes for this os containing empty lists
2306
          # for each node in node_list
2307
          all_os[name] = {}
2308
          for nname in good_nodes:
2309
            all_os[name][nname] = []
2310
        all_os[name][node_name].append((path, status, diagnose, variants))
2311
    return all_os
2312

    
2313
  def Exec(self, feedback_fn):
2314
    """Compute the list of OSes.
2315

2316
    """
2317
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2318
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2319
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2320
    output = []
2321
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2322
    calc_variants = "variants" in self.op.output_fields
2323

    
2324
    for os_name, os_data in pol.items():
2325
      row = []
2326
      if calc_valid:
2327
        valid = True
2328
        variants = None
2329
        for osl in os_data.values():
2330
          valid = valid and osl and osl[0][1]
2331
          if not valid:
2332
            variants = None
2333
            break
2334
          if calc_variants:
2335
            node_variants = osl[0][3]
2336
            if variants is None:
2337
              variants = node_variants
2338
            else:
2339
              variants = [v for v in variants if v in node_variants]
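              # keep only the variants reported by every node (intersection)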
2340

    
2341
      for field in self.op.output_fields:
2342
        if field == "name":
2343
          val = os_name
2344
        elif field == "valid":
2345
          val = valid
2346
        elif field == "node_status":
2347
          # this is just a copy of the dict
2348
          val = {}
2349
          for node_name, nos_list in os_data.items():
2350
            val[node_name] = nos_list
2351
        elif field == "variants":
2352
          val = variants
2353
        else:
2354
          raise errors.ParameterError(field)
2355
        row.append(val)
2356
      output.append(row)
2357

    
2358
    return output
2359

    
2360

    
2361
class LURemoveNode(LogicalUnit):
2362
  """Logical unit for removing a node.
2363

2364
  """
2365
  HPATH = "node-remove"
2366
  HTYPE = constants.HTYPE_NODE
2367
  _OP_REQP = ["node_name"]
2368

    
2369
  def BuildHooksEnv(self):
2370
    """Build hooks env.
2371

2372
    This doesn't run on the target node in the pre phase as a failed
2373
    node would then be impossible to remove.
2374

2375
    """
2376
    env = {
2377
      "OP_TARGET": self.op.node_name,
2378
      "NODE_NAME": self.op.node_name,
2379
      }
2380
    all_nodes = self.cfg.GetNodeList()
2381
    if self.op.node_name in all_nodes:
2382
      all_nodes.remove(self.op.node_name)
2383
    return env, all_nodes, all_nodes
2384

    
2385
  def CheckPrereq(self):
2386
    """Check prerequisites.
2387

2388
    This checks:
2389
     - the node exists in the configuration
2390
     - it does not have primary or secondary instances
2391
     - it's not the master
2392

2393
    Any errors are signaled by raising errors.OpPrereqError.
2394

2395
    """
2396
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2397
    if node is None:
2398
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
2399
                                 errors.ECODE_NOENT)
2400

    
2401
    instance_list = self.cfg.GetInstanceList()
2402

    
2403
    masternode = self.cfg.GetMasterNode()
2404
    if node.name == masternode:
2405
      raise errors.OpPrereqError("Node is the master node,"
2406
                                 " you need to failover first.",
2407
                                 errors.ECODE_INVAL)
2408

    
2409
    for instance_name in instance_list:
2410
      instance = self.cfg.GetInstanceInfo(instance_name)
2411
      if node.name in instance.all_nodes:
2412
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2413
                                   " please remove first." % instance_name,
2414
                                   errors.ECODE_INVAL)
2415
    self.op.node_name = node.name
2416
    self.node = node
2417

    
2418
  def Exec(self, feedback_fn):
2419
    """Removes the node from the cluster.
2420

2421
    """
2422
    node = self.node
2423
    logging.info("Stopping the node daemon and removing configs from node %s",
2424
                 node.name)
2425

    
2426
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2427

    
2428
    # Promote nodes to master candidate as needed
2429
    _AdjustCandidatePool(self, exceptions=[node.name])
2430
    self.context.RemoveNode(node.name)
2431

    
2432
    # Run post hooks on the node before it's removed
2433
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2434
    try:
2435
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2436
    except:
2437
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2438

    
2439
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2440
    msg = result.fail_msg
2441
    if msg:
2442
      self.LogWarning("Errors encountered on the remote node while leaving"
2443
                      " the cluster: %s", msg)
2444

    
2445

    
2446
class LUQueryNodes(NoHooksLU):
2447
  """Logical unit for querying nodes.
2448

2449
  """
2450
  _OP_REQP = ["output_fields", "names", "use_locking"]
2451
  REQ_BGL = False
2452

    
2453
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2454
                    "master_candidate", "offline", "drained"]
2455

    
2456
  _FIELDS_DYNAMIC = utils.FieldSet(
2457
    "dtotal", "dfree",
2458
    "mtotal", "mnode", "mfree",
2459
    "bootid",
2460
    "ctotal", "cnodes", "csockets",
2461
    )
2462

    
2463
  _FIELDS_STATIC = utils.FieldSet(*[
2464
    "pinst_cnt", "sinst_cnt",
2465
    "pinst_list", "sinst_list",
2466
    "pip", "sip", "tags",
2467
    "master",
2468
    "role"] + _SIMPLE_FIELDS
2469
    )
2470

    
2471
  def ExpandNames(self):
2472
    _CheckOutputFields(static=self._FIELDS_STATIC,
2473
                       dynamic=self._FIELDS_DYNAMIC,
2474
                       selected=self.op.output_fields)
2475

    
2476
    self.needed_locks = {}
2477
    self.share_locks[locking.LEVEL_NODE] = 1
2478

    
2479
    if self.op.names:
2480
      self.wanted = _GetWantedNodes(self, self.op.names)
2481
    else:
2482
      self.wanted = locking.ALL_SET
2483

    
2484
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2485
    self.do_locking = self.do_node_query and self.op.use_locking
2486
    if self.do_locking:
2487
      # if we don't request only static fields, we need to lock the nodes
2488
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2489

    
2490
  def CheckPrereq(self):
2491
    """Check prerequisites.
2492

2493
    """
2494
    # The validation of the node list is done in _GetWantedNodes, if the
    # list is non-empty; if it is empty, there is no validation to do
2496
    pass
2497

    
2498
  def Exec(self, feedback_fn):
2499
    """Computes the list of nodes and their attributes.
2500

2501
    """
2502
    all_info = self.cfg.GetAllNodesInfo()
2503
    if self.do_locking:
2504
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2505
    elif self.wanted != locking.ALL_SET:
2506
      nodenames = self.wanted
2507
      missing = set(nodenames).difference(all_info.keys())
2508
      if missing:
2509
        raise errors.OpExecError(
2510
          "Some nodes were removed before retrieving their data: %s" % missing)
2511
    else:
2512
      nodenames = all_info.keys()
2513

    
2514
    nodenames = utils.NiceSort(nodenames)
2515
    nodelist = [all_info[name] for name in nodenames]
2516

    
2517
    # begin data gathering
2518

    
2519
    if self.do_node_query:
2520
      live_data = {}
2521
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2522
                                          self.cfg.GetHypervisorType())
2523
      for name in nodenames:
2524
        nodeinfo = node_data[name]
2525
        if not nodeinfo.fail_msg and nodeinfo.payload:
2526
          nodeinfo = nodeinfo.payload
2527
          fn = utils.TryConvert
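          # TryConvert returns the value unchanged if the conversion fails,
          # so missing keys simply stay None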
2528
          live_data[name] = {
2529
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2530
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2531
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2532
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2533
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2534
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2535
            "bootid": nodeinfo.get('bootid', None),
2536
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2537
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2538
            }
2539
        else:
2540
          live_data[name] = {}
2541
    else:
2542
      live_data = dict.fromkeys(nodenames, {})
2543

    
2544
    node_to_primary = dict([(name, set()) for name in nodenames])
2545
    node_to_secondary = dict([(name, set()) for name in nodenames])
2546

    
2547
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2548
                             "sinst_cnt", "sinst_list"))
2549
    if inst_fields & frozenset(self.op.output_fields):
2550
      instancelist = self.cfg.GetInstanceList()
2551

    
2552
      for instance_name in instancelist:
2553
        inst = self.cfg.GetInstanceInfo(instance_name)
2554
        if inst.primary_node in node_to_primary:
2555
          node_to_primary[inst.primary_node].add(inst.name)
2556
        for secnode in inst.secondary_nodes:
2557
          if secnode in node_to_secondary:
2558
            node_to_secondary[secnode].add(inst.name)
2559

    
2560
    master_node = self.cfg.GetMasterNode()
2561

    
2562
    # end data gathering
2563

    
2564
    output = []
2565
    for node in nodelist:
2566
      node_output = []
2567
      for field in self.op.output_fields:
2568
        if field in self._SIMPLE_FIELDS:
2569
          val = getattr(node, field)
2570
        elif field == "pinst_list":
2571
          val = list(node_to_primary[node.name])
2572
        elif field == "sinst_list":
2573
          val = list(node_to_secondary[node.name])
2574
        elif field == "pinst_cnt":
2575
          val = len(node_to_primary[node.name])
2576
        elif field == "sinst_cnt":
2577
          val = len(node_to_secondary[node.name])
2578
        elif field == "pip":
2579
          val = node.primary_ip
2580
        elif field == "sip":
2581
          val = node.secondary_ip
2582
        elif field == "tags":
2583
          val = list(node.GetTags())
2584
        elif field == "master":
2585
          val = node.name == master_node
2586
        elif self._FIELDS_DYNAMIC.Matches(field):
2587
          val = live_data[node.name].get(field, None)
2588
        elif field == "role":
2589
          if node.name == master_node:
2590
            val = "M"
2591
          elif node.master_candidate:
2592
            val = "C"
2593
          elif node.drained:
2594
            val = "D"
2595
          elif node.offline:
2596
            val = "O"
2597
          else:
2598
            val = "R"
2599
        else:
2600
          raise errors.ParameterError(field)
2601
        node_output.append(val)
2602
      output.append(node_output)
2603

    
2604
    return output
2605

    
2606

    
2607
class LUQueryNodeVolumes(NoHooksLU):
2608
  """Logical unit for getting volumes on node(s).
2609

2610
  """
2611
  _OP_REQP = ["nodes", "output_fields"]
2612
  REQ_BGL = False
2613
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2614
  _FIELDS_STATIC = utils.FieldSet("node")
2615

    
2616
  def ExpandNames(self):
2617
    _CheckOutputFields(static=self._FIELDS_STATIC,
2618
                       dynamic=self._FIELDS_DYNAMIC,
2619
                       selected=self.op.output_fields)
2620

    
2621
    self.needed_locks = {}
2622
    self.share_locks[locking.LEVEL_NODE] = 1
2623
    if not self.op.nodes:
2624
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2625
    else:
2626
      self.needed_locks[locking.LEVEL_NODE] = \
2627
        _GetWantedNodes(self, self.op.nodes)
2628

    
2629
  def CheckPrereq(self):
2630
    """Check prerequisites.
2631

2632
    This checks that the fields required are valid output fields.
2633

2634
    """
2635
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2636

    
2637
  def Exec(self, feedback_fn):
2638
    """Computes the list of nodes and their attributes.
2639

2640
    """
2641
    nodenames = self.nodes
2642
    volumes = self.rpc.call_node_volumes(nodenames)
2643

    
2644
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2645
             in self.cfg.GetInstanceList()]
2646

    
2647
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2648

    
2649
    output = []
2650
    for node in nodenames:
2651
      nresult = volumes[node]
2652
      if nresult.offline:
2653
        continue
2654
      msg = nresult.fail_msg
2655
      if msg:
2656
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2657
        continue
2658

    
2659
      node_vols = nresult.payload[:]
2660
      node_vols.sort(key=lambda vol: vol['dev'])
2661

    
2662
      for vol in node_vols:
2663
        node_output = []
2664
        for field in self.op.output_fields:
2665
          if field == "node":
2666
            val = node
2667
          elif field == "phys":
2668
            val = vol['dev']
2669
          elif field == "vg":
2670
            val = vol['vg']
2671
          elif field == "name":
2672
            val = vol['name']
2673
          elif field == "size":
2674
            val = int(float(vol['size']))
2675
          elif field == "instance":
2676
            for inst in ilist:
2677
              if node not in lv_by_node[inst]:
2678
                continue
2679
              if vol['name'] in lv_by_node[inst][node]:
2680
                val = inst.name
2681
                break
2682
            else:
2683
              val = '-'
2684
          else:
2685
            raise errors.ParameterError(field)
2686
          node_output.append(str(val))
2687

    
2688
        output.append(node_output)
2689

    
2690
    return output
2691

    
2692

    
2693
class LUQueryNodeStorage(NoHooksLU):
2694
  """Logical unit for getting information on storage units on node(s).
2695

2696
  """
2697
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2698
  REQ_BGL = False
2699
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2700

    
2701
  def ExpandNames(self):
2702
    storage_type = self.op.storage_type
2703

    
2704
    if storage_type not in constants.VALID_STORAGE_TYPES:
2705
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2706
                                 errors.ECODE_INVAL)
2707

    
2708
    _CheckOutputFields(static=self._FIELDS_STATIC,
2709
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2710
                       selected=self.op.output_fields)
2711

    
2712
    self.needed_locks = {}
2713
    self.share_locks[locking.LEVEL_NODE] = 1
2714

    
2715
    if self.op.nodes:
2716
      self.needed_locks[locking.LEVEL_NODE] = \
2717
        _GetWantedNodes(self, self.op.nodes)
2718
    else:
2719
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2720

    
2721
  def CheckPrereq(self):
2722
    """Check prerequisites.
2723

2724
    This checks that the fields required are valid output fields.
2725

2726
    """
2727
    self.op.name = getattr(self.op, "name", None)
2728

    
2729
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2730

    
2731
  def Exec(self, feedback_fn):
2732
    """Computes the list of nodes and their attributes.
2733

2734
    """
2735
    # Always get name to sort by
2736
    if constants.SF_NAME in self.op.output_fields:
2737
      fields = self.op.output_fields[:]
2738
    else:
2739
      fields = [constants.SF_NAME] + self.op.output_fields
2740

    
2741
    # Never ask for node or type as it's only known to the LU
2742
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
2743
      while extra in fields:
2744
        fields.remove(extra)
2745

    
2746
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2747
    name_idx = field_idx[constants.SF_NAME]
2748

    
2749
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2750
    data = self.rpc.call_storage_list(self.nodes,
2751
                                      self.op.storage_type, st_args,
2752
                                      self.op.name, fields)
2753

    
2754
    result = []
2755

    
2756
    for node in utils.NiceSort(self.nodes):
2757
      nresult = data[node]
2758
      if nresult.offline:
2759
        continue
2760

    
2761
      msg = nresult.fail_msg
2762
      if msg:
2763
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2764
        continue
2765

    
2766
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2767

    
2768
      for name in utils.NiceSort(rows.keys()):
2769
        row = rows[name]
2770

    
2771
        out = []
2772

    
2773
        for field in self.op.output_fields:
2774
          if field == constants.SF_NODE:
2775
            val = node
2776
          elif field == constants.SF_TYPE:
2777
            val = self.op.storage_type
2778
          elif field in field_idx:
2779
            val = row[field_idx[field]]
2780
          else:
2781
            raise errors.ParameterError(field)
2782

    
2783
          out.append(val)
2784

    
2785
        result.append(out)
2786

    
2787
    return result
2788

    
2789

    
2790
class LUModifyNodeStorage(NoHooksLU):
2791
  """Logical unit for modifying a storage volume on a node.
2792

2793
  """
2794
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2795
  REQ_BGL = False
2796

    
2797
  def CheckArguments(self):
2798
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2799
    if node_name is None:
2800
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
2801
                                 errors.ECODE_NOENT)
2802

    
2803
    self.op.node_name = node_name
2804

    
2805
    storage_type = self.op.storage_type
2806
    if storage_type not in constants.VALID_STORAGE_TYPES:
2807
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2808
                                 errors.ECODE_INVAL)
2809

    
2810
  def ExpandNames(self):
2811
    self.needed_locks = {
2812
      locking.LEVEL_NODE: self.op.node_name,
2813
      }
2814

    
2815
  def CheckPrereq(self):
2816
    """Check prerequisites.
2817

2818
    """
2819
    storage_type = self.op.storage_type
2820

    
2821
    try:
2822
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2823
    except KeyError:
2824
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2825
                                 " modified" % storage_type,
2826
                                 errors.ECODE_INVAL)
2827

    
2828
    diff = set(self.op.changes.keys()) - modifiable
2829
    if diff:
2830
      raise errors.OpPrereqError("The following fields can not be modified for"
2831
                                 " storage units of type '%s': %r" %
2832
                                 (storage_type, list(diff)),
2833
                                 errors.ECODE_INVAL)
2834

    
2835
  def Exec(self, feedback_fn):
2836
    """Computes the list of nodes and their attributes.
2837

2838
    """
2839
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2840
    result = self.rpc.call_storage_modify(self.op.node_name,
2841
                                          self.op.storage_type, st_args,
2842
                                          self.op.name, self.op.changes)
2843
    result.Raise("Failed to modify storage unit '%s' on %s" %
2844
                 (self.op.name, self.op.node_name))
2845

    
2846

    
2847
class LUAddNode(LogicalUnit):
2848
  """Logical unit for adding node to the cluster.
2849

2850
  """
2851
  HPATH = "node-add"
2852
  HTYPE = constants.HTYPE_NODE
2853
  _OP_REQP = ["node_name"]
2854

    
2855
  def BuildHooksEnv(self):
2856
    """Build hooks env.
2857

2858
    This will run on all nodes before, and on all nodes + the new node after.
2859

2860
    """
2861
    env = {
2862
      "OP_TARGET": self.op.node_name,
2863
      "NODE_NAME": self.op.node_name,
2864
      "NODE_PIP": self.op.primary_ip,
2865
      "NODE_SIP": self.op.secondary_ip,
2866
      }
2867
    nodes_0 = self.cfg.GetNodeList()
2868
    nodes_1 = nodes_0 + [self.op.node_name, ]
2869
    return env, nodes_0, nodes_1
2870

    
2871
  def CheckPrereq(self):
2872
    """Check prerequisites.
2873

2874
    This checks:
2875
     - the new node is not already in the config
2876
     - it is resolvable
2877
     - its parameters (single/dual homed) matches the cluster
2878

2879
    Any errors are signaled by raising errors.OpPrereqError.
2880

2881
    """
2882
    node_name = self.op.node_name
2883
    cfg = self.cfg
2884

    
2885
    dns_data = utils.GetHostInfo(node_name)
2886

    
2887
    node = dns_data.name
2888
    primary_ip = self.op.primary_ip = dns_data.ip
2889
    secondary_ip = getattr(self.op, "secondary_ip", None)
2890
    if secondary_ip is None:
2891
      secondary_ip = primary_ip
2892
    if not utils.IsValidIP(secondary_ip):
2893
      raise errors.OpPrereqError("Invalid secondary IP given",
2894
                                 errors.ECODE_INVAL)
2895
    self.op.secondary_ip = secondary_ip
2896

    
2897
    node_list = cfg.GetNodeList()
2898
    if not self.op.readd and node in node_list:
2899
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2900
                                 node, errors.ECODE_EXISTS)
2901
    elif self.op.readd and node not in node_list:
2902
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
2903
                                 errors.ECODE_NOENT)
2904

    
2905
    for existing_node_name in node_list:
2906
      existing_node = cfg.GetNodeInfo(existing_node_name)
2907

    
2908
      if self.op.readd and node == existing_node_name:
2909
        if (existing_node.primary_ip != primary_ip or
2910
            existing_node.secondary_ip != secondary_ip):
2911
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2912
                                     " address configuration as before",
2913
                                     errors.ECODE_INVAL)
2914
        continue
2915

    
2916
      if (existing_node.primary_ip == primary_ip or
2917
          existing_node.secondary_ip == primary_ip or
2918
          existing_node.primary_ip == secondary_ip or
2919
          existing_node.secondary_ip == secondary_ip):
2920
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2921
                                   " existing node %s" % existing_node.name,
2922
                                   errors.ECODE_NOTUNIQUE)
2923

    
2924
    # check that the type of the node (single versus dual homed) is the
2925
    # same as for the master
2926
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2927
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2928
    newbie_singlehomed = secondary_ip == primary_ip
2929
    if master_singlehomed != newbie_singlehomed:
2930
      if master_singlehomed:
2931
        raise errors.OpPrereqError("The master has no private ip but the"
2932
                                   " new node has one",
2933
                                   errors.ECODE_INVAL)
2934
      else:
2935
        raise errors.OpPrereqError("The master has a private ip but the"
2936
                                   " new node doesn't have one",
2937
                                   errors.ECODE_INVAL)
2938

    
2939
    # checks reachability
2940
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2941
      raise errors.OpPrereqError("Node not reachable by ping",
2942
                                 errors.ECODE_ENVIRON)
2943

    
2944
    if not newbie_singlehomed:
2945
      # check reachability from my secondary ip to newbie's secondary ip
2946
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2947
                           source=myself.secondary_ip):
2948
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2949
                                   " based ping to noded port",
2950
                                   errors.ECODE_ENVIRON)
2951

    
2952
    if self.op.readd:
2953
      exceptions = [node]
2954
    else:
2955
      exceptions = []
2956

    
2957
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
2958

    
2959
    if self.op.readd:
2960
      self.new_node = self.cfg.GetNodeInfo(node)
2961
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2962
    else:
2963
      self.new_node = objects.Node(name=node,
2964
                                   primary_ip=primary_ip,
2965
                                   secondary_ip=secondary_ip,
2966
                                   master_candidate=self.master_candidate,
2967
                                   offline=False, drained=False)
2968

    
2969
  def Exec(self, feedback_fn):
2970
    """Adds the new node to the cluster.
2971

2972
    """
2973
    new_node = self.new_node
2974
    node = new_node.name
2975

    
2976
    # for re-adds, reset the offline/drained/master-candidate flags;
2977
    # we need to reset here, otherwise offline would prevent RPC calls
2978
    # later in the procedure; this also means that if the re-add
2979
    # fails, we are left with a non-offlined, broken node
2980
    if self.op.readd:
2981
      new_node.drained = new_node.offline = False
2982
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2983
      # if we demote the node, we do cleanup later in the procedure
2984
      new_node.master_candidate = self.master_candidate
2985

    
2986
    # notify the user about any possible mc promotion
2987
    if new_node.master_candidate:
2988
      self.LogInfo("Node will be a master candidate")
2989

    
2990
    # check connectivity
2991
    result = self.rpc.call_version([node])[node]
2992
    result.Raise("Can't get version information from node %s" % node)
2993
    if constants.PROTOCOL_VERSION == result.payload:
2994
      logging.info("Communication to node %s fine, sw version %s match",
2995
                   node, result.payload)
2996
    else:
2997
      raise errors.OpExecError("Version mismatch master version %s,"
2998
                               " node version %s" %
2999
                               (constants.PROTOCOL_VERSION, result.payload))
3000

    
3001
    # setup ssh on node
3002
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3003
      logging.info("Copy ssh key to node %s", node)
3004
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3005
      keyarray = []
3006
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3007
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3008
                  priv_key, pub_key]
3009

    
3010
      for i in keyfiles:
3011
        keyarray.append(utils.ReadFile(i))
3012

    
3013
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3014
                                      keyarray[2], keyarray[3], keyarray[4],
3015
                                      keyarray[5])
3016
      result.Raise("Cannot transfer ssh keys to the new node")
3017

    
3018
    # Add node to our /etc/hosts, and add key to known_hosts
3019
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3020
      utils.AddHostToEtcHosts(new_node.name)
3021

    
3022
    if new_node.secondary_ip != new_node.primary_ip:
3023
      result = self.rpc.call_node_has_ip_address(new_node.name,
3024
                                                 new_node.secondary_ip)
3025
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3026
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3027
      if not result.payload:
3028
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3029
                                 " you gave (%s). Please fix and re-run this"
3030
                                 " command." % new_node.secondary_ip)
3031

    
3032
    node_verify_list = [self.cfg.GetMasterNode()]
3033
    node_verify_param = {
3034
      constants.NV_NODELIST: [node],
3035
      # TODO: do a node-net-test as well?
3036
    }
3037

    
3038
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3039
                                       self.cfg.GetClusterName())
3040
    for verifier in node_verify_list:
3041
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3042
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3043
      if nl_payload:
3044
        for failed in nl_payload:
3045
          feedback_fn("ssh/hostname verification failed"
3046
                      " (checking from %s): %s" %
3047
                      (verifier, nl_payload[failed]))
3048
        raise errors.OpExecError("ssh/hostname verification failed.")
3049

    
3050
    if self.op.readd:
3051
      _RedistributeAncillaryFiles(self)
3052
      self.context.ReaddNode(new_node)
3053
      # make sure we redistribute the config
3054
      self.cfg.Update(new_node, feedback_fn)
3055
      # and make sure the new node will not have old files around
3056
      if not new_node.master_candidate:
3057
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3058
        msg = result.fail_msg
3059
        if msg:
3060
          self.LogWarning("Node failed to demote itself from master"
3061
                          " candidate status: %s" % msg)
3062
    else:
3063
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3064
      self.context.AddNode(new_node, self.proc.GetECId())
3065

    
3066

    
3067
class LUSetNodeParams(LogicalUnit):
3068
  """Modifies the parameters of a node.
3069

3070
  """
3071
  HPATH = "node-modify"
3072
  HTYPE = constants.HTYPE_NODE
3073
  _OP_REQP = ["node_name"]
3074
  REQ_BGL = False
3075

    
3076
  def CheckArguments(self):
3077
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
3078
    if node_name is None:
3079
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
3080
                                 errors.ECODE_INVAL)
3081
    self.op.node_name = node_name
3082
    _CheckBooleanOpField(self.op, 'master_candidate')
3083
    _CheckBooleanOpField(self.op, 'offline')
3084
    _CheckBooleanOpField(self.op, 'drained')
3085
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3086
    if all_mods.count(None) == 3:
3087
      raise errors.OpPrereqError("Please pass at least one modification",
3088
                                 errors.ECODE_INVAL)
3089
    if all_mods.count(True) > 1:
3090
      raise errors.OpPrereqError("Can't set the node into more than one"
3091
                                 " state at the same time",
3092
                                 errors.ECODE_INVAL)
3093

    
3094
  def ExpandNames(self):
3095
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3096

    
3097
  def BuildHooksEnv(self):
3098
    """Build hooks env.
3099

3100
    This runs on the master node.
3101

3102
    """
3103
    env = {
3104
      "OP_TARGET": self.op.node_name,
3105
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3106
      "OFFLINE": str(self.op.offline),
3107
      "DRAINED": str(self.op.drained),
3108
      }
3109
    nl = [self.cfg.GetMasterNode(),
3110
          self.op.node_name]
3111
    return env, nl, nl
3112

    
3113
  def CheckPrereq(self):
3114
    """Check prerequisites.
3115

3116
    This only checks the instance list against the existing names.
3117

3118
    """
3119
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3120

    
3121
    if (self.op.master_candidate is not None or
3122
        self.op.drained is not None or
3123
        self.op.offline is not None):
3124
      # we can't change the master's node flags
3125
      if self.op.node_name == self.cfg.GetMasterNode():
3126
        raise errors.OpPrereqError("The master role can be changed"
3127
                                   " only via masterfailover",
3128
                                   errors.ECODE_INVAL)
3129

    
3130
    # Boolean value that tells us whether we're offlining or draining the node
3131
    offline_or_drain = self.op.offline == True or self.op.drained == True
3132
    deoffline_or_drain = self.op.offline == False or self.op.drained == False
3133

    
3134
    if (node.master_candidate and
3135
        (self.op.master_candidate == False or offline_or_drain)):
3136
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
3137
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
3138
      if mc_now <= cp_size:
3139
        msg = ("Not enough master candidates (desired"
3140
               " %d, new value will be %d)" % (cp_size, mc_now-1))
3141
        # Only allow forcing the operation if it's an offline/drain operation,
3142
        # and we could not possibly promote more nodes.
3143
        # FIXME: this can still lead to issues if in any way another node which
3144
        # could be promoted appears in the meantime.
3145
        if self.op.force and offline_or_drain and mc_should == mc_max:
3146
          self.LogWarning(msg)
3147
        else:
3148
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
3149

    
3150
    if (self.op.master_candidate == True and
3151
        ((node.offline and not self.op.offline == False) or
3152
         (node.drained and not self.op.drained == False))):
3153
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3154
                                 " to master_candidate" % node.name,
3155
                                 errors.ECODE_INVAL)
3156

    
3157
    # If we're being deofflined/drained, we'll MC ourself if needed
3158
    if (deoffline_or_drain and not offline_or_drain and not
3159
        self.op.master_candidate == True):
3160
      self.op.master_candidate = _DecideSelfPromotion(self)
3161
      if self.op.master_candidate:
3162
        self.LogInfo("Autopromoting node to master candidate")
3163

    
3164
    return
3165

    
3166
  def Exec(self, feedback_fn):
3167
    """Modifies a node.
3168

3169
    """
3170
    node = self.node
3171

    
3172
    result = []
3173
    changed_mc = False
3174

    
3175
    if self.op.offline is not None:
3176
      node.offline = self.op.offline
3177
      result.append(("offline", str(self.op.offline)))
3178
      if self.op.offline == True:
3179
        if node.master_candidate:
3180
          node.master_candidate = False
3181
          changed_mc = True
3182
          result.append(("master_candidate", "auto-demotion due to offline"))
3183
        if node.drained:
3184
          node.drained = False
3185
          result.append(("drained", "clear drained status due to offline"))
3186

    
3187
    if self.op.master_candidate is not None:
3188
      node.master_candidate = self.op.master_candidate
3189
      changed_mc = True
3190
      result.append(("master_candidate", str(self.op.master_candidate)))
3191
      if self.op.master_candidate == False:
3192
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3193
        msg = rrc.fail_msg
3194
        if msg:
3195
          self.LogWarning("Node failed to demote itself: %s" % msg)
3196

    
3197
    if self.op.drained is not None:
3198
      node.drained = self.op.drained
3199
      result.append(("drained", str(self.op.drained)))
3200
      if self.op.drained == True:
3201
        if node.master_candidate:
3202
          node.master_candidate = False
3203
          changed_mc = True
3204
          result.append(("master_candidate", "auto-demotion due to drain"))
3205
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3206
          msg = rrc.fail_msg
3207
          if msg:
3208
            self.LogWarning("Node failed to demote itself: %s" % msg)
3209
        if node.offline:
3210
          node.offline = False
3211
          result.append(("offline", "clear offline status due to drain"))
3212

    
3213
    # this will trigger configuration file update, if needed
3214
    self.cfg.Update(node, feedback_fn)
3215
    # this will trigger job queue propagation or cleanup
3216
    if changed_mc:
3217
      self.context.ReaddNode(node)
3218

    
3219
    return result
3220

    
3221

    
3222
class LUPowercycleNode(NoHooksLU):
3223
  """Powercycles a node.
3224

3225
  """
3226
  _OP_REQP = ["node_name", "force"]
3227
  REQ_BGL = False
3228

    
3229
  def CheckArguments(self):
3230
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
3231
    if node_name is None:
3232
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
3233
                                 errors.ECODE_NOENT)
3234
    self.op.node_name = node_name
3235
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
3236
      raise errors.OpPrereqError("The node is the master and the force"
3237
                                 " parameter was not set",
3238
                                 errors.ECODE_INVAL)
3239

    
3240
  def ExpandNames(self):
3241
    """Locking for PowercycleNode.
3242

3243
    This is a last-resort option and shouldn't block on other
3244
    jobs. Therefore, we grab no locks.
3245

3246
    """
3247
    self.needed_locks = {}
3248

    
3249
  def CheckPrereq(self):
3250
    """Check prerequisites.
3251

3252
    This LU has no prereqs.
3253

3254
    """
3255
    pass
3256

    
3257
  def Exec(self, feedback_fn):
3258
    """Reboots a node.
3259

3260
    """
3261
    result = self.rpc.call_node_powercycle(self.op.node_name,
3262
                                           self.cfg.GetHypervisorType())
3263
    result.Raise("Failed to schedule the reboot")
3264
    return result.payload
3265

    
3266

    
3267
class LUQueryClusterInfo(NoHooksLU):
3268
  """Query cluster configuration.
3269

3270
  """
3271
  _OP_REQP = []
3272
  REQ_BGL = False
3273

    
3274
  def ExpandNames(self):
3275
    self.needed_locks = {}
3276

    
3277
  def CheckPrereq(self):
3278
    """No prerequsites needed for this LU.
3279

3280
    """
3281
    pass
3282

    
3283
  def Exec(self, feedback_fn):
3284
    """Return cluster config.
3285

3286
    """
3287
    cluster = self.cfg.GetClusterInfo()
3288
    result = {
3289
      "software_version": constants.RELEASE_VERSION,
3290
      "protocol_version": constants.PROTOCOL_VERSION,
3291
      "config_version": constants.CONFIG_VERSION,
3292
      "os_api_version": max(constants.OS_API_VERSIONS),
3293
      "export_version": constants.EXPORT_VERSION,
3294
      "architecture": (platform.architecture()[0], platform.machine()),
3295
      "name": cluster.cluster_name,
3296
      "master": cluster.master_node,
3297
      "default_hypervisor": cluster.enabled_hypervisors[0],
3298
      "enabled_hypervisors": cluster.enabled_hypervisors,
3299
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3300
                        for hypervisor_name in cluster.enabled_hypervisors]),
3301
      "beparams": cluster.beparams,
3302
      "nicparams": cluster.nicparams,
3303
      "candidate_pool_size": cluster.candidate_pool_size,
3304
      "master_netdev": cluster.master_netdev,
3305
      "volume_group_name": cluster.volume_group_name,
3306
      "file_storage_dir": cluster.file_storage_dir,
3307
      "ctime": cluster.ctime,
3308
      "mtime": cluster.mtime,
3309
      "uuid": cluster.uuid,
3310
      "tags": list(cluster.GetTags()),
3311
      }
3312

    
3313
    return result
3314

    
3315

    
3316
class LUQueryConfigValues(NoHooksLU):
3317
  """Return configuration values.
3318

3319
  """
3320
  _OP_REQP = []
3321
  REQ_BGL = False
3322
  _FIELDS_DYNAMIC = utils.FieldSet()
3323
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3324
                                  "watcher_pause")
3325

    
3326
  def ExpandNames(self):
3327
    self.needed_locks = {}
3328

    
3329
    _CheckOutputFields(static=self._FIELDS_STATIC,
3330
                       dynamic=self._FIELDS_DYNAMIC,
3331
                       selected=self.op.output_fields)
3332

    
3333
  def CheckPrereq(self):
3334
    """No prerequisites.
3335

3336
    """
3337
    pass
3338

    
3339
  def Exec(self, feedback_fn):
3340
    """Dump a representation of the cluster config to the standard output.
3341

3342
    """
3343
    values = []
3344
    for field in self.op.output_fields:
3345
      if field == "cluster_name":
3346
        entry = self.cfg.GetClusterName()
3347
      elif field == "master_node":
3348
        entry = self.cfg.GetMasterNode()
3349
      elif field == "drain_flag":
3350
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3351
      elif field == "watcher_pause":
3352
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3353
      else:
3354
        raise errors.ParameterError(field)
3355
      values.append(entry)
3356
    return values
3357

    
3358

    
3359
class LUActivateInstanceDisks(NoHooksLU):
3360
  """Bring up an instance's disks.
3361

3362
  """
3363
  _OP_REQP = ["instance_name"]
3364
  REQ_BGL = False
3365

    
3366
  def ExpandNames(self):
3367
    self._ExpandAndLockInstance()
3368
    self.needed_locks[locking.LEVEL_NODE] = []
3369
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3370

    
3371
  def DeclareLocks(self, level):
3372
    if level == locking.LEVEL_NODE:
3373
      self._LockInstancesNodes()
3374

    
3375
  def CheckPrereq(self):
3376
    """Check prerequisites.
3377

3378
    This checks that the instance is in the cluster.
3379

3380
    """
3381
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3382
    assert self.instance is not None, \
3383
      "Cannot retrieve locked instance %s" % self.op.instance_name
3384
    _CheckNodeOnline(self, self.instance.primary_node)
3385
    if not hasattr(self.op, "ignore_size"):
3386
      self.op.ignore_size = False
3387

    
3388
  def Exec(self, feedback_fn):
3389
    """Activate the disks.
3390

3391
    """
3392
    disks_ok, disks_info = \
3393
              _AssembleInstanceDisks(self, self.instance,
3394
                                     ignore_size=self.op.ignore_size)
3395
    if not disks_ok:
3396
      raise errors.OpExecError("Cannot activate block devices")
3397

    
3398
    return disks_info
3399

    
3400

    
3401
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3402
                           ignore_size=False):
3403
  """Prepare the block devices for an instance.
3404

3405
  This sets up the block devices on all nodes.
3406

3407
  @type lu: L{LogicalUnit}
3408
  @param lu: the logical unit on whose behalf we execute
3409
  @type instance: L{objects.Instance}
3410
  @param instance: the instance for whose disks we assemble
3411
  @type ignore_secondaries: boolean
3412
  @param ignore_secondaries: if true, errors on secondary nodes
3413
      won't result in an error return from the function
3414
  @type ignore_size: boolean
3415
  @param ignore_size: if true, the current known size of the disk
3416
      will not be used during the disk activation, useful for cases
3417
      when the size is wrong
3418
  @return: False if the operation failed, otherwise a list of
3419
      (host, instance_visible_name, node_visible_name)
3420
      with the mapping from node devices to instance devices
3421

3422
  """
3423
  device_info = []
3424
  disks_ok = True
3425
  iname = instance.name
3426
  # With the two passes mechanism we try to reduce the window of
3427
  # opportunity for the race condition of switching DRBD to primary
3428
  # before handshaking occured, but we do not eliminate it
3429

    
3430
  # The proper fix would be to wait (with some limits) until the
3431
  # connection has been made and drbd transitions from WFConnection
3432
  # into any other network-connected state (Connected, SyncTarget,
3433
  # SyncSource, etc.)
3434

    
3435
  # 1st pass, assemble on all nodes in secondary mode
3436
  for inst_disk in instance.disks:
3437
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3438
      if ignore_size:
3439
        node_disk = node_disk.Copy()
3440
        node_disk.UnsetSize()
3441
      lu.cfg.SetDiskID(node_disk, node)
3442
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3443
      msg = result.fail_msg
3444
      if msg:
3445
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3446
                           " (is_primary=False, pass=1): %s",
3447
                           inst_disk.iv_name, node, msg)
3448
        if not ignore_secondaries:
3449
          disks_ok = False
3450

    
3451
  # FIXME: race condition on drbd migration to primary
3452

    
3453
  # 2nd pass, do only the primary node
3454
  for inst_disk in instance.disks:
3455
    dev_path = None
3456

    
3457
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3458
      if node != instance.primary_node:
3459
        continue
3460
      if ignore_size:
3461
        node_disk = node_disk.Copy()
3462
        node_disk.UnsetSize()
3463
      lu.cfg.SetDiskID(node_disk, node)
3464
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3465
      msg = result.fail_msg
3466
      if msg:
3467
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3468
                           " (is_primary=True, pass=2): %s",
3469
                           inst_disk.iv_name, node, msg)
3470
        disks_ok = False
3471
      else:
3472
        dev_path = result.payload
3473

    
3474
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3475

    
3476
  # leave the disks configured for the primary node
3477
  # this is a workaround that would be fixed better by
3478
  # improving the logical/physical id handling
3479
  for disk in instance.disks:
3480
    lu.cfg.SetDiskID(disk, instance.primary_node)
3481

    
3482
  return disks_ok, device_info
3483

    
3484

    
3485
def _StartInstanceDisks(lu, instance, force):
3486
  """Start the disks of an instance.
3487

3488
  """
3489
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3490
                                           ignore_secondaries=force)
3491
  if not disks_ok:
3492
    _ShutdownInstanceDisks(lu, instance)
3493
    if force is not None and not force:
3494
      lu.proc.LogWarning("", hint="If the message above refers to a"
3495
                         " secondary node,"
3496
                         " you can retry the operation using '--force'.")
3497
    raise errors.OpExecError("Disk consistency error")
3498

    
3499

    
3500
class LUDeactivateInstanceDisks(NoHooksLU):
3501
  """Shutdown an instance's disks.
3502

3503
  """
3504
  _OP_REQP = ["instance_name"]
3505
  REQ_BGL = False
3506

    
3507
  def ExpandNames(self):
3508
    self._ExpandAndLockInstance()
3509
    self.needed_locks[locking.LEVEL_NODE] = []
3510
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3511

    
3512
  def DeclareLocks(self, level):
3513
    if level == locking.LEVEL_NODE:
3514
      self._LockInstancesNodes()
3515

    
3516
  def CheckPrereq(self):
3517
    """Check prerequisites.
3518

3519
    This checks that the instance is in the cluster.
3520

3521
    """
3522
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3523
    assert self.instance is not None, \
3524
      "Cannot retrieve locked instance %s" % self.op.instance_name
3525

    
3526
  def Exec(self, feedback_fn):
3527
    """Deactivate the disks
3528

3529
    """
3530
    instance = self.instance
3531
    _SafeShutdownInstanceDisks(self, instance)
3532

    
3533

    
3534
def _SafeShutdownInstanceDisks(lu, instance):
3535
  """Shutdown block devices of an instance.
3536

3537
  This function checks if an instance is running, before calling
3538
  _ShutdownInstanceDisks.
3539

3540
  """
3541
  pnode = instance.primary_node
3542
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3543
  ins_l.Raise("Can't contact node %s" % pnode)
3544

    
3545
  if instance.name in ins_l.payload:
3546
    raise errors.OpExecError("Instance is running, can't shutdown"
3547
                             " block devices.")
3548

    
3549
  _ShutdownInstanceDisks(lu, instance)
3550

    
3551

    
3552
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3553
  """Shutdown block devices of an instance.
3554

3555
  This does the shutdown on all nodes of the instance.
3556

3557
  If the ignore_primary is false, errors on the primary node are
3558
  ignored.
3559

3560
  """
3561
  all_result = True
3562
  for disk in instance.disks:
3563
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3564
      lu.cfg.SetDiskID(top_disk, node)
3565
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3566
      msg = result.fail_msg
3567
      if msg:
3568
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3569
                      disk.iv_name, node, msg)
3570
        if not ignore_primary or node != instance.primary_node:
3571
          all_result = False
3572
  return all_result
3573

    
3574

    
3575
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3576
  """Checks if a node has enough free memory.
3577

3578
  This function check if a given node has the needed amount of free
3579
  memory. In case the node has less memory or we cannot get the
3580
  information from the node, this function raise an OpPrereqError
3581
  exception.
3582

3583
  @type lu: C{LogicalUnit}
3584
  @param lu: a logical unit from which we get configuration data
3585
  @type node: C{str}
3586
  @param node: the node to check
3587
  @type reason: C{str}
3588
  @param reason: string to use in the error message
3589
  @type requested: C{int}
3590
  @param requested: the amount of memory in MiB to check for
3591
  @type hypervisor_name: C{str}
3592
  @param hypervisor_name: the hypervisor to ask for memory stats
3593
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3594
      we cannot check the node
3595

3596
  """
3597
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3598
  nodeinfo[node].Raise("Can't get data from node %s" % node,
3599
                       prereq=True, ecode=errors.ECODE_ENVIRON)
3600
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3601
  if not isinstance(free_mem, int):
3602
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3603
                               " was '%s'" % (node, free_mem),
3604
                               errors.ECODE_ENVIRON)
3605
  if requested > free_mem:
3606
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3607
                               " needed %s MiB, available %s MiB" %
3608
                               (node, reason, requested, free_mem),
3609
                               errors.ECODE_NORES)
3610

    
3611

    
3612
class LUStartupInstance(LogicalUnit):
3613
  """Starts an instance.
3614

3615
  """
3616
  HPATH = "instance-start"
3617
  HTYPE = constants.HTYPE_INSTANCE
3618
  _OP_REQP = ["instance_name", "force"]
3619
  REQ_BGL = False
3620

    
3621
  def ExpandNames(self):
3622
    self._ExpandAndLockInstance()
3623

    
3624
  def BuildHooksEnv(self):
3625
    """Build hooks env.
3626

3627
    This runs on master, primary and secondary nodes of the instance.
3628

3629
    """
3630
    env = {
3631
      "FORCE": self.op.force,
3632
      }
3633
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3634
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3635
    return env, nl, nl
3636

    
3637
  def CheckPrereq(self):
3638
    """Check prerequisites.
3639

3640
    This checks that the instance is in the cluster.
3641

3642
    """
3643
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3644
    assert self.instance is not None, \
3645
      "Cannot retrieve locked instance %s" % self.op.instance_name
3646

    
3647
    # extra beparams
3648
    self.beparams = getattr(self.op, "beparams", {})
3649
    if self.beparams:
3650
      if not isinstance(self.beparams, dict):
3651
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3652
                                   " dict" % (type(self.beparams), ),
3653
                                   errors.ECODE_INVAL)
3654
      # fill the beparams dict
3655
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3656
      self.op.beparams = self.beparams
3657

    
3658
    # extra hvparams
3659
    self.hvparams = getattr(self.op, "hvparams", {})
3660
    if self.hvparams:
3661
      if not isinstance(self.hvparams, dict):
3662
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3663
                                   " dict" % (type(self.hvparams), ),
3664
                                   errors.ECODE_INVAL)
3665

    
3666
      # check hypervisor parameter syntax (locally)
3667
      cluster = self.cfg.GetClusterInfo()
3668
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3669
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3670
                                    instance.hvparams)
3671
      filled_hvp.update(self.hvparams)
3672
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3673
      hv_type.CheckParameterSyntax(filled_hvp)
3674
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3675
      self.op.hvparams = self.hvparams
3676

    
3677
    _CheckNodeOnline(self, instance.primary_node)
3678

    
3679
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3680
    # check bridges existence
3681
    _CheckInstanceBridgesExist(self, instance)
3682

    
3683
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3684
                                              instance.name,
3685
                                              instance.hypervisor)
3686
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3687
                      prereq=True, ecode=errors.ECODE_ENVIRON)
3688
    if not remote_info.payload: # not running already
3689
      _CheckNodeFreeMemory(self, instance.primary_node,
3690
                           "starting instance %s" % instance.name,
3691
                           bep[constants.BE_MEMORY], instance.hypervisor)
3692

    
3693
  def Exec(self, feedback_fn):
3694
    """Start the instance.
3695

3696
    """
3697
    instance = self.instance
3698
    force = self.op.force
3699

    
3700
    self.cfg.MarkInstanceUp(instance.name)
3701

    
3702
    node_current = instance.primary_node
3703

    
3704
    _StartInstanceDisks(self, instance, force)
3705

    
3706
    result = self.rpc.call_instance_start(node_current, instance,
3707
                                          self.hvparams, self.beparams)
3708
    msg = result.fail_msg
3709
    if msg:
3710
      _ShutdownInstanceDisks(self, instance)
3711
      raise errors.OpExecError("Could not start instance: %s" % msg)
3712

    
3713

    
3714
class LURebootInstance(LogicalUnit):
3715
  """Reboot an instance.
3716

3717
  """
3718
  HPATH = "instance-reboot"
3719
  HTYPE = constants.HTYPE_INSTANCE
3720
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3721
  REQ_BGL = False
3722

    
3723
  def CheckArguments(self):
3724
    """Check the arguments.
3725

3726
    """
3727
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3728
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
3729

    
3730
  def ExpandNames(self):
3731
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3732
                                   constants.INSTANCE_REBOOT_HARD,
3733
                                   constants.INSTANCE_REBOOT_FULL]:
3734
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3735
                                  (constants.INSTANCE_REBOOT_SOFT,
3736
                                   constants.INSTANCE_REBOOT_HARD,
3737
                                   constants.INSTANCE_REBOOT_FULL))
3738
    self._ExpandAndLockInstance()
3739

    
3740
  def BuildHooksEnv(self):
3741
    """Build hooks env.
3742

3743
    This runs on master, primary and secondary nodes of the instance.
3744

3745
    """
3746
    env = {
3747
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3748
      "REBOOT_TYPE": self.op.reboot_type,
3749
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3750
      }
3751
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3752
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3753
    return env, nl, nl
3754

    
3755
  def CheckPrereq(self):
3756
    """Check prerequisites.
3757

3758
    This checks that the instance is in the cluster.
3759

3760
    """
3761
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3762
    assert self.instance is not None, \
3763
      "Cannot retrieve locked instance %s" % self.op.instance_name
3764

    
3765
    _CheckNodeOnline(self, instance.primary_node)
3766

    
3767
    # check bridges existence
3768
    _CheckInstanceBridgesExist(self, instance)
3769

    
3770
  def Exec(self, feedback_fn):
3771
    """Reboot the instance.
3772

3773
    """
3774
    instance = self.instance
3775
    ignore_secondaries = self.op.ignore_secondaries
3776
    reboot_type = self.op.reboot_type
3777

    
3778
    node_current = instance.primary_node
3779

    
3780
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3781
                       constants.INSTANCE_REBOOT_HARD]:
3782
      for disk in instance.disks:
3783
        self.cfg.SetDiskID(disk, node_current)
3784
      result = self.rpc.call_instance_reboot(node_current, instance,
3785
                                             reboot_type,
3786
                                             self.shutdown_timeout)
3787
      result.Raise("Could not reboot instance")
3788
    else:
3789
      result = self.rpc.call_instance_shutdown(node_current, instance,
3790
                                               self.shutdown_timeout)
3791
      result.Raise("Could not shutdown instance for full reboot")
3792
      _ShutdownInstanceDisks(self, instance)
3793
      _StartInstanceDisks(self, instance, ignore_secondaries)
3794
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3795
      msg = result.fail_msg
3796
      if msg:
3797
        _ShutdownInstanceDisks(self, instance)
3798
        raise errors.OpExecError("Could not start instance for"
3799
                                 " full reboot: %s" % msg)
3800

    
3801
    self.cfg.MarkInstanceUp(instance.name)
3802

    
3803

    
3804
class LUShutdownInstance(LogicalUnit):
3805
  """Shutdown an instance.
3806

3807
  """
3808
  HPATH = "instance-stop"
3809
  HTYPE = constants.HTYPE_INSTANCE
3810
  _OP_REQP = ["instance_name"]
3811
  REQ_BGL = False
3812

    
3813
  def CheckArguments(self):
3814
    """Check the arguments.
3815

3816
    """
3817
    self.timeout = getattr(self.op, "timeout",
3818
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
3819

    
3820
  def ExpandNames(self):
3821
    self._ExpandAndLockInstance()
3822

    
3823
  def BuildHooksEnv(self):
3824
    """Build hooks env.
3825

3826
    This runs on master, primary and secondary nodes of the instance.
3827

3828
    """
3829
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3830
    env["TIMEOUT"] = self.timeout
3831
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3832
    return env, nl, nl
3833

    
3834
  def CheckPrereq(self):
3835
    """Check prerequisites.
3836

3837
    This checks that the instance is in the cluster.
3838

3839
    """
3840
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3841
    assert self.instance is not None, \
3842
      "Cannot retrieve locked instance %s" % self.op.instance_name
3843
    _CheckNodeOnline(self, self.instance.primary_node)
3844

    
3845
  def Exec(self, feedback_fn):
3846
    """Shutdown the instance.
3847

3848
    """
3849
    instance = self.instance
3850
    node_current = instance.primary_node
3851
    timeout = self.timeout
3852
    self.cfg.MarkInstanceDown(instance.name)
3853
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
3854
    msg = result.fail_msg
3855
    if msg:
3856
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3857

    
3858
    _ShutdownInstanceDisks(self, instance)
3859

    
3860

    
3861
class LUReinstallInstance(LogicalUnit):
3862
  """Reinstall an instance.
3863

3864
  """
3865
  HPATH = "instance-reinstall"
3866
  HTYPE = constants.HTYPE_INSTANCE
3867
  _OP_REQP = ["instance_name"]
3868
  REQ_BGL = False
3869

    
3870
  def ExpandNames(self):
3871
    self._ExpandAndLockInstance()
3872

    
3873
  def BuildHooksEnv(self):
3874
    """Build hooks env.
3875

3876
    This runs on master, primary and secondary nodes of the instance.
3877

3878
    """
3879
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3880
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3881
    return env, nl, nl
3882

    
3883
  def CheckPrereq(self):
3884
    """Check prerequisites.
3885

3886
    This checks that the instance is in the cluster and is not running.
3887

3888
    """
3889
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3890
    assert instance is not None, \
3891
      "Cannot retrieve locked instance %s" % self.op.instance_name
3892
    _CheckNodeOnline(self, instance.primary_node)
3893

    
3894
    if instance.disk_template == constants.DT_DISKLESS:
3895
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3896
                                 self.op.instance_name,
3897
                                 errors.ECODE_INVAL)
3898
    if instance.admin_up:
3899
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3900
                                 self.op.instance_name,
3901
                                 errors.ECODE_STATE)
3902
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3903
                                              instance.name,
3904
                                              instance.hypervisor)
3905
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3906
                      prereq=True, ecode=errors.ECODE_ENVIRON)
3907
    if remote_info.payload:
3908
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3909
                                 (self.op.instance_name,
3910
                                  instance.primary_node),
3911
                                 errors.ECODE_STATE)
3912

    
3913
    self.op.os_type = getattr(self.op, "os_type", None)
3914
    self.op.force_variant = getattr(self.op, "force_variant", False)
3915
    if self.op.os_type is not None:
3916
      # OS verification
3917
      pnode = self.cfg.GetNodeInfo(
3918
        self.cfg.ExpandNodeName(instance.primary_node))
3919
      if pnode is None:
3920
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3921
                                   self.op.pnode, errors.ECODE_NOENT)
3922
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3923
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3924
                   (self.op.os_type, pnode.name),
3925
                   prereq=True, ecode=errors.ECODE_INVAL)
3926
      if not self.op.force_variant:
3927
        _CheckOSVariant(result.payload, self.op.os_type)
3928

    
3929
    self.instance = instance
3930

    
3931
  def Exec(self, feedback_fn):
3932
    """Reinstall the instance.
3933

3934
    """
3935
    inst = self.instance
3936

    
3937
    if self.op.os_type is not None:
3938
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3939
      inst.os = self.op.os_type
3940
      self.cfg.Update(inst, feedback_fn)
3941

    
3942
    _StartInstanceDisks(self, inst, None)
3943
    try:
3944
      feedback_fn("Running the instance OS create scripts...")
3945
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3946
      result.Raise("Could not install OS for instance %s on node %s" %
3947
                   (inst.name, inst.primary_node))
3948
    finally:
3949
      _ShutdownInstanceDisks(self, inst)
3950

    
3951

    
3952
class LURecreateInstanceDisks(LogicalUnit):
3953
  """Recreate an instance's missing disks.
3954

3955
  """
3956
  HPATH = "instance-recreate-disks"
3957
  HTYPE = constants.HTYPE_INSTANCE
3958
  _OP_REQP = ["instance_name", "disks"]
3959
  REQ_BGL = False
3960

    
3961
  def CheckArguments(self):
3962
    """Check the arguments.
3963

3964
    """
3965
    if not isinstance(self.op.disks, list):
3966
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
3967
    for item in self.op.disks:
3968
      if (not isinstance(item, int) or
3969
          item < 0):
3970
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3971
                                   str(item), errors.ECODE_INVAL)
3972

    
3973
  def ExpandNames(self):
3974
    self._ExpandAndLockInstance()
3975

    
3976
  def BuildHooksEnv(self):
3977
    """Build hooks env.
3978

3979
    This runs on master, primary and secondary nodes of the instance.
3980

3981
    """
3982
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3983
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3984
    return env, nl, nl
3985

    
3986
  def CheckPrereq(self):
3987
    """Check prerequisites.
3988

3989
    This checks that the instance is in the cluster and is not running.
3990

3991
    """
3992
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3993
    assert instance is not None, \
3994
      "Cannot retrieve locked instance %s" % self.op.instance_name
3995
    _CheckNodeOnline(self, instance.primary_node)
3996

    
3997
    if instance.disk_template == constants.DT_DISKLESS:
3998
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3999
                                 self.op.instance_name, errors.ECODE_INVAL)
4000
    if instance.admin_up:
4001
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4002
                                 self.op.instance_name, errors.ECODE_STATE)
4003
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4004
                                              instance.name,
4005
                                              instance.hypervisor)
4006
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4007
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4008
    if remote_info.payload:
4009
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4010
                                 (self.op.instance_name,
4011
                                  instance.primary_node), errors.ECODE_STATE)
4012

    
4013
    if not self.op.disks:
4014
      self.op.disks = range(len(instance.disks))
4015
    else:
4016
      for idx in self.op.disks:
4017
        if idx >= len(instance.disks):
4018
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4019
                                     errors.ECODE_INVAL)
4020

    
4021
    self.instance = instance
4022

    
4023
  def Exec(self, feedback_fn):
4024
    """Recreate the disks.
4025

4026
    """
4027
    to_skip = []
4028
    for idx, disk in enumerate(self.instance.disks):
4029
      if idx not in self.op.disks: # disk idx has not been passed in
4030
        to_skip.append(idx)
4031
        continue
4032

    
4033
    _CreateDisks(self, self.instance, to_skip=to_skip)
4034

    
4035

    
4036
class LURenameInstance(LogicalUnit):
4037
  """Rename an instance.
4038

4039
  """
4040
  HPATH = "instance-rename"
4041
  HTYPE = constants.HTYPE_INSTANCE
4042
  _OP_REQP = ["instance_name", "new_name"]
4043

    
4044
  def BuildHooksEnv(self):
4045
    """Build hooks env.
4046

4047
    This runs on master, primary and secondary nodes of the instance.
4048

4049
    """
4050
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4051
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4052
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4053
    return env, nl, nl
4054

    
4055
  def CheckPrereq(self):
4056
    """Check prerequisites.
4057

4058
    This checks that the instance is in the cluster and is not running.
4059

4060
    """
4061
    instance = self.cfg.GetInstanceInfo(
4062
      self.cfg.ExpandInstanceName(self.op.instance_name))
4063
    if instance is None:
4064
      raise errors.OpPrereqError("Instance '%s' not known" %
4065
                                 self.op.instance_name, errors.ECODE_NOENT)
4066
    _CheckNodeOnline(self, instance.primary_node)
4067

    
4068
    if instance.admin_up:
4069
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4070
                                 self.op.instance_name, errors.ECODE_STATE)
4071
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4072
                                              instance.name,
4073
                                              instance.hypervisor)
4074
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4075
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4076
    if remote_info.payload:
4077
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4078
                                 (self.op.instance_name,
4079
                                  instance.primary_node), errors.ECODE_STATE)
4080
    self.instance = instance
4081

    
4082
    # new name verification
4083
    name_info = utils.GetHostInfo(self.op.new_name)
4084

    
4085
    self.op.new_name = new_name = name_info.name
4086
    instance_list = self.cfg.GetInstanceList()
4087
    if new_name in instance_list:
4088
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4089
                                 new_name, errors.ECODE_EXISTS)
4090

    
4091
    if not getattr(self.op, "ignore_ip", False):
4092
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4093
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4094
                                   (name_info.ip, new_name),
4095
                                   errors.ECODE_NOTUNIQUE)
4096

    
4097

    
4098
  def Exec(self, feedback_fn):
4099
    """Reinstall the instance.
4100

4101
    """
4102
    inst = self.instance
4103
    old_name = inst.name
4104

    
4105
    if inst.disk_template == constants.DT_FILE:
4106
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4107

    
4108
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4109
    # Change the instance lock. This is definitely safe while we hold the BGL
4110
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4111
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4112

    
4113
    # re-read the instance from the configuration after rename
4114
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4115

    
4116
    if inst.disk_template == constants.DT_FILE:
4117
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4118
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4119
                                                     old_file_storage_dir,
4120
                                                     new_file_storage_dir)
4121
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4122
                   " (but the instance has been renamed in Ganeti)" %
4123
                   (inst.primary_node, old_file_storage_dir,
4124
                    new_file_storage_dir))
4125

    
4126
    _StartInstanceDisks(self, inst, None)
4127
    try:
4128
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4129
                                                 old_name)
4130
      msg = result.fail_msg
4131
      if msg:
4132
        msg = ("Could not run OS rename script for instance %s on node %s"
4133
               " (but the instance has been renamed in Ganeti): %s" %
4134
               (inst.name, inst.primary_node, msg))
4135
        self.proc.LogWarning(msg)
4136
    finally:
4137
      _ShutdownInstanceDisks(self, inst)
4138

    
4139

    
4140
class LURemoveInstance(LogicalUnit):
4141
  """Remove an instance.
4142

4143
  """
4144
  HPATH = "instance-remove"
4145
  HTYPE = constants.HTYPE_INSTANCE
4146
  _OP_REQP = ["instance_name", "ignore_failures"]
4147
  REQ_BGL = False
4148

    
4149
  def CheckArguments(self):
4150
    """Check the arguments.
4151

4152
    """
4153
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4154
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4155

    
4156
  def ExpandNames(self):
4157
    self._ExpandAndLockInstance()
4158
    self.needed_locks[locking.LEVEL_NODE] = []
4159
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4160

    
4161
  def DeclareLocks(self, level):
4162
    if level == locking.LEVEL_NODE:
4163
      self._LockInstancesNodes()
4164

    
4165
  def BuildHooksEnv(self):
4166
    """Build hooks env.
4167

4168
    This runs on master, primary and secondary nodes of the instance.
4169

4170
    """
4171
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4172
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4173
    nl = [self.cfg.GetMasterNode()]
4174
    return env, nl, nl
4175

    
4176
  def CheckPrereq(self):
4177
    """Check prerequisites.
4178

4179
    This checks that the instance is in the cluster.
4180

4181
    """
4182
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4183
    assert self.instance is not None, \
4184
      "Cannot retrieve locked instance %s" % self.op.instance_name
4185

    
4186
  def Exec(self, feedback_fn):
4187
    """Remove the instance.
4188

4189
    """
4190
    instance = self.instance
4191
    logging.info("Shutting down instance %s on node %s",
4192
                 instance.name, instance.primary_node)
4193

    
4194
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4195
                                             self.shutdown_timeout)
4196
    msg = result.fail_msg
4197
    if msg:
4198
      if self.op.ignore_failures:
4199
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4200
      else:
4201
        raise errors.OpExecError("Could not shutdown instance %s on"
4202
                                 " node %s: %s" %
4203
                                 (instance.name, instance.primary_node, msg))
4204

    
4205
    logging.info("Removing block devices for instance %s", instance.name)
4206

    
4207
    if not _RemoveDisks(self, instance):
4208
      if self.op.ignore_failures:
4209
        feedback_fn("Warning: can't remove instance's disks")
4210
      else:
4211
        raise errors.OpExecError("Can't remove instance's disks")
4212

    
4213
    logging.info("Removing instance %s out of cluster config", instance.name)
4214

    
4215
    self.cfg.RemoveInstance(instance.name)
4216
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4217

    
4218

    
4219
class LUQueryInstances(NoHooksLU):
4220
  """Logical unit for querying instances.
4221

4222
  """
4223
  _OP_REQP = ["output_fields", "names", "use_locking"]
4224
  REQ_BGL = False
4225
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4226
                    "serial_no", "ctime", "mtime", "uuid"]
4227
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4228
                                    "admin_state",
4229
                                    "disk_template", "ip", "mac", "bridge",
4230
                                    "nic_mode", "nic_link",
4231
                                    "sda_size", "sdb_size", "vcpus", "tags",
4232
                                    "network_port", "beparams",
4233
                                    r"(disk)\.(size)/([0-9]+)",
4234
                                    r"(disk)\.(sizes)", "disk_usage",
4235
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4236
                                    r"(nic)\.(bridge)/([0-9]+)",
4237
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4238
                                    r"(disk|nic)\.(count)",
4239
                                    "hvparams",
4240
                                    ] + _SIMPLE_FIELDS +
4241
                                  ["hv/%s" % name
4242
                                   for name in constants.HVS_PARAMETERS
4243
                                   if name not in constants.HVC_GLOBALS] +
4244
                                  ["be/%s" % name
4245
                                   for name in constants.BES_PARAMETERS])
4246
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4247

    
4248

    
4249
  def ExpandNames(self):
4250
    _CheckOutputFields(static=self._FIELDS_STATIC,
4251
                       dynamic=self._FIELDS_DYNAMIC,
4252
                       selected=self.op.output_fields)
4253

    
4254
    self.needed_locks = {}
4255
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4256
    self.share_locks[locking.LEVEL_NODE] = 1
4257

    
4258
    if self.op.names:
4259
      self.wanted = _GetWantedInstances(self, self.op.names)
4260
    else:
4261
      self.wanted = locking.ALL_SET
4262

    
4263
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4264
    self.do_locking = self.do_node_query and self.op.use_locking
4265
    if self.do_locking:
4266
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4267
      self.needed_locks[locking.LEVEL_NODE] = []
4268
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4269

    
4270
  def DeclareLocks(self, level):
4271
    if level == locking.LEVEL_NODE and self.do_locking:
4272
      self._LockInstancesNodes()
4273

    
4274
  def CheckPrereq(self):
4275
    """Check prerequisites.
4276

4277
    """
4278
    pass
4279

    
4280
  def Exec(self, feedback_fn):
4281
    """Computes the list of nodes and their attributes.
4282

4283
    """
4284
    all_info = self.cfg.GetAllInstancesInfo()
4285
    if self.wanted == locking.ALL_SET:
4286
      # caller didn't specify instance names, so ordering is not important
4287
      if self.do_locking:
4288
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4289
      else:
4290
        instance_names = all_info.keys()
4291
      instance_names = utils.NiceSort(instance_names)
4292
    else:
4293
      # caller did specify names, so we must keep the ordering
4294
      if self.do_locking:
4295
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4296
      else:
4297
        tgt_set = all_info.keys()
4298
      missing = set(self.wanted).difference(tgt_set)
4299
      if missing:
4300
        raise errors.OpExecError("Some instances were removed before"
4301
                                 " retrieving their data: %s" % missing)
4302
      instance_names = self.wanted
4303

    
4304
    instance_list = [all_info[iname] for iname in instance_names]
4305

    
4306
    # begin data gathering
4307

    
4308
    nodes = frozenset([inst.primary_node for inst in instance_list])
4309
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4310

    
4311
    bad_nodes = []
4312
    off_nodes = []
4313
    if self.do_node_query:
4314
      live_data = {}
4315
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4316
      for name in nodes:
4317
        result = node_data[name]
4318
        if result.offline:
4319
          # offline nodes will be in both lists
4320
          off_nodes.append(name)
4321
        if result.fail_msg:
4322
          bad_nodes.append(name)
4323
        else:
4324
          if result.payload:
4325
            live_data.update(result.payload)
4326
          # else no instance is alive
4327
    else:
4328
      live_data = dict([(name, {}) for name in instance_names])
4329

    
4330
    # end data gathering
4331

    
4332
    HVPREFIX = "hv/"
4333
    BEPREFIX = "be/"
4334
    output = []
4335
    cluster = self.cfg.GetClusterInfo()
4336
    for instance in instance_list:
4337
      iout = []
4338
      i_hv = cluster.FillHV(instance, skip_globals=True)
4339
      i_be = cluster.FillBE(instance)
4340
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4341
                                 nic.nicparams) for nic in instance.nics]
4342
      for field in self.op.output_fields:
4343
        st_match = self._FIELDS_STATIC.Matches(field)
4344
        if field in self._SIMPLE_FIELDS:
4345
          val = getattr(instance, field)
4346
        elif field == "pnode":
4347
          val = instance.primary_node
4348
        elif field == "snodes":
4349
          val = list(instance.secondary_nodes)
4350
        elif field == "admin_state":
4351
          val = instance.admin_up
4352
        elif field == "oper_state":
4353
          if instance.primary_node in bad_nodes:
4354
            val = None
4355
          else:
4356
            val = bool(live_data.get(instance.name))
4357
        elif field == "status":
4358
          if instance.primary_node in off_nodes:
4359
            val = "ERROR_nodeoffline"
4360
          elif instance.primary_node in bad_nodes:
4361
            val = "ERROR_nodedown"
4362
          else:
4363
            running = bool(live_data.get(instance.name))
4364
            if running:
4365
              if instance.admin_up:
4366
                val = "running"
4367
              else:
4368
                val = "ERROR_up"
4369
            else:
4370
              if instance.admin_up:
4371
                val = "ERROR_down"
4372
              else:
4373
                val = "ADMIN_down"
4374
        elif field == "oper_ram":
4375
          if instance.primary_node in bad_nodes:
4376
            val = None
4377
          elif instance.name in live_data:
4378
            val = live_data[instance.name].get("memory", "?")
4379
          else:
4380
            val = "-"
4381
        elif field == "vcpus":
4382
          val = i_be[constants.BE_VCPUS]
4383
        elif field == "disk_template":
4384
          val = instance.disk_template
4385
        elif field == "ip":
4386
          if instance.nics:
4387
            val = instance.nics[0].ip
4388
          else:
4389
            val = None
4390
        elif field == "nic_mode":
4391
          if instance.nics:
4392
            val = i_nicp[0][constants.NIC_MODE]
4393
          else:
4394
            val = None
4395
        elif field == "nic_link":
4396
          if instance.nics:
4397
            val = i_nicp[0][constants.NIC_LINK]
4398
          else:
4399
            val = None
4400
        elif field == "bridge":
4401
          if (instance.nics and
4402
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4403
            val = i_nicp[0][constants.NIC_LINK]
4404
          else:
4405
            val = None
4406
        elif field == "mac":
4407
          if instance.nics:
4408
            val = instance.nics[0].mac
4409
          else:
4410
            val = None
4411
        elif field == "sda_size" or field == "sdb_size":
4412
          idx = ord(field[2]) - ord('a')
4413
          try:
4414
            val = instance.FindDisk(idx).size
4415
          except errors.OpPrereqError:
4416
            val = None
4417
        elif field == "disk_usage": # total disk usage per node
4418
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4419
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4420
        elif field == "tags":
4421
          val = list(instance.GetTags())
4422
        elif field == "hvparams":
4423
          val = i_hv
4424
        elif (field.startswith(HVPREFIX) and
4425
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4426
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4427
          val = i_hv.get(field[len(HVPREFIX):], None)
4428
        elif field == "beparams":
4429
          val = i_be
4430
        elif (field.startswith(BEPREFIX) and
4431
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4432
          val = i_be.get(field[len(BEPREFIX):], None)
4433
        elif st_match and st_match.groups():
4434
          # matches a variable list
4435
          st_groups = st_match.groups()
4436
          if st_groups and st_groups[0] == "disk":
4437
            if st_groups[1] == "count":
4438
              val = len(instance.disks)
4439
            elif st_groups[1] == "sizes":
4440
              val = [disk.size for disk in instance.disks]
4441
            elif st_groups[1] == "size":
4442
              try:
4443
                val = instance.FindDisk(st_groups[2]).size
4444
              except errors.OpPrereqError:
4445
                val = None
4446
            else:
4447
              assert False, "Unhandled disk parameter"
4448
          elif st_groups[0] == "nic":
4449
            if st_groups[1] == "count":
4450
              val = len(instance.nics)
4451
            elif st_groups[1] == "macs":
4452
              val = [nic.mac for nic in instance.nics]
4453
            elif st_groups[1] == "ips":
4454
              val = [nic.ip for nic in instance.nics]
4455
            elif st_groups[1] == "modes":
4456
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4457
            elif st_groups[1] == "links":
4458
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4459
            elif st_groups[1] == "bridges":
4460
              val = []
4461
              for nicp in i_nicp:
4462
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4463
                  val.append(nicp[constants.NIC_LINK])
4464
                else:
4465
                  val.append(None)
4466
            else:
4467
              # index-based item
4468
              nic_idx = int(st_groups[2])
4469
              if nic_idx >= len(instance.nics):
4470
                val = None
4471
              else:
4472
                if st_groups[1] == "mac":
4473
                  val = instance.nics[nic_idx].mac
4474
                elif st_groups[1] == "ip":
4475
                  val = instance.nics[nic_idx].ip
4476
                elif st_groups[1] == "mode":
4477
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4478
                elif st_groups[1] == "link":
4479
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4480
                elif st_groups[1] == "bridge":
4481
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4482
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4483
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4484
                  else:
4485
                    val = None
4486
                else:
4487
                  assert False, "Unhandled NIC parameter"
4488
          else:
4489
            assert False, ("Declared but unhandled variable parameter '%s'" %
4490
                           field)
4491
        else:
4492
          assert False, "Declared but unhandled parameter '%s'" % field
4493
        iout.append(val)
4494
      output.append(iout)
4495

    
4496
    return output
4497

    
4498

    
4499
class LUFailoverInstance(LogicalUnit):
4500
  """Failover an instance.
4501

4502
  """
4503
  HPATH = "instance-failover"
4504
  HTYPE = constants.HTYPE_INSTANCE
4505
  _OP_REQP = ["instance_name", "ignore_consistency"]
4506
  REQ_BGL = False
4507

    
4508
  def CheckArguments(self):
4509
    """Check the arguments.
4510

4511
    """
4512
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4513
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4514

    
4515
  def ExpandNames(self):
4516
    self._ExpandAndLockInstance()
4517
    self.needed_locks[locking.LEVEL_NODE] = []
4518
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4519

    
4520
  def DeclareLocks(self, level):
4521
    if level == locking.LEVEL_NODE:
4522
      self._LockInstancesNodes()
4523

    
4524
  def BuildHooksEnv(self):
4525
    """Build hooks env.
4526

4527
    This runs on master, primary and secondary nodes of the instance.
4528

4529
    """
4530
    env = {
4531
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4532
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4533
      }
4534
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4535
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4536
    return env, nl, nl
4537

    
4538
  def CheckPrereq(self):
4539
    """Check prerequisites.
4540

4541
    This checks that the instance is in the cluster.
4542

4543
    """
4544
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4545
    assert self.instance is not None, \
4546
      "Cannot retrieve locked instance %s" % self.op.instance_name
4547

    
4548
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4549
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4550
      raise errors.OpPrereqError("Instance's disk layout is not"
4551
                                 " network mirrored, cannot failover.",
4552
                                 errors.ECODE_STATE)
4553

    
4554
    secondary_nodes = instance.secondary_nodes
4555
    if not secondary_nodes:
4556
      raise errors.ProgrammerError("no secondary node but using "
4557
                                   "a mirrored disk template")
4558

    
4559
    target_node = secondary_nodes[0]
4560
    _CheckNodeOnline(self, target_node)
4561
    _CheckNodeNotDrained(self, target_node)
4562
    if instance.admin_up:
4563
      # check memory requirements on the secondary node
4564
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4565
                           instance.name, bep[constants.BE_MEMORY],
4566
                           instance.hypervisor)
4567
    else:
4568
      self.LogInfo("Not checking memory on the secondary node as"
4569
                   " instance will not be started")
4570

    
4571
    # check bridge existence
4572
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4573

    
4574
  def Exec(self, feedback_fn):
4575
    """Failover an instance.
4576

4577
    The failover is done by shutting it down on its present node and
4578
    starting it on the secondary.
4579

4580
    """
4581
    instance = self.instance
4582

    
4583
    source_node = instance.primary_node
4584
    target_node = instance.secondary_nodes[0]
4585

    
4586
    if instance.admin_up:
4587
      feedback_fn("* checking disk consistency between source and target")
4588
      for dev in instance.disks:
4589
        # for drbd, these are drbd over lvm
4590
        if not _CheckDiskConsistency(self, dev, target_node, False):
4591
          if not self.op.ignore_consistency:
4592
            raise errors.OpExecError("Disk %s is degraded on target node,"
4593
                                     " aborting failover." % dev.iv_name)
4594
    else:
4595
      feedback_fn("* not checking disk consistency as instance is not running")
4596

    
4597
    feedback_fn("* shutting down instance on source node")
4598
    logging.info("Shutting down instance %s on node %s",
4599
                 instance.name, source_node)
4600

    
4601
    result = self.rpc.call_instance_shutdown(source_node, instance,
4602
                                             self.shutdown_timeout)
4603
    msg = result.fail_msg
4604
    if msg:
4605
      if self.op.ignore_consistency:
4606
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4607
                             " Proceeding anyway. Please make sure node"
4608
                             " %s is down. Error details: %s",
4609
                             instance.name, source_node, source_node, msg)
4610
      else:
4611
        raise errors.OpExecError("Could not shutdown instance %s on"
4612
                                 " node %s: %s" %
4613
                                 (instance.name, source_node, msg))
4614

    
4615
    feedback_fn("* deactivating the instance's disks on source node")
4616
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4617
      raise errors.OpExecError("Can't shut down the instance's disks.")
4618

    
4619
    instance.primary_node = target_node
4620
    # distribute new instance config to the other nodes
4621
    self.cfg.Update(instance, feedback_fn)
4622

    
4623
    # Only start the instance if it's marked as up
4624
    if instance.admin_up:
4625
      feedback_fn("* activating the instance's disks on target node")
4626
      logging.info("Starting instance %s on node %s",
4627
                   instance.name, target_node)
4628

    
4629
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4630
                                               ignore_secondaries=True)
4631
      if not disks_ok:
4632
        _ShutdownInstanceDisks(self, instance)
4633
        raise errors.OpExecError("Can't activate the instance's disks")
4634

    
4635
      feedback_fn("* starting the instance on the target node")
4636
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4637
      msg = result.fail_msg
4638
      if msg:
4639
        _ShutdownInstanceDisks(self, instance)
4640
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4641
                                 (instance.name, target_node, msg))
4642

    
4643

    
4644
class LUMigrateInstance(LogicalUnit):
4645
  """Migrate an instance.
4646

4647
  This is migration without shutting down, compared to the failover,
4648
  which is done with shutdown.
4649

4650
  """
4651
  HPATH = "instance-migrate"
4652
  HTYPE = constants.HTYPE_INSTANCE
4653
  _OP_REQP = ["instance_name", "live", "cleanup"]
4654

    
4655
  REQ_BGL = False
4656

    
4657
  def ExpandNames(self):
4658
    self._ExpandAndLockInstance()
4659

    
4660
    self.needed_locks[locking.LEVEL_NODE] = []
4661
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4662

    
4663
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4664
                                       self.op.live, self.op.cleanup)
4665
    self.tasklets = [self._migrater]
4666

    
4667
  def DeclareLocks(self, level):
4668
    if level == locking.LEVEL_NODE:
4669
      self._LockInstancesNodes()
4670

    
4671
  def BuildHooksEnv(self):
4672
    """Build hooks env.
4673

4674
    This runs on master, primary and secondary nodes of the instance.
4675

4676
    """
4677
    instance = self._migrater.instance
4678
    env = _BuildInstanceHookEnvByObject(self, instance)
4679
    env["MIGRATE_LIVE"] = self.op.live
4680
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4681
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4682
    return env, nl, nl
4683

    
4684

    
4685
class LUMoveInstance(LogicalUnit):
4686
  """Move an instance by data-copying.
4687

4688
  """
4689
  HPATH = "instance-move"
4690
  HTYPE = constants.HTYPE_INSTANCE
4691
  _OP_REQP = ["instance_name", "target_node"]
4692
  REQ_BGL = False
4693

    
4694
  def CheckArguments(self):
4695
    """Check the arguments.
4696

4697
    """
4698
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4699
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4700

    
4701
  def ExpandNames(self):
4702
    self._ExpandAndLockInstance()
4703
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
4704
    if target_node is None:
4705
      raise errors.OpPrereqError("Node '%s' not known" %
4706
                                  self.op.target_node, errors.ECODE_NOENT)
4707
    self.op.target_node = target_node
4708
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4709
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4710

    
4711
  def DeclareLocks(self, level):
4712
    if level == locking.LEVEL_NODE:
4713
      self._LockInstancesNodes(primary_only=True)
4714

    
4715
  def BuildHooksEnv(self):
4716
    """Build hooks env.
4717

4718
    This runs on master, primary and secondary nodes of the instance.
4719

4720
    """
4721
    env = {
4722
      "TARGET_NODE": self.op.target_node,
4723
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4724
      }
4725
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4726
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4727
                                       self.op.target_node]
4728
    return env, nl, nl
4729

    
4730
  def CheckPrereq(self):
4731
    """Check prerequisites.
4732

4733
    This checks that the instance is in the cluster.
4734

4735
    """
4736
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4737
    assert self.instance is not None, \
4738
      "Cannot retrieve locked instance %s" % self.op.instance_name
4739

    
4740
    node = self.cfg.GetNodeInfo(self.op.target_node)
4741
    assert node is not None, \
4742
      "Cannot retrieve locked node %s" % self.op.target_node
4743

    
4744
    self.target_node = target_node = node.name
4745

    
4746
    if target_node == instance.primary_node:
4747
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4748
                                 (instance.name, target_node),
4749
                                 errors.ECODE_STATE)
4750

    
4751
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4752

    
4753
    for idx, dsk in enumerate(instance.disks):
4754
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4755
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4756
                                   " cannot copy", errors.ECODE_STATE)
4757

    
4758
    _CheckNodeOnline(self, target_node)
4759
    _CheckNodeNotDrained(self, target_node)
4760

    
4761
    if instance.admin_up:
4762
      # check memory requirements on the secondary node
4763
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4764
                           instance.name, bep[constants.BE_MEMORY],
4765
                           instance.hypervisor)
4766
    else:
4767
      self.LogInfo("Not checking memory on the secondary node as"
4768
                   " instance will not be started")
4769

    
4770
    # check bridge existence
4771
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4772

    
4773
  def Exec(self, feedback_fn):
4774
    """Move an instance.
4775

4776
    The move is done by shutting it down on its present node, copying
4777
    the data over (slow) and starting it on the new node.
4778

4779
    """
4780
    instance = self.instance
4781

    
4782
    source_node = instance.primary_node
4783
    target_node = self.target_node
4784

    
4785
    self.LogInfo("Shutting down instance %s on source node %s",
4786
                 instance.name, source_node)
4787

    
4788
    result = self.rpc.call_instance_shutdown(source_node, instance,
4789
                                             self.shutdown_timeout)
4790
    msg = result.fail_msg
4791
    if msg:
4792
      if self.op.ignore_consistency:
4793
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4794
                             " Proceeding anyway. Please make sure node"
4795
                             " %s is down. Error details: %s",
4796
                             instance.name, source_node, source_node, msg)
4797
      else:
4798
        raise errors.OpExecError("Could not shutdown instance %s on"
4799
                                 " node %s: %s" %
4800
                                 (instance.name, source_node, msg))
4801

    
4802
    # create the target disks
4803
    try:
4804
      _CreateDisks(self, instance, target_node=target_node)
4805
    except errors.OpExecError:
4806
      self.LogWarning("Device creation failed, reverting...")
4807
      try:
4808
        _RemoveDisks(self, instance, target_node=target_node)
4809
      finally:
4810
        self.cfg.ReleaseDRBDMinors(instance.name)
4811
        raise
4812

    
4813
    cluster_name = self.cfg.GetClusterInfo().cluster_name
4814

    
4815
    errs = []
4816
    # activate, get path, copy the data over
4817
    for idx, disk in enumerate(instance.disks):
4818
      self.LogInfo("Copying data for disk %d", idx)
4819
      result = self.rpc.call_blockdev_assemble(target_node, disk,
4820
                                               instance.name, True)
4821
      if result.fail_msg:
4822
        self.LogWarning("Can't assemble newly created disk %d: %s",
4823
                        idx, result.fail_msg)
4824
        errs.append(result.fail_msg)
4825
        break
4826
      dev_path = result.payload
4827
      result = self.rpc.call_blockdev_export(source_node, disk,
4828
                                             target_node, dev_path,
4829
                                             cluster_name)
4830
      if result.fail_msg:
4831
        self.LogWarning("Can't copy data over for disk %d: %s",
4832
                        idx, result.fail_msg)
4833
        errs.append(result.fail_msg)
4834
        break
4835

    
4836
    if errs:
4837
      self.LogWarning("Some disks failed to copy, aborting")
4838
      try:
4839
        _RemoveDisks(self, instance, target_node=target_node)
4840
      finally:
4841
        self.cfg.ReleaseDRBDMinors(instance.name)
4842
        raise errors.OpExecError("Errors during disk copy: %s" %
4843
                                 (",".join(errs),))
4844

    
4845
    instance.primary_node = target_node
4846
    self.cfg.Update(instance, feedback_fn)
4847

    
4848
    self.LogInfo("Removing the disks on the original node")
4849
    _RemoveDisks(self, instance, target_node=source_node)
4850

    
4851
    # Only start the instance if it's marked as up
4852
    if instance.admin_up:
4853
      self.LogInfo("Starting instance %s on node %s",
4854
                   instance.name, target_node)
4855

    
4856
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4857
                                           ignore_secondaries=True)
4858
      if not disks_ok:
4859
        _ShutdownInstanceDisks(self, instance)
4860
        raise errors.OpExecError("Can't activate the instance's disks")
4861

    
4862
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4863
      msg = result.fail_msg
4864
      if msg:
4865
        _ShutdownInstanceDisks(self, instance)
4866
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4867
                                 (instance.name, target_node, msg))
4868

    
4869

    
4870
class LUMigrateNode(LogicalUnit):
4871
  """Migrate all instances from a node.
4872

4873
  """
4874
  HPATH = "node-migrate"
4875
  HTYPE = constants.HTYPE_NODE
4876
  _OP_REQP = ["node_name", "live"]
4877
  REQ_BGL = False
4878

    
4879
  def ExpandNames(self):
4880
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
4881
    if self.op.node_name is None:
4882
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
4883
                                 errors.ECODE_NOENT)
4884

    
4885
    self.needed_locks = {
4886
      locking.LEVEL_NODE: [self.op.node_name],
4887
      }
4888

    
4889
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4890

    
4891
    # Create tasklets for migrating instances for all instances on this node
4892
    names = []
4893
    tasklets = []
4894

    
4895
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4896
      logging.debug("Migrating instance %s", inst.name)
4897
      names.append(inst.name)
4898

    
4899
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4900

    
4901
    self.tasklets = tasklets
4902

    
4903
    # Declare instance locks
4904
    self.needed_locks[locking.LEVEL_INSTANCE] = names
4905

    
4906
  def DeclareLocks(self, level):
4907
    if level == locking.LEVEL_NODE:
4908
      self._LockInstancesNodes()
4909

    
4910
  def BuildHooksEnv(self):
4911
    """Build hooks env.
4912

4913
    This runs on the master, the primary and all the secondaries.
4914

4915
    """
4916
    env = {
4917
      "NODE_NAME": self.op.node_name,
4918
      }
4919

    
4920
    nl = [self.cfg.GetMasterNode()]
4921

    
4922
    return (env, nl, nl)
4923

    
4924

    
4925
class TLMigrateInstance(Tasklet):
4926
  def __init__(self, lu, instance_name, live, cleanup):
4927
    """Initializes this class.
4928

4929
    """
4930
    Tasklet.__init__(self, lu)
4931

    
4932
    # Parameters
4933
    self.instance_name = instance_name
4934
    self.live = live
4935
    self.cleanup = cleanup
4936

    
4937
  def CheckPrereq(self):
4938
    """Check prerequisites.
4939

4940
    This checks that the instance is in the cluster.
4941

4942
    """
4943
    instance = self.cfg.GetInstanceInfo(
4944
      self.cfg.ExpandInstanceName(self.instance_name))
4945
    if instance is None:
4946
      raise errors.OpPrereqError("Instance '%s' not known" %
4947
                                 self.instance_name, errors.ECODE_NOENT)
4948

    
4949
    if instance.disk_template != constants.DT_DRBD8:
4950
      raise errors.OpPrereqError("Instance's disk layout is not"
4951
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
4952

    
4953
    secondary_nodes = instance.secondary_nodes
4954
    if not secondary_nodes:
4955
      raise errors.ConfigurationError("No secondary node but using"
4956
                                      " drbd8 disk template")
4957

    
4958
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4959

    
4960
    target_node = secondary_nodes[0]
4961
    # check memory requirements on the secondary node
4962
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4963
                         instance.name, i_be[constants.BE_MEMORY],
4964
                         instance.hypervisor)
4965

    
4966
    # check bridge existance
4967
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4968

    
4969
    if not self.cleanup:
4970
      _CheckNodeNotDrained(self, target_node)
4971
      result = self.rpc.call_instance_migratable(instance.primary_node,
4972
                                                 instance)
4973
      result.Raise("Can't migrate, please use failover",
4974
                   prereq=True, ecode=errors.ECODE_STATE)
4975

    
4976
    self.instance = instance
4977

    
4978
  def _WaitUntilSync(self):
4979
    """Poll with custom rpc for disk sync.
4980

4981
    This uses our own step-based rpc call.
4982

4983
    """
4984
    self.feedback_fn("* wait until resync is done")
4985
    all_done = False
4986
    while not all_done:
4987
      all_done = True
4988
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4989
                                            self.nodes_ip,
4990
                                            self.instance.disks)
4991
      min_percent = 100
4992
      for node, nres in result.items():
4993
        nres.Raise("Cannot resync disks on node %s" % node)
4994
        node_done, node_percent = nres.payload
4995
        all_done = all_done and node_done
4996
        if node_percent is not None:
4997
          min_percent = min(min_percent, node_percent)
4998
      if not all_done:
4999
        if min_percent < 100:
5000
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5001
        time.sleep(2)
5002

    
5003
  def _EnsureSecondary(self, node):
5004
    """Demote a node to secondary.
5005

5006
    """
5007
    self.feedback_fn("* switching node %s to secondary mode" % node)
5008

    
5009
    for dev in self.instance.disks:
5010
      self.cfg.SetDiskID(dev, node)
5011

    
5012
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5013
                                          self.instance.disks)
5014
    result.Raise("Cannot change disk to secondary on node %s" % node)
5015

    
5016
  def _GoStandalone(self):
5017
    """Disconnect from the network.
5018

5019
    """
5020
    self.feedback_fn("* changing into standalone mode")
5021
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5022
                                               self.instance.disks)
5023
    for node, nres in result.items():
5024
      nres.Raise("Cannot disconnect disks node %s" % node)
5025

    
5026
  def _GoReconnect(self, multimaster):
5027
    """Reconnect to the network.
5028

5029
    """
5030
    if multimaster:
5031
      msg = "dual-master"
5032
    else:
5033
      msg = "single-master"
5034
    self.feedback_fn("* changing disks into %s mode" % msg)
5035
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5036
                                           self.instance.disks,
5037
                                           self.instance.name, multimaster)
5038
    for node, nres in result.items():
5039
      nres.Raise("Cannot change disks config on node %s" % node)
5040

    
5041
  def _ExecCleanup(self):
5042
    """Try to cleanup after a failed migration.
5043

5044
    The cleanup is done by:
5045
      - check that the instance is running only on one node
5046
        (and update the config if needed)
5047
      - change disks on its secondary node to secondary
5048
      - wait until disks are fully synchronized
5049
      - disconnect from the network
5050
      - change disks into single-master mode
5051
      - wait again until disks are fully synchronized
5052

5053
    """
5054
    instance = self.instance
5055
    target_node = self.target_node
5056
    source_node = self.source_node
5057

    
5058
    # check running on only one node
5059
    self.feedback_fn("* checking where the instance actually runs"
5060
                     " (if this hangs, the hypervisor might be in"
5061
                     " a bad state)")
5062
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5063
    for node, result in ins_l.items():
5064
      result.Raise("Can't contact node %s" % node)
5065

    
5066
    runningon_source = instance.name in ins_l[source_node].payload
5067
    runningon_target = instance.name in ins_l[target_node].payload
5068

    
5069
    if runningon_source and runningon_target:
5070
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5071
                               " or the hypervisor is confused. You will have"
5072
                               " to ensure manually that it runs only on one"
5073
                               " and restart this operation.")
5074

    
5075
    if not (runningon_source or runningon_target):
5076
      raise errors.OpExecError("Instance does not seem to be running at all."
5077
                               " In this case, it's safer to repair by"
5078
                               " running 'gnt-instance stop' to ensure disk"
5079
                               " shutdown, and then restarting it.")
5080

    
5081
    if runningon_target:
5082
      # the migration has actually succeeded, we need to update the config
5083
      self.feedback_fn("* instance running on secondary node (%s),"
5084
                       " updating config" % target_node)
5085
      instance.primary_node = target_node
5086
      self.cfg.Update(instance, self.feedback_fn)
5087
      demoted_node = source_node
5088
    else:
5089
      self.feedback_fn("* instance confirmed to be running on its"
5090
                       " primary node (%s)" % source_node)
5091
      demoted_node = target_node
5092

    
5093
    self._EnsureSecondary(demoted_node)
5094
    try:
5095
      self._WaitUntilSync()
5096
    except errors.OpExecError:
5097
      # we ignore errors here, since if the device is standalone, it
5098
      # won't be able to sync
5099
      pass
5100
    self._GoStandalone()
5101
    self._GoReconnect(False)
5102
    self._WaitUntilSync()
5103

    
5104
    self.feedback_fn("* done")
5105

    
5106
  def _RevertDiskStatus(self):
5107
    """Try to revert the disk status after a failed migration.
5108

5109
    """
5110
    target_node = self.target_node
5111
    try:
5112
      self._EnsureSecondary(target_node)
5113
      self._GoStandalone()
5114
      self._GoReconnect(False)
5115
      self._WaitUntilSync()
5116
    except errors.OpExecError, err:
5117
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5118
                         " drives: error '%s'\n"
5119
                         "Please look and recover the instance status" %
5120
                         str(err))
5121

    
5122
  def _AbortMigration(self):
5123
    """Call the hypervisor code to abort a started migration.
5124

5125
    """
5126
    instance = self.instance
5127
    target_node = self.target_node
5128
    migration_info = self.migration_info
5129

    
5130
    abort_result = self.rpc.call_finalize_migration(target_node,
5131
                                                    instance,
5132
                                                    migration_info,
5133
                                                    False)
5134
    abort_msg = abort_result.fail_msg
5135
    if abort_msg:
5136
      logging.error("Aborting migration failed on target node %s: %s",
5137
                    target_node, abort_msg)
5138
      # Don't raise an exception here, as we still have to try to revert the
5139
      # disk status, even if this step failed.
5140

    
5141
  def _ExecMigration(self):
5142
    """Migrate an instance.
5143

5144
    The migrate is done by:
5145
      - change the disks into dual-master mode
5146
      - wait until disks are fully synchronized again
5147
      - migrate the instance
5148
      - change disks on the new secondary node (the old primary) to secondary
5149
      - wait until disks are fully synchronized
5150
      - change disks into single-master mode
5151

5152
    """
5153
    instance = self.instance
5154
    target_node = self.target_node
5155
    source_node = self.source_node
5156

    
5157
    self.feedback_fn("* checking disk consistency between source and target")
5158
    for dev in instance.disks:
5159
      if not _CheckDiskConsistency(self, dev, target_node, False):
5160
        raise errors.OpExecError("Disk %s is degraded or not fully"
5161
                                 " synchronized on target node,"
5162
                                 " aborting migrate." % dev.iv_name)
5163

    
5164
    # First get the migration information from the remote node
5165
    result = self.rpc.call_migration_info(source_node, instance)
5166
    msg = result.fail_msg
5167
    if msg:
5168
      log_err = ("Failed fetching source migration information from %s: %s" %
5169
                 (source_node, msg))
5170
      logging.error(log_err)
5171
      raise errors.OpExecError(log_err)
5172

    
5173
    self.migration_info = migration_info = result.payload
5174

    
5175
    # Then switch the disks to master/master mode
5176
    self._EnsureSecondary(target_node)
5177
    self._GoStandalone()
5178
    self._GoReconnect(True)
5179
    self._WaitUntilSync()
5180

    
5181
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5182
    result = self.rpc.call_accept_instance(target_node,
5183
                                           instance,
5184
                                           migration_info,
5185
                                           self.nodes_ip[target_node])
5186

    
5187
    msg = result.fail_msg
5188
    if msg:
5189
      logging.error("Instance pre-migration failed, trying to revert"
5190
                    " disk status: %s", msg)
5191
      self.feedback_fn("Pre-migration failed, aborting")
5192
      self._AbortMigration()
5193
      self._RevertDiskStatus()
5194
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5195
                               (instance.name, msg))
5196

    
5197
    self.feedback_fn("* migrating instance to %s" % target_node)
5198
    time.sleep(10)
5199
    result = self.rpc.call_instance_migrate(source_node, instance,
5200
                                            self.nodes_ip[target_node],
5201
                                            self.live)
5202
    msg = result.fail_msg
5203
    if msg:
5204
      logging.error("Instance migration failed, trying to revert"
5205
                    " disk status: %s", msg)
5206
      self.feedback_fn("Migration failed, aborting")
5207
      self._AbortMigration()
5208
      self._RevertDiskStatus()
5209
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5210
                               (instance.name, msg))
5211
    time.sleep(10)
5212

    
5213
    instance.primary_node = target_node
5214
    # distribute new instance config to the other nodes
5215
    self.cfg.Update(instance, self.feedback_fn)
5216

    
5217
    result = self.rpc.call_finalize_migration(target_node,
5218
                                              instance,
5219
                                              migration_info,
5220
                                              True)
5221
    msg = result.fail_msg
5222
    if msg:
5223
      logging.error("Instance migration succeeded, but finalization failed:"
5224
                    " %s", msg)
5225
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5226
                               msg)
5227

    
5228
    self._EnsureSecondary(source_node)
5229
    self._WaitUntilSync()
5230
    self._GoStandalone()
5231
    self._GoReconnect(False)
5232
    self._WaitUntilSync()
5233

    
5234
    self.feedback_fn("* done")
5235

    
5236
  def Exec(self, feedback_fn):
5237
    """Perform the migration.
5238

5239
    """
5240
    feedback_fn("Migrating instance %s" % self.instance.name)
5241

    
5242
    self.feedback_fn = feedback_fn
5243

    
5244
    self.source_node = self.instance.primary_node
5245
    self.target_node = self.instance.secondary_nodes[0]
5246
    self.all_nodes = [self.source_node, self.target_node]
5247
    self.nodes_ip = {
5248
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5249
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5250
      }
5251

    
5252
    if self.cleanup:
5253
      return self._ExecCleanup()
5254
    else:
5255
      return self._ExecMigration()
5256

    
5257

    
5258
def _CreateBlockDev(lu, node, instance, device, force_create,
5259
                    info, force_open):
5260
  """Create a tree of block devices on a given node.
5261

5262
  If this device type has to be created on secondaries, create it and
5263
  all its children.
5264

5265
  If not, just recurse to children keeping the same 'force' value.
5266

5267
  @param lu: the lu on whose behalf we execute
5268
  @param node: the node on which to create the device
5269
  @type instance: L{objects.Instance}
5270
  @param instance: the instance which owns the device
5271
  @type device: L{objects.Disk}
5272
  @param device: the device to create
5273
  @type force_create: boolean
5274
  @param force_create: whether to force creation of this device; this
5275
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
5284

5285
  """
5286
  if device.CreateOnSecondary():
5287
    force_create = True
5288

    
5289
  if device.children:
5290
    for child in device.children:
5291
      _CreateBlockDev(lu, node, instance, child, force_create,
5292
                      info, force_open)
5293

    
5294
  if not force_create:
5295
    return
5296

    
5297
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5298

    
5299

    
5300
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5301
  """Create a single block device on a given node.
5302

5303
  This will not recurse over children of the device, so they must be
5304
  created in advance.
5305

5306
  @param lu: the lu on whose behalf we execute
5307
  @param node: the node on which to create the device
5308
  @type instance: L{objects.Instance}
5309
  @param instance: the instance which owns the device
5310
  @type device: L{objects.Disk}
5311
  @param device: the device to create
5312
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
5319

5320
  """
5321
  lu.cfg.SetDiskID(device, node)
5322
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5323
                                       instance.name, force_open, info)
5324
  result.Raise("Can't create block device %s on"
5325
               " node %s for instance %s" % (device, node, instance.name))
5326
  if device.physical_id is None:
5327
    device.physical_id = result.payload
5328

    
5329

    
5330
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
5341

    
5342

    
5343
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
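
# Illustrative sketch (example sizes): for a single 1024 MB disk the helper
# above returns a device tree of the form
#   Disk(LD_DRBD8, size=1024,
#        logical_id=(primary, secondary, port, p_minor, s_minor, shared_secret),
#        children=[Disk(LD_LV, size=1024),   # data volume
#                  Disk(LD_LV, size=128)])   # fixed 128 MB DRBD metadata volume
# i.e. one data LV plus one metadata LV per DRBD8 device.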
5362

    
5363

    
5364
def _GenerateDiskTemplate(lu, template_name,
5365
                          instance_name, primary_node,
5366
                          secondary_nodes, disk_info,
5367
                          file_storage_dir, file_driver,
5368
                          base_index):
5369
  """Generate the entire disk layout for a given template type.
5370

5371
  """
5372
  #TODO: compute space requirements
5373

    
5374
  vgname = lu.cfg.GetVGName()
5375
  disk_count = len(disk_info)
5376
  disks = []
5377
  if template_name == constants.DT_DISKLESS:
5378
    pass
5379
  elif template_name == constants.DT_PLAIN:
5380
    if len(secondary_nodes) != 0:
5381
      raise errors.ProgrammerError("Wrong template configuration")
5382

    
5383
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5384
                                      for i in range(disk_count)])
5385
    for idx, disk in enumerate(disk_info):
5386
      disk_index = idx + base_index
5387
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5388
                              logical_id=(vgname, names[idx]),
5389
                              iv_name="disk/%d" % disk_index,
5390
                              mode=disk["mode"])
5391
      disks.append(disk_dev)
5392
  elif template_name == constants.DT_DRBD8:
5393
    if len(secondary_nodes) != 1:
5394
      raise errors.ProgrammerError("Wrong template configuration")
5395
    remote_node = secondary_nodes[0]
5396
    minors = lu.cfg.AllocateDRBDMinor(
5397
      [primary_node, remote_node] * len(disk_info), instance_name)
5398

    
5399
    names = []
5400
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5401
                                               for i in range(disk_count)]):
5402
      names.append(lv_prefix + "_data")
5403
      names.append(lv_prefix + "_meta")
5404
    for idx, disk in enumerate(disk_info):
5405
      disk_index = idx + base_index
5406
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5407
                                      disk["size"], names[idx*2:idx*2+2],
5408
                                      "disk/%d" % disk_index,
5409
                                      minors[idx*2], minors[idx*2+1])
5410
      disk_dev.mode = disk["mode"]
5411
      disks.append(disk_dev)
5412
  elif template_name == constants.DT_FILE:
5413
    if len(secondary_nodes) != 0:
5414
      raise errors.ProgrammerError("Wrong template configuration")
5415

    
5416
    for idx, disk in enumerate(disk_info):
5417
      disk_index = idx + base_index
5418
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5419
                              iv_name="disk/%d" % disk_index,
5420
                              logical_id=(file_driver,
5421
                                          "%s/disk%d" % (file_storage_dir,
5422
                                                         disk_index)),
5423
                              mode=disk["mode"])
5424
      disks.append(disk_dev)
5425
  else:
5426
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5427
  return disks
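
# Naming sketch (example values): with base_index 0 and two DRBD8 disks, the
# LVs generated above are "<unique_id>.disk0_data", "<unique_id>.disk0_meta",
# "<unique_id>.disk1_data" and "<unique_id>.disk1_meta", where <unique_id>
# comes from _GenerateUniqueNames; DT_PLAIN disks use just
# "<unique_id>.disk<N>" directly.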
5428

    
5429

    
5430
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
5435

    
5436

    
5437
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5438
  """Create all disks for an instance.
5439

5440
  This abstracts away some work from AddInstance.
5441

5442
  @type lu: L{LogicalUnit}
5443
  @param lu: the logical unit on whose behalf we execute
5444
  @type instance: L{objects.Instance}
5445
  @param instance: the instance whose disks we should create
5446
  @type to_skip: list
5447
  @param to_skip: list of indices to skip
5448
  @type target_node: string
5449
  @param target_node: if passed, overrides the target node for creation
5450
  @rtype: boolean
5451
  @return: the success of the creation
5452

5453
  """
5454
  info = _GetInstanceInfoText(instance)
5455
  if target_node is None:
5456
    pnode = instance.primary_node
5457
    all_nodes = instance.all_nodes
5458
  else:
5459
    pnode = target_node
5460
    all_nodes = [pnode]
5461

    
5462
  if instance.disk_template == constants.DT_FILE:
5463
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5464
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5465

    
5466
    result.Raise("Failed to create directory '%s' on"
5467
                 " node %s" % (file_storage_dir, pnode))
5468

    
5469
  # Note: this needs to be kept in sync with adding of disks in
5470
  # LUSetInstanceParams
5471
  for idx, device in enumerate(instance.disks):
5472
    if to_skip and idx in to_skip:
5473
      continue
5474
    logging.info("Creating volume %s for instance %s",
5475
                 device.iv_name, instance.name)
5476
    #HARDCODE
5477
    for node in all_nodes:
5478
      f_create = node == pnode
5479
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5480

    
5481

    
5482
def _RemoveDisks(lu, instance, target_node=None):
5483
  """Remove all disks for an instance.
5484

5485
  This abstracts away some work from `AddInstance()` and
5486
  `RemoveInstance()`. Note that in case some of the devices couldn't
5487
  be removed, the removal will continue with the other ones (compare
5488
  with `_CreateDisks()`).
5489

5490
  @type lu: L{LogicalUnit}
5491
  @param lu: the logical unit on whose behalf we execute
5492
  @type instance: L{objects.Instance}
5493
  @param instance: the instance whose disks we should remove
5494
  @type target_node: string
5495
  @param target_node: used to override the node on which to remove the disks
5496
  @rtype: boolean
5497
  @return: the success of the removal
5498

5499
  """
5500
  logging.info("Removing block devices for instance %s", instance.name)
5501

    
5502
  all_result = True
5503
  for device in instance.disks:
5504
    if target_node:
5505
      edata = [(target_node, device)]
5506
    else:
5507
      edata = device.ComputeNodeTree(instance.primary_node)
5508
    for node, disk in edata:
5509
      lu.cfg.SetDiskID(disk, node)
5510
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5511
      if msg:
5512
        lu.LogWarning("Could not remove block device %s on node %s,"
5513
                      " continuing anyway: %s", device.iv_name, node, msg)
5514
        all_result = False
5515

    
5516
  if instance.disk_template == constants.DT_FILE:
5517
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5518
    if target_node:
5519
      tgt = target_node
5520
    else:
5521
      tgt = instance.primary_node
5522
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5523
    if result.fail_msg:
5524
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5525
                    file_storage_dir, instance.primary_node, result.fail_msg)
5526
      all_result = False
5527

    
5528
  return all_result
5529

    
5530

    
5531
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
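
# Worked example (example sizes): for two disks of 1024 MB and 512 MB this
# returns 1536 for DT_PLAIN and (1024 + 128) + (512 + 128) = 1792 for
# DT_DRBD8, while DT_DISKLESS and DT_FILE need no volume group space (None).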
5549

    
5550

    
5551
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5576

    
5577

    
5578
class LUCreateInstance(LogicalUnit):
5579
  """Create an instance.
5580

5581
  """
5582
  HPATH = "instance-add"
5583
  HTYPE = constants.HTYPE_INSTANCE
5584
  _OP_REQP = ["instance_name", "disks", "disk_template",
5585
              "mode", "start",
5586
              "wait_for_sync", "ip_check", "nics",
5587
              "hvparams", "beparams"]
5588
  REQ_BGL = False
5589

    
5590
  def _ExpandNode(self, node):
5591
    """Expands and checks one node name.
5592

5593
    """
5594
    node_full = self.cfg.ExpandNodeName(node)
5595
    if node_full is None:
5596
      raise errors.OpPrereqError("Unknown node %s" % node, errors.ECODE_NOENT)
5597
    return node_full
5598

    
5599
  def ExpandNames(self):
5600
    """ExpandNames for CreateInstance.
5601

5602
    Figure out the right locks for instance creation.
5603

5604
    """
5605
    self.needed_locks = {}
5606

    
5607
    # set optional parameters to none if they don't exist
5608
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5609
      if not hasattr(self.op, attr):
5610
        setattr(self.op, attr, None)
5611

    
5612
    # cheap checks, mostly valid constants given
5613

    
5614
    # verify creation mode
5615
    if self.op.mode not in (constants.INSTANCE_CREATE,
5616
                            constants.INSTANCE_IMPORT):
5617
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5618
                                 self.op.mode, errors.ECODE_INVAL)
5619

    
5620
    # disk template and mirror node verification
5621
    if self.op.disk_template not in constants.DISK_TEMPLATES:
5622
      raise errors.OpPrereqError("Invalid disk template name",
5623
                                 errors.ECODE_INVAL)
5624

    
5625
    if self.op.hypervisor is None:
5626
      self.op.hypervisor = self.cfg.GetHypervisorType()
5627

    
5628
    cluster = self.cfg.GetClusterInfo()
5629
    enabled_hvs = cluster.enabled_hypervisors
5630
    if self.op.hypervisor not in enabled_hvs:
5631
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5632
                                 " cluster (%s)" % (self.op.hypervisor,
5633
                                  ",".join(enabled_hvs)),
5634
                                 errors.ECODE_STATE)
5635

    
5636
    # check hypervisor parameter syntax (locally)
5637
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5638
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5639
                                  self.op.hvparams)
5640
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5641
    hv_type.CheckParameterSyntax(filled_hvp)
5642
    self.hv_full = filled_hvp
5643
    # check that we don't specify global parameters on an instance
5644
    _CheckGlobalHvParams(self.op.hvparams)
5645

    
5646
    # fill and remember the beparams dict
5647
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5648
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5649
                                    self.op.beparams)
5650

    
5651
    #### instance parameters check
5652

    
5653
    # instance name verification
5654
    hostname1 = utils.GetHostInfo(self.op.instance_name)
5655
    self.op.instance_name = instance_name = hostname1.name
5656

    
5657
    # this is just a preventive check, but someone might still add this
5658
    # instance in the meantime, and creation will fail at lock-add time
5659
    if instance_name in self.cfg.GetInstanceList():
5660
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5661
                                 instance_name, errors.ECODE_EXISTS)
5662

    
5663
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5664

    
5665
    # NIC buildup
5666
    self.nics = []
5667
    for idx, nic in enumerate(self.op.nics):
5668
      nic_mode_req = nic.get("mode", None)
5669
      nic_mode = nic_mode_req
5670
      if nic_mode is None:
5671
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5672

    
5673
      # in routed mode, for the first nic, the default ip is 'auto'
5674
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5675
        default_ip_mode = constants.VALUE_AUTO
5676
      else:
5677
        default_ip_mode = constants.VALUE_NONE
5678

    
5679
      # ip validity checks
5680
      ip = nic.get("ip", default_ip_mode)
5681
      if ip is None or ip.lower() == constants.VALUE_NONE:
5682
        nic_ip = None
5683
      elif ip.lower() == constants.VALUE_AUTO:
5684
        nic_ip = hostname1.ip
5685
      else:
5686
        if not utils.IsValidIP(ip):
5687
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5688
                                     " like a valid IP" % ip,
5689
                                     errors.ECODE_INVAL)
5690
        nic_ip = ip
5691

    
5692
      # TODO: check the ip address for uniqueness
5693
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5694
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
5695
                                   errors.ECODE_INVAL)
5696

    
5697
      # MAC address verification
5698
      mac = nic.get("mac", constants.VALUE_AUTO)
5699
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5700
        if not utils.IsValidMac(mac.lower()):
5701
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5702
                                     mac, errors.ECODE_INVAL)
5703
        else:
5704
          try:
5705
            self.cfg.ReserveMAC(mac, self.proc.GetECId())
5706
          except errors.ReservationError:
5707
            raise errors.OpPrereqError("MAC address %s already in use"
5708
                                       " in cluster" % mac,
5709
                                       errors.ECODE_NOTUNIQUE)
5710

    
5711
      # bridge verification
5712
      bridge = nic.get("bridge", None)
5713
      link = nic.get("link", None)
5714
      if bridge and link:
5715
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5716
                                   " at the same time", errors.ECODE_INVAL)
5717
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5718
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
5719
                                   errors.ECODE_INVAL)
5720
      elif bridge:
5721
        link = bridge
5722

    
5723
      nicparams = {}
5724
      if nic_mode_req:
5725
        nicparams[constants.NIC_MODE] = nic_mode_req
5726
      if link:
5727
        nicparams[constants.NIC_LINK] = link
5728

    
5729
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5730
                                      nicparams)
5731
      objects.NIC.CheckParameterSyntax(check_params)
5732
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5733

    
5734
    # disk checks/pre-build
5735
    self.disks = []
5736
    for disk in self.op.disks:
5737
      mode = disk.get("mode", constants.DISK_RDWR)
5738
      if mode not in constants.DISK_ACCESS_SET:
5739
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5740
                                   mode, errors.ECODE_INVAL)
5741
      size = disk.get("size", None)
5742
      if size is None:
5743
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
5744
      try:
5745
        size = int(size)
5746
      except ValueError:
5747
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
5748
                                   errors.ECODE_INVAL)
5749
      self.disks.append({"size": size, "mode": mode})
5750
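    # at this point self.disks holds one {"size": <int, MB>, "mode": <mode>}
    # dict per requested disk, which is what _ComputeDiskSize and
    # _GenerateDiskTemplate expect later on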

    
5751
    # used in CheckPrereq for ip ping check
5752
    self.check_ip = hostname1.ip
5753

    
5754
    # file storage checks
5755
    if (self.op.file_driver and
5756
        self.op.file_driver not in constants.FILE_DRIVER):
5757
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
5758
                                 self.op.file_driver, errors.ECODE_INVAL)
5759

    
5760
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5761
      raise errors.OpPrereqError("File storage directory path not absolute",
5762
                                 errors.ECODE_INVAL)
5763

    
5764
    ### Node/iallocator related checks
5765
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
5766
      raise errors.OpPrereqError("One and only one of iallocator and primary"
5767
                                 " node must be given",
5768
                                 errors.ECODE_INVAL)
5769

    
5770
    if self.op.iallocator:
5771
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5772
    else:
5773
      self.op.pnode = self._ExpandNode(self.op.pnode)
5774
      nodelist = [self.op.pnode]
5775
      if self.op.snode is not None:
5776
        self.op.snode = self._ExpandNode(self.op.snode)
5777
        nodelist.append(self.op.snode)
5778
      self.needed_locks[locking.LEVEL_NODE] = nodelist
5779

    
5780
    # in case of import lock the source node too
5781
    if self.op.mode == constants.INSTANCE_IMPORT:
5782
      src_node = getattr(self.op, "src_node", None)
5783
      src_path = getattr(self.op, "src_path", None)
5784

    
5785
      if src_path is None:
5786
        self.op.src_path = src_path = self.op.instance_name
5787

    
5788
      if src_node is None:
5789
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5790
        self.op.src_node = None
5791
        if os.path.isabs(src_path):
5792
          raise errors.OpPrereqError("Importing an instance from an absolute"
5793
                                     " path requires a source node option.",
5794
                                     errors.ECODE_INVAL)
5795
      else:
5796
        self.op.src_node = src_node = self._ExpandNode(src_node)
5797
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5798
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
5799
        if not os.path.isabs(src_path):
5800
          self.op.src_path = src_path = \
5801
            os.path.join(constants.EXPORT_DIR, src_path)
5802

    
5803
      # On import force_variant must be True, because if we forced it at
5804
      # initial install, our only chance when importing it back is that it
5805
      # works again!
5806
      self.op.force_variant = True
5807

    
5808
    else: # INSTANCE_CREATE
5809
      if getattr(self.op, "os_type", None) is None:
5810
        raise errors.OpPrereqError("No guest OS specified",
5811
                                   errors.ECODE_INVAL)
5812
      self.op.force_variant = getattr(self.op, "force_variant", False)
5813

    
5814
  def _RunAllocator(self):
5815
    """Run the allocator based on input opcode.
5816

5817
    """
5818
    nics = [n.ToDict() for n in self.nics]
5819
    ial = IAllocator(self.cfg, self.rpc,
5820
                     mode=constants.IALLOCATOR_MODE_ALLOC,
5821
                     name=self.op.instance_name,
5822
                     disk_template=self.op.disk_template,
5823
                     tags=[],
5824
                     os=self.op.os_type,
5825
                     vcpus=self.be_full[constants.BE_VCPUS],
5826
                     mem_size=self.be_full[constants.BE_MEMORY],
5827
                     disks=self.disks,
5828
                     nics=nics,
5829
                     hypervisor=self.op.hypervisor,
5830
                     )
5831

    
5832
    ial.Run(self.op.iallocator)
5833

    
5834
    if not ial.success:
5835
      raise errors.OpPrereqError("Can't compute nodes using"
5836
                                 " iallocator '%s': %s" %
5837
                                 (self.op.iallocator, ial.info),
5838
                                 errors.ECODE_NORES)
5839
    if len(ial.nodes) != ial.required_nodes:
5840
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5841
                                 " of nodes (%s), required %s" %
5842
                                 (self.op.iallocator, len(ial.nodes),
5843
                                  ial.required_nodes), errors.ECODE_FAULT)
5844
    self.op.pnode = ial.nodes[0]
5845
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5846
                 self.op.instance_name, self.op.iallocator,
5847
                 utils.CommaJoin(ial.nodes))
5848
    if ial.required_nodes == 2:
5849
      self.op.snode = ial.nodes[1]
5850

    
5851
  def BuildHooksEnv(self):
5852
    """Build hooks env.
5853

5854
    This runs on master, primary and secondary nodes of the instance.
5855

5856
    """
5857
    env = {
5858
      "ADD_MODE": self.op.mode,
5859
      }
5860
    if self.op.mode == constants.INSTANCE_IMPORT:
5861
      env["SRC_NODE"] = self.op.src_node
5862
      env["SRC_PATH"] = self.op.src_path
5863
      env["SRC_IMAGES"] = self.src_images
5864

    
5865
    env.update(_BuildInstanceHookEnv(
5866
      name=self.op.instance_name,
5867
      primary_node=self.op.pnode,
5868
      secondary_nodes=self.secondaries,
5869
      status=self.op.start,
5870
      os_type=self.op.os_type,
5871
      memory=self.be_full[constants.BE_MEMORY],
5872
      vcpus=self.be_full[constants.BE_VCPUS],
5873
      nics=_NICListToTuple(self, self.nics),
5874
      disk_template=self.op.disk_template,
5875
      disks=[(d["size"], d["mode"]) for d in self.disks],
5876
      bep=self.be_full,
5877
      hvp=self.hv_full,
5878
      hypervisor_name=self.op.hypervisor,
5879
    ))
5880

    
5881
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5882
          self.secondaries)
5883
    return env, nl, nl
5884

    
5885

    
5886
  def CheckPrereq(self):
5887
    """Check prerequisites.
5888

5889
    """
5890
    if (not self.cfg.GetVGName() and
5891
        self.op.disk_template not in constants.DTS_NOT_LVM):
5892
      raise errors.OpPrereqError("Cluster does not support lvm-based"
5893
                                 " instances", errors.ECODE_STATE)
5894

    
5895
    if self.op.mode == constants.INSTANCE_IMPORT:
5896
      src_node = self.op.src_node
5897
      src_path = self.op.src_path
5898

    
5899
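      # no source node was given: look on all locked nodes for an export
      # matching the (relative) source path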
      if src_node is None:
5900
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5901
        exp_list = self.rpc.call_export_list(locked_nodes)
5902
        found = False
5903
        for node in exp_list:
5904
          if exp_list[node].fail_msg:
5905
            continue
5906
          if src_path in exp_list[node].payload:
5907
            found = True
5908
            self.op.src_node = src_node = node
5909
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5910
                                                       src_path)
5911
            break
5912
        if not found:
5913
          raise errors.OpPrereqError("No export found for relative path %s" %
5914
                                      src_path, errors.ECODE_INVAL)
5915

    
5916
      _CheckNodeOnline(self, src_node)
5917
      result = self.rpc.call_export_info(src_node, src_path)
5918
      result.Raise("No export or invalid export found in dir %s" % src_path)
5919

    
5920
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5921
      if not export_info.has_section(constants.INISECT_EXP):
5922
        raise errors.ProgrammerError("Corrupted export config",
5923
                                     errors.ECODE_ENVIRON)
5924

    
5925
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
5926
      if int(ei_version) != constants.EXPORT_VERSION:
5927
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5928
                                   (ei_version, constants.EXPORT_VERSION),
5929
                                   errors.ECODE_ENVIRON)
5930

    
5931
      # Check that the new instance doesn't have less disks than the export
5932
      instance_disks = len(self.disks)
5933
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
5934
      if instance_disks < export_disks:
5935
        raise errors.OpPrereqError("Not enough disks to import."
5936
                                   " (instance: %d, export: %d)" %
5937
                                   (instance_disks, export_disks),
5938
                                   errors.ECODE_INVAL)
5939

    
5940
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
5941
      disk_images = []
5942
      for idx in range(export_disks):
5943
        option = 'disk%d_dump' % idx
5944
        if export_info.has_option(constants.INISECT_INS, option):
5945
          # FIXME: are the old OSes, disk sizes, etc. useful?
5946
          export_name = export_info.get(constants.INISECT_INS, option)
5947
          image = os.path.join(src_path, export_name)
5948
          disk_images.append(image)
5949
        else:
5950
          disk_images.append(False)
5951

    
5952
      self.src_images = disk_images
5953

    
5954
      old_name = export_info.get(constants.INISECT_INS, 'name')
5955
      # FIXME: int() here could throw a ValueError on broken exports
5956
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
5957
      if self.op.instance_name == old_name:
5958
        for idx, nic in enumerate(self.nics):
5959
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
5960
            nic_mac_ini = 'nic%d_mac' % idx
5961
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5962

    
5963
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5964
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
5965
    if self.op.start and not self.op.ip_check:
5966
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5967
                                 " adding an instance in start mode",
5968
                                 errors.ECODE_INVAL)
5969

    
5970
    if self.op.ip_check:
5971
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5972
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5973
                                   (self.check_ip, self.op.instance_name),
5974
                                   errors.ECODE_NOTUNIQUE)
5975

    
5976
    #### mac address generation
5977
    # By generating here the mac address both the allocator and the hooks get
5978
    # the real final mac address rather than the 'auto' or 'generate' value.
5979
    # There is a race condition between the generation and the instance object
5980
    # creation, which means that we know the mac is valid now, but we're not
5981
    # sure it will be when we actually add the instance. If things go bad
5982
    # adding the instance will abort because of a duplicate mac, and the
5983
    # creation job will fail.
5984
    for nic in self.nics:
5985
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5986
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
5987

    
5988
    #### allocator run
5989

    
5990
    if self.op.iallocator is not None:
5991
      self._RunAllocator()
5992

    
5993
    #### node related checks
5994

    
5995
    # check primary node
5996
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5997
    assert self.pnode is not None, \
5998
      "Cannot retrieve locked node %s" % self.op.pnode
5999
    if pnode.offline:
6000
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6001
                                 pnode.name, errors.ECODE_STATE)
6002
    if pnode.drained:
6003
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6004
                                 pnode.name, errors.ECODE_STATE)
6005

    
6006
    self.secondaries = []
6007

    
6008
    # mirror node verification
6009
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6010
      if self.op.snode is None:
6011
        raise errors.OpPrereqError("The networked disk templates need"
6012
                                   " a mirror node", errors.ECODE_INVAL)
6013
      if self.op.snode == pnode.name:
6014
        raise errors.OpPrereqError("The secondary node cannot be the"
6015
                                   " primary node.", errors.ECODE_INVAL)
6016
      _CheckNodeOnline(self, self.op.snode)
6017
      _CheckNodeNotDrained(self, self.op.snode)
6018
      self.secondaries.append(self.op.snode)
6019

    
6020
    nodenames = [pnode.name] + self.secondaries
6021

    
6022
    req_size = _ComputeDiskSize(self.op.disk_template,
6023
                                self.disks)
6024

    
6025
    # Check lv size requirements
6026
    if req_size is not None:
6027
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6028
                                         self.op.hypervisor)
6029
      for node in nodenames:
6030
        info = nodeinfo[node]
6031
        info.Raise("Cannot get current information from node %s" % node)
6032
        info = info.payload
6033
        vg_free = info.get('vg_free', None)
6034
        if not isinstance(vg_free, int):
6035
          raise errors.OpPrereqError("Can't compute free disk space on"
6036
                                     " node %s" % node, errors.ECODE_ENVIRON)
6037
        if req_size > vg_free:
6038
          raise errors.OpPrereqError("Not enough disk space on target node %s."
6039
                                     " %d MB available, %d MB required" %
6040
                                     (node, vg_free, req_size),
6041
                                     errors.ECODE_NORES)
6042

    
6043
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6044

    
6045
    # os verification
6046
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6047
    result.Raise("OS '%s' not in supported os list for primary node %s" %
6048
                 (self.op.os_type, pnode.name),
6049
                 prereq=True, ecode=errors.ECODE_INVAL)
6050
    if not self.op.force_variant:
6051
      _CheckOSVariant(result.payload, self.op.os_type)
6052

    
6053
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6054

    
6055
    # memory check on primary node
6056
    if self.op.start:
6057
      _CheckNodeFreeMemory(self, self.pnode.name,
6058
                           "creating instance %s" % self.op.instance_name,
6059
                           self.be_full[constants.BE_MEMORY],
6060
                           self.op.hypervisor)
6061

    
6062
    self.dry_run_result = list(nodenames)
6063

    
6064
  def Exec(self, feedback_fn):
6065
    """Create and add the instance to the cluster.
6066

6067
    """
6068
    instance = self.op.instance_name
6069
    pnode_name = self.pnode.name
6070

    
6071
    ht_kind = self.op.hypervisor
6072
    if ht_kind in constants.HTS_REQ_PORT:
6073
      network_port = self.cfg.AllocatePort()
6074
    else:
6075
      network_port = None
6076

    
6077
    ##if self.op.vnc_bind_address is None:
6078
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6079

    
6080
    # this is needed because os.path.join does not accept None arguments
6081
    if self.op.file_storage_dir is None:
6082
      string_file_storage_dir = ""
6083
    else:
6084
      string_file_storage_dir = self.op.file_storage_dir
6085

    
6086
    # build the full file storage dir path
6087
    file_storage_dir = os.path.normpath(os.path.join(
6088
                                        self.cfg.GetFileStorageDir(),
6089
                                        string_file_storage_dir, instance))
6090

    
6091

    
6092
    disks = _GenerateDiskTemplate(self,
6093
                                  self.op.disk_template,
6094
                                  instance, pnode_name,
6095
                                  self.secondaries,
6096
                                  self.disks,
6097
                                  file_storage_dir,
6098
                                  self.op.file_driver,
6099
                                  0)
6100

    
6101
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6102
                            primary_node=pnode_name,
6103
                            nics=self.nics, disks=disks,
6104
                            disk_template=self.op.disk_template,
6105
                            admin_up=False,
6106
                            network_port=network_port,
6107
                            beparams=self.op.beparams,
6108
                            hvparams=self.op.hvparams,
6109
                            hypervisor=self.op.hypervisor,
6110
                            )
6111

    
6112
    feedback_fn("* creating instance disks...")
6113
    try:
6114
      _CreateDisks(self, iobj)
6115
    except errors.OpExecError:
6116
      self.LogWarning("Device creation failed, reverting...")
6117
      try:
6118
        _RemoveDisks(self, iobj)
6119
      finally:
6120
        self.cfg.ReleaseDRBDMinors(instance)
6121
        raise
6122

    
6123
    feedback_fn("adding instance %s to cluster config" % instance)
6124

    
6125
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6126

    
6127
    # Declare that we don't want to remove the instance lock anymore, as we've
6128
    # added the instance to the config
6129
    del self.remove_locks[locking.LEVEL_INSTANCE]
6130
    # Unlock all the nodes
6131
    if self.op.mode == constants.INSTANCE_IMPORT:
6132
      nodes_keep = [self.op.src_node]
6133
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6134
                       if node != self.op.src_node]
6135
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6136
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6137
    else:
6138
      self.context.glm.release(locking.LEVEL_NODE)
6139
      del self.acquired_locks[locking.LEVEL_NODE]
6140

    
6141
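    # wait for the disks to fully sync if requested; otherwise, for mirrored
    # templates, do a single (oneshot) check that they are not degraded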
    if self.op.wait_for_sync:
6142
      disk_abort = not _WaitForSync(self, iobj)
6143
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6144
      # make sure the disks are not degraded (still sync-ing is ok)
6145
      time.sleep(15)
6146
      feedback_fn("* checking mirrors status")
6147
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6148
    else:
6149
      disk_abort = False
6150

    
6151
    if disk_abort:
6152
      _RemoveDisks(self, iobj)
6153
      self.cfg.RemoveInstance(iobj.name)
6154
      # Make sure the instance lock gets removed
6155
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6156
      raise errors.OpExecError("There are some degraded disks for"
6157
                               " this instance")
6158

    
6159
    feedback_fn("creating os for instance %s on node %s" %
6160
                (instance, pnode_name))
6161

    
6162
    if iobj.disk_template != constants.DT_DISKLESS:
6163
      if self.op.mode == constants.INSTANCE_CREATE:
6164
        feedback_fn("* running the instance OS create scripts...")
6165
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
6166
        result.Raise("Could not add os for instance %s"
6167
                     " on node %s" % (instance, pnode_name))
6168

    
6169
      elif self.op.mode == constants.INSTANCE_IMPORT:
6170
        feedback_fn("* running the instance OS import scripts...")
6171
        src_node = self.op.src_node
6172
        src_images = self.src_images
6173
        cluster_name = self.cfg.GetClusterName()
6174
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6175
                                                         src_node, src_images,
6176
                                                         cluster_name)
6177
        msg = import_result.fail_msg
6178
        if msg:
6179
          self.LogWarning("Error while importing the disk images for instance"
6180
                          " %s on node %s: %s" % (instance, pnode_name, msg))
6181
      else:
6182
        # also checked in the prereq part
6183
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6184
                                     % self.op.mode)
6185

    
6186
    if self.op.start:
6187
      iobj.admin_up = True
6188
      self.cfg.Update(iobj, feedback_fn)
6189
      logging.info("Starting instance %s on node %s", instance, pnode_name)
6190
      feedback_fn("* starting instance...")
6191
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6192
      result.Raise("Could not start instance")
6193

    
6194
    return list(iobj.all_nodes)
6195

    
6196

    
6197
class LUConnectConsole(NoHooksLU):
6198
  """Connect to an instance's console.
6199

6200
  This is somewhat special in that it returns the command line that
6201
  you need to run on the master node in order to connect to the
6202
  console.
6203

6204
  """
6205
  _OP_REQP = ["instance_name"]
6206
  REQ_BGL = False
6207

    
6208
  def ExpandNames(self):
6209
    self._ExpandAndLockInstance()
6210

    
6211
  def CheckPrereq(self):
6212
    """Check prerequisites.
6213

6214
    This checks that the instance is in the cluster.
6215

6216
    """
6217
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6218
    assert self.instance is not None, \
6219
      "Cannot retrieve locked instance %s" % self.op.instance_name
6220
    _CheckNodeOnline(self, self.instance.primary_node)
6221

    
6222
  def Exec(self, feedback_fn):
6223
    """Connect to the console of an instance
6224

6225
    """
6226
    instance = self.instance
6227
    node = instance.primary_node
6228

    
6229
    node_insts = self.rpc.call_instance_list([node],
6230
                                             [instance.hypervisor])[node]
6231
    node_insts.Raise("Can't get node information from %s" % node)
6232

    
6233
    if instance.name not in node_insts.payload:
6234
      raise errors.OpExecError("Instance %s is not running." % instance.name)
6235

    
6236
    logging.debug("Connecting to console of %s on %s", instance.name, node)
6237

    
6238
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
6239
    cluster = self.cfg.GetClusterInfo()
6240
    # beparams and hvparams are passed separately, to avoid editing the
6241
    # instance and then saving the defaults in the instance itself.
6242
    hvparams = cluster.FillHV(instance)
6243
    beparams = cluster.FillBE(instance)
6244
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6245

    
6246
    # build ssh cmdline
6247
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6248

    
6249

    
6250
class LUReplaceDisks(LogicalUnit):
6251
  """Replace the disks of an instance.
6252

6253
  """
6254
  HPATH = "mirrors-replace"
6255
  HTYPE = constants.HTYPE_INSTANCE
6256
  _OP_REQP = ["instance_name", "mode", "disks"]
6257
  REQ_BGL = False
6258

    
6259
  def CheckArguments(self):
6260
    if not hasattr(self.op, "remote_node"):
6261
      self.op.remote_node = None
6262
    if not hasattr(self.op, "iallocator"):
6263
      self.op.iallocator = None
6264

    
6265
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6266
                                  self.op.iallocator)
6267

    
6268
  def ExpandNames(self):
6269
    self._ExpandAndLockInstance()
6270

    
6271
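    # node locking strategy: with an iallocator all nodes are candidates; with
    # an explicit remote node only that node is locked here (the instance's
    # own nodes are appended later); otherwise DeclareLocks will replace the
    # empty list with the instance's nodes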
    if self.op.iallocator is not None:
6272
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6273

    
6274
    elif self.op.remote_node is not None:
6275
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
6276
      if remote_node is None:
6277
        raise errors.OpPrereqError("Node '%s' not known" %
6278
                                   self.op.remote_node, errors.ECODE_NOENT)
6279

    
6280
      self.op.remote_node = remote_node
6281

    
6282
      # Warning: do not remove the locking of the new secondary here
6283
      # unless DRBD8.AddChildren is changed to work in parallel;
6284
      # currently it doesn't since parallel invocations of
6285
      # FindUnusedMinor will conflict
6286
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6287
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6288

    
6289
    else:
6290
      self.needed_locks[locking.LEVEL_NODE] = []
6291
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6292

    
6293
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6294
                                   self.op.iallocator, self.op.remote_node,
6295
                                   self.op.disks)
6296

    
6297
    self.tasklets = [self.replacer]
6298

    
6299
  def DeclareLocks(self, level):
6300
    # If we're not already locking all nodes in the set we have to declare the
6301
    # instance's primary/secondary nodes.
6302
    if (level == locking.LEVEL_NODE and
6303
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6304
      self._LockInstancesNodes()
6305

    
6306
  def BuildHooksEnv(self):
6307
    """Build hooks env.
6308

6309
    This runs on the master, the primary and all the secondaries.
6310

6311
    """
6312
    instance = self.replacer.instance
6313
    env = {
6314
      "MODE": self.op.mode,
6315
      "NEW_SECONDARY": self.op.remote_node,
6316
      "OLD_SECONDARY": instance.secondary_nodes[0],
6317
      }
6318
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6319
    nl = [
6320
      self.cfg.GetMasterNode(),
6321
      instance.primary_node,
6322
      ]
6323
    if self.op.remote_node is not None:
6324
      nl.append(self.op.remote_node)
6325
    return env, nl, nl
6326

    
6327

    
6328
class LUEvacuateNode(LogicalUnit):
6329
  """Relocate the secondary instances from a node.
6330

6331
  """
6332
  HPATH = "node-evacuate"
6333
  HTYPE = constants.HTYPE_NODE
6334
  _OP_REQP = ["node_name"]
6335
  REQ_BGL = False
6336

    
6337
  def CheckArguments(self):
6338
    if not hasattr(self.op, "remote_node"):
6339
      self.op.remote_node = None
6340
    if not hasattr(self.op, "iallocator"):
6341
      self.op.iallocator = None
6342

    
6343
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6344
                                  self.op.remote_node,
6345
                                  self.op.iallocator)
6346

    
6347
  def ExpandNames(self):
6348
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
6349
    if self.op.node_name is None:
6350
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
6351
                                 errors.ECODE_NOENT)
6352

    
6353
    self.needed_locks = {}
6354

    
6355
    # Declare node locks
6356
    if self.op.iallocator is not None:
6357
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6358

    
6359
    elif self.op.remote_node is not None:
6360
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
6361
      if remote_node is None:
6362
        raise errors.OpPrereqError("Node '%s' not known" %
6363
                                   self.op.remote_node, errors.ECODE_NOENT)
6364

    
6365
      self.op.remote_node = remote_node
6366

    
6367
      # Warning: do not remove the locking of the new secondary here
6368
      # unless DRBD8.AddChildren is changed to work in parallel;
6369
      # currently it doesn't since parallel invocations of
6370
      # FindUnusedMinor will conflict
6371
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6372
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6373

    
6374
    else:
6375
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6376

    
6377
    # Create tasklets for replacing disks for all secondary instances on this
6378
    # node
6379
    names = []
6380
    tasklets = []
6381

    
6382
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6383
      logging.debug("Replacing disks for instance %s", inst.name)
6384
      names.append(inst.name)
6385

    
6386
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6387
                                self.op.iallocator, self.op.remote_node, [])
6388
      tasklets.append(replacer)
6389

    
6390
    self.tasklets = tasklets
6391
    self.instance_names = names
6392

    
6393
    # Declare instance locks
6394
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6395

    
6396
  def DeclareLocks(self, level):
6397
    # If we're not already locking all nodes in the set we have to declare the
6398
    # instance's primary/secondary nodes.
6399
    if (level == locking.LEVEL_NODE and
6400
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6401
      self._LockInstancesNodes()
6402

    
6403
  def BuildHooksEnv(self):
6404
    """Build hooks env.
6405

6406
    This runs on the master, the primary and all the secondaries.
6407

6408
    """
6409
    env = {
6410
      "NODE_NAME": self.op.node_name,
6411
      }
6412

    
6413
    nl = [self.cfg.GetMasterNode()]
6414

    
6415
    if self.op.remote_node is not None:
6416
      env["NEW_SECONDARY"] = self.op.remote_node
6417
      nl.append(self.op.remote_node)
6418

    
6419
    return (env, nl, nl)
6420

    
6421

    
6422
class TLReplaceDisks(Tasklet):
6423
  """Replaces disks for an instance.
6424

6425
  Note: Locking is not within the scope of this class.
6426

6427
  """
6428
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6429
               disks):
6430
    """Initializes this class.
6431

6432
    """
6433
    Tasklet.__init__(self, lu)
6434

    
6435
    # Parameters
6436
    self.instance_name = instance_name
6437
    self.mode = mode
6438
    self.iallocator_name = iallocator_name
6439
    self.remote_node = remote_node
6440
    self.disks = disks
6441

    
6442
    # Runtime data
6443
    self.instance = None
6444
    self.new_node = None
6445
    self.target_node = None
6446
    self.other_node = None
6447
    self.remote_node_info = None
6448
    self.node_secondary_ip = None
6449

    
6450
  @staticmethod
6451
  def CheckArguments(mode, remote_node, iallocator):
6452
    """Helper function for users of this class.
6453

6454
    """
6455
    # check for valid parameter combination
6456
    if mode == constants.REPLACE_DISK_CHG:
6457
      if remote_node is None and iallocator is None:
6458
        raise errors.OpPrereqError("When changing the secondary either an"
6459
                                   " iallocator script must be used or the"
6460
                                   " new node given", errors.ECODE_INVAL)
6461

    
6462
      if remote_node is not None and iallocator is not None:
6463
        raise errors.OpPrereqError("Give either the iallocator or the new"
6464
                                   " secondary, not both", errors.ECODE_INVAL)
6465

    
6466
    elif remote_node is not None or iallocator is not None:
6467
      # Not replacing the secondary
6468
      raise errors.OpPrereqError("The iallocator and new node options can"
6469
                                 " only be used when changing the"
6470
                                 " secondary node", errors.ECODE_INVAL)
6471

    
6472
  @staticmethod
6473
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6474
    """Compute a new secondary node using an IAllocator.
6475

6476
    """
6477
    ial = IAllocator(lu.cfg, lu.rpc,
6478
                     mode=constants.IALLOCATOR_MODE_RELOC,
6479
                     name=instance_name,
6480
                     relocate_from=relocate_from)
6481

    
6482
    ial.Run(iallocator_name)
6483

    
6484
    if not ial.success:
6485
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6486
                                 " %s" % (iallocator_name, ial.info),
6487
                                 errors.ECODE_NORES)
6488

    
6489
    if len(ial.nodes) != ial.required_nodes:
6490
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6491
                                 " of nodes (%s), required %s" %
6492
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes),
6493
                                 errors.ECODE_FAULT)
6494

    
6495
    remote_node_name = ial.nodes[0]
6496

    
6497
    lu.LogInfo("Selected new secondary for instance '%s': %s",
6498
               instance_name, remote_node_name)
6499

    
6500
    return remote_node_name
6501

    
6502
  def _FindFaultyDisks(self, node_name):
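    """Wrapper over L{_FindFaultyInstanceDisks} for this tasklet's instance.

    """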
6503
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6504
                                    node_name, True)
6505

    
6506
  def CheckPrereq(self):
6507
    """Check prerequisites.
6508

6509
    This checks that the instance is in the cluster.
6510

6511
    """
6512
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6513
    assert instance is not None, \
6514
      "Cannot retrieve locked instance %s" % self.instance_name
6515

    
6516
    if instance.disk_template != constants.DT_DRBD8:
6517
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6518
                                 " instances", errors.ECODE_INVAL)
6519

    
6520
    if len(instance.secondary_nodes) != 1:
6521
      raise errors.OpPrereqError("The instance has a strange layout,"
6522
                                 " expected one secondary but found %d" %
6523
                                 len(instance.secondary_nodes),
6524
                                 errors.ECODE_FAULT)
6525

    
6526
    secondary_node = instance.secondary_nodes[0]
6527

    
6528
    if self.iallocator_name is None:
6529
      remote_node = self.remote_node
6530
    else:
6531
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6532
                                       instance.name, instance.secondary_nodes)
6533

    
6534
    if remote_node is not None:
6535
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6536
      assert self.remote_node_info is not None, \
6537
        "Cannot retrieve locked node %s" % remote_node
6538
    else:
6539
      self.remote_node_info = None
6540

    
6541
    if remote_node == self.instance.primary_node:
6542
      raise errors.OpPrereqError("The specified node is the primary node of"
6543
                                 " the instance.", errors.ECODE_INVAL)
6544

    
6545
    if remote_node == secondary_node:
6546
      raise errors.OpPrereqError("The specified node is already the"
6547
                                 " secondary node of the instance.",
6548
                                 errors.ECODE_INVAL)
6549

    
6550
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6551
                                    constants.REPLACE_DISK_CHG):
6552
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
6553
                                 errors.ECODE_INVAL)
6554

    
6555
    if self.mode == constants.REPLACE_DISK_AUTO:
6556
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
6557
      faulty_secondary = self._FindFaultyDisks(secondary_node)
6558

    
6559
      if faulty_primary and faulty_secondary:
6560
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6561
                                   " one node and can not be repaired"
6562
                                   " automatically" % self.instance_name,
6563
                                   errors.ECODE_STATE)
6564

    
6565
      if faulty_primary:
6566
        self.disks = faulty_primary
6567
        self.target_node = instance.primary_node
6568
        self.other_node = secondary_node
6569
        check_nodes = [self.target_node, self.other_node]
6570
      elif faulty_secondary:
6571
        self.disks = faulty_secondary
6572
        self.target_node = secondary_node
6573
        self.other_node = instance.primary_node
6574
        check_nodes = [self.target_node, self.other_node]
6575
      else:
6576
        self.disks = []
6577
        check_nodes = []
6578

    
6579
    else:
6580
      # Non-automatic modes
6581
      if self.mode == constants.REPLACE_DISK_PRI:
6582
        self.target_node = instance.primary_node
6583
        self.other_node = secondary_node
6584
        check_nodes = [self.target_node, self.other_node]
6585

    
6586
      elif self.mode == constants.REPLACE_DISK_SEC:
6587
        self.target_node = secondary_node
6588
        self.other_node = instance.primary_node
6589
        check_nodes = [self.target_node, self.other_node]
6590

    
6591
      elif self.mode == constants.REPLACE_DISK_CHG:
6592
        self.new_node = remote_node
6593
        self.other_node = instance.primary_node
6594
        self.target_node = secondary_node
6595
        check_nodes = [self.new_node, self.other_node]
6596

    
6597
        _CheckNodeNotDrained(self.lu, remote_node)
6598

    
6599
      else:
6600
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6601
                                     self.mode)
6602

    
6603
      # If not specified all disks should be replaced
6604
      if not self.disks:
6605
        self.disks = range(len(self.instance.disks))
6606

    
6607
    for node in check_nodes:
6608
      _CheckNodeOnline(self.lu, node)
6609

    
6610
    # Check whether disks are valid
6611
    for disk_idx in self.disks:
6612
      instance.FindDisk(disk_idx)
6613

    
6614
    # Get secondary node IP addresses
6615
    node_2nd_ip = {}
6616

    
6617
    for node_name in [self.target_node, self.other_node, self.new_node]:
6618
      if node_name is not None:
6619
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6620

    
6621
    self.node_secondary_ip = node_2nd_ip
6622

    
6623
  def Exec(self, feedback_fn):
6624
    """Execute disk replacement.
6625

6626
    This dispatches the disk replacement to the appropriate handler.
6627

6628
    """
6629
    if not self.disks:
6630
      feedback_fn("No disks need replacement")
6631
      return
6632

    
6633
    feedback_fn("Replacing disk(s) %s for %s" %
6634
                (utils.CommaJoin(self.disks), self.instance.name))
6635

    
6636
    activate_disks = (not self.instance.admin_up)
6637

    
6638
    # Activate the instance disks if we're replacing them on a down instance
6639
    if activate_disks:
6640
      _StartInstanceDisks(self.lu, self.instance, True)
6641

    
6642
    try:
6643
      # Should we replace the secondary node?
6644
      if self.new_node is not None:
6645
        fn = self._ExecDrbd8Secondary
6646
      else:
6647
        fn = self._ExecDrbd8DiskOnly
6648

    
6649
      return fn(feedback_fn)
6650

    
6651
    finally:
6652
      # Deactivate the instance disks if we're replacing them on a
6653
      # down instance
6654
      if activate_disks:
6655
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6656

    
6657
  def _CheckVolumeGroup(self, nodes):
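    """Checks that the cluster volume group exists on all given nodes.

    """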
6658
    self.lu.LogInfo("Checking volume groups")
6659

    
6660
    vgname = self.cfg.GetVGName()
6661

    
6662
    # Make sure volume group exists on all involved nodes
6663
    results = self.rpc.call_vg_list(nodes)
6664
    if not results:
6665
      raise errors.OpExecError("Can't list volume groups on the nodes")
6666

    
6667
    for node in nodes:
6668
      res = results[node]
6669
      res.Raise("Error checking node %s" % node)
6670
      if vgname not in res.payload:
6671
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
6672
                                 (vgname, node))
6673

    
6674
  def _CheckDisksExistence(self, nodes):
6675
    # Check disk existence
6676
    for idx, dev in enumerate(self.instance.disks):
6677
      if idx not in self.disks:
6678
        continue
6679

    
6680
      for node in nodes:
6681
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6682
        self.cfg.SetDiskID(dev, node)
6683

    
6684
        result = self.rpc.call_blockdev_find(node, dev)
6685

    
6686
        msg = result.fail_msg
6687
        if msg or not result.payload:
6688
          if not msg:
6689
            msg = "disk not found"
6690
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6691
                                   (idx, node, msg))
6692

    
6693
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
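    """Checks consistency of the disks being replaced on the given node.

    """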
6694
    for idx, dev in enumerate(self.instance.disks):
6695
      if idx not in self.disks:
6696
        continue
6697

    
6698
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6699
                      (idx, node_name))
6700

    
6701
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6702
                                   ldisk=ldisk):
6703
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6704
                                 " replace disks for instance %s" %
6705
                                 (node_name, self.instance.name))
6706

    
6707
  def _CreateNewStorage(self, node_name):
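    """Creates new (data and meta) LVs on node_name for the disks to replace.

    @return: a dict mapping each disk's iv_name to a (device, old_lvs,
        new_lvs) tuple, used later by the rename and cleanup steps

    """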
6708
    vgname = self.cfg.GetVGName()
6709
    iv_names = {}
6710

    
6711
    for idx, dev in enumerate(self.instance.disks):
6712
      if idx not in self.disks:
6713
        continue
6714

    
6715
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6716

    
6717
      self.cfg.SetDiskID(dev, node_name)
6718

    
6719
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6720
      names = _GenerateUniqueNames(self.lu, lv_names)
6721

    
6722
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6723
                             logical_id=(vgname, names[0]))
6724
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6725
                             logical_id=(vgname, names[1]))
6726

    
6727
      new_lvs = [lv_data, lv_meta]
6728
      old_lvs = dev.children
6729
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6730

    
6731
      # we pass force_create=True to force the LVM creation
6732
      for new_lv in new_lvs:
6733
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6734
                        _GetInstanceInfoText(self.instance), False)
6735

    
6736
    return iv_names
6737

    
6738
  def _CheckDevices(self, node_name, iv_names):
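    """Checks that the reassembled DRBD devices are found and not degraded.

    """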
6739
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6740
      self.cfg.SetDiskID(dev, node_name)
6741

    
6742
      result = self.rpc.call_blockdev_find(node_name, dev)
6743

    
6744
      msg = result.fail_msg
6745
      if msg or not result.payload:
6746
        if not msg:
6747
          msg = "disk not found"
6748
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
6749
                                 (name, msg))
6750

    
6751
      if result.payload.is_degraded:
6752
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6753

    
6754
  def _RemoveOldStorage(self, node_name, iv_names):
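    """Removes the old (replaced) LVs, only warning on removal failures.

    """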
6755
    for name, (dev, old_lvs, _) in iv_names.iteritems():
6756
      self.lu.LogInfo("Remove logical volumes for %s" % name)
6757

    
6758
      for lv in old_lvs:
6759
        self.cfg.SetDiskID(lv, node_name)
6760

    
6761
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6762
        if msg:
6763
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
6764
                             hint="remove unused LVs manually")
6765

    
6766
  def _ExecDrbd8DiskOnly(self, feedback_fn):
6767
    """Replace a disk on the primary or secondary for DRBD 8.
6768

6769
    The algorithm for replace is quite complicated:
6770

6771
      1. for each disk to be replaced:
6772

6773
        1. create new LVs on the target node with unique names
6774
        1. detach old LVs from the drbd device
6775
        1. rename old LVs to name_replaced.<time_t>
6776
        1. rename new LVs to old LVs
6777
        1. attach the new LVs (with the old names now) to the drbd device
6778

6779
      1. wait for sync across all devices
6780

6781
      1. for each modified disk:
6782

6783
        1. remove old LVs (which have the name name_replaced.<time_t>)
6784

6785
    Failures are not very well handled.
6786

6787
    """
6788
    steps_total = 6
6789

    
6790
    # Step: check device activation
6791
    self.lu.LogStep(1, steps_total, "Check device existence")
6792
    self._CheckDisksExistence([self.other_node, self.target_node])
6793
    self._CheckVolumeGroup([self.target_node, self.other_node])
6794

    
6795
    # Step: check other node consistency
6796
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6797
    self._CheckDisksConsistency(self.other_node,
6798
                                self.other_node == self.instance.primary_node,
6799
                                False)
6800

    
6801
    # Step: create new storage
6802
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6803
    iv_names = self._CreateNewStorage(self.target_node)
6804

    
6805
    # Step: for each lv, detach+rename*2+attach
6806
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6807
    for dev, old_lvs, new_lvs in iv_names.itervalues():
6808
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6809

    
6810
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
6811
                                                     old_lvs)
6812
      result.Raise("Can't detach drbd from local storage on node"
6813
                   " %s for device %s" % (self.target_node, dev.iv_name))
6814
      #dev.children = []
6815
      #cfg.Update(instance)
6816

    
6817
      # ok, we created the new LVs, so now we know we have the needed
6818
      # storage; as such, we proceed on the target node to rename
6819
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6820
      # using the assumption that logical_id == physical_id (which in
6821
      # turn is the unique_id on that node)
6822

    
6823
      # FIXME(iustin): use a better name for the replaced LVs
6824
      temp_suffix = int(time.time())
6825
      ren_fn = lambda d, suff: (d.physical_id[0],
6826
                                d.physical_id[1] + "_replaced-%s" % suff)
6827

    
6828
      # Build the rename list based on what LVs exist on the node
6829
      rename_old_to_new = []
6830
      for to_ren in old_lvs:
6831
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6832
        if not result.fail_msg and result.payload:
6833
          # device exists
6834
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6835

    
6836
      self.lu.LogInfo("Renaming the old LVs on the target node")
6837
      result = self.rpc.call_blockdev_rename(self.target_node,
6838
                                             rename_old_to_new)
6839
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
6840

    
6841
      # Now we rename the new LVs to the old LVs
6842
      self.lu.LogInfo("Renaming the new LVs on the target node")
6843
      rename_new_to_old = [(new, old.physical_id)
6844
                           for old, new in zip(old_lvs, new_lvs)]
6845
      result = self.rpc.call_blockdev_rename(self.target_node,
6846
                                             rename_new_to_old)
6847
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
6848

    
6849
      for old, new in zip(old_lvs, new_lvs):
6850
        new.logical_id = old.logical_id
6851
        self.cfg.SetDiskID(new, self.target_node)
6852

    
6853
      for disk in old_lvs:
6854
        disk.logical_id = ren_fn(disk, temp_suffix)
6855
        self.cfg.SetDiskID(disk, self.target_node)
6856

    
6857
      # Now that the new lvs have the old name, we can add them to the device
6858
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6859
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
6860
                                                  new_lvs)
6861
      msg = result.fail_msg
6862
      if msg:
6863
        for new_lv in new_lvs:
6864
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
6865
                                               new_lv).fail_msg
6866
          if msg2:
6867
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6868
                               hint=("cleanup manually the unused logical"
6869
                                     "volumes"))
6870
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6871

    
6872
      dev.children = new_lvs
6873

    
6874
      self.cfg.Update(self.instance, feedback_fn)
6875

    
6876
    # Wait for sync
6877
    # This can fail as the old devices are degraded and _WaitForSync
6878
    # does a combined result over all disks, so we don't check its return value
6879
    self.lu.LogStep(5, steps_total, "Sync devices")
6880
    _WaitForSync(self.lu, self.instance)
6881

    
6882
    # Check all devices manually
6883
    self._CheckDevices(self.instance.primary_node, iv_names)
6884

    
6885
    # Step: remove old storage
6886
    self.lu.LogStep(6, steps_total, "Removing old storage")
6887
    self._RemoveOldStorage(self.target_node, iv_names)
6888

    
6889
  def _ExecDrbd8Secondary(self, feedback_fn):
6890
    """Replace the secondary node for DRBD 8.
6891

6892
    The algorithm for replace is quite complicated:
6893
      - for all disks of the instance:
6894
        - create new LVs on the new node with same names
6895
        - shutdown the drbd device on the old secondary
6896
        - disconnect the drbd network on the primary
6897
        - create the drbd device on the new secondary
6898
        - network attach the drbd on the primary, using an artifice:
6899
          the drbd code for Attach() will connect to the network if it
6900
          finds a device which is connected to the good local disks but
6901
          not network enabled
6902
      - wait for sync across all devices
6903
      - remove all disks from the old secondary
6904

6905
    Failures are not very well handled.
6906

6907
    """
6908
    steps_total = 6
6909

    
6910
    # Step: check device activation
6911
    self.lu.LogStep(1, steps_total, "Check device existence")
6912
    self._CheckDisksExistence([self.instance.primary_node])
6913
    self._CheckVolumeGroup([self.instance.primary_node])
6914

    
6915
    # Step: check other node consistency
6916
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6917
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
6918

    
6919
    # Step: create new storage
6920
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6921
    for idx, dev in enumerate(self.instance.disks):
6922
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6923
                      (self.new_node, idx))
6924
      # we pass force_create=True to force LVM creation
6925
      for new_lv in dev.children:
6926
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6927
                        _GetInstanceInfoText(self.instance), False)
6928

    
6929
    # Step 4: drbd minors and drbd setup changes
6930
    # after this, we must manually remove the drbd minors on both the
6931
    # error and the success paths
6932
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6933
    minors = self.cfg.AllocateDRBDMinor([self.new_node
6934
                                         for dev in self.instance.disks],
6935
                                        self.instance.name)
6936
    logging.debug("Allocated minors %r", minors)
6937

    
6938
    iv_names = {}
6939
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6940
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
6941
                      (self.new_node, idx))
6942
      # create new devices on new_node; note that we create two IDs:
6943
      # one without port, so the drbd will be activated without
6944
      # networking information on the new node at this stage, and one
6945
      # with network, for the latter activation in step 4
6946
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6947
      if self.instance.primary_node == o_node1:
6948
        p_minor = o_minor1
6949
      else:
6950
        p_minor = o_minor2
6951

    
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

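      # Illustration (added note, not part of the original code; all values
      # are hypothetical): the two logical_id tuples built above differ only
      # in the port slot, e.g.
      #   new_alone_id = ("node1.example.com", "node3.example.com", None,
      #                   0, 5, "secret")   # no port => drbd stays standalone
      #   new_net_id   = ("node1.example.com", "node3.example.com", 11000,
      #                   0, 5, "secret")   # port set => used later to attach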
      iv_names[idx] = (dev, dev.children, new_net_id)
6958
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6959
                    new_net_id)
6960
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6961
                              logical_id=new_alone_id,
6962
                              children=dev.children,
6963
                              size=dev.size)
6964
      try:
6965
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6966
                              _GetInstanceInfoText(self.instance), False)
6967
      except errors.GenericError:
6968
        self.cfg.ReleaseDRBDMinors(self.instance.name)
6969
        raise
6970

    
6971
    # We have new devices, shutdown the drbd on the old secondary
6972
    for idx, dev in enumerate(self.instance.disks):
6973
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6974
      self.cfg.SetDiskID(dev, self.target_node)
6975
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6976
      if msg:
6977
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))
6981

    
6982
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6983
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
6984
                                               self.node_secondary_ip,
6985
                                               self.instance.disks)\
6986
                                              [self.instance.primary_node]
6987

    
6988
    msg = result.fail_msg
6989
    if msg:
6990
      # detaches didn't succeed (unlikely)
6991
      self.cfg.ReleaseDRBDMinors(self.instance.name)
6992
      raise errors.OpExecError("Can't detach the disks from the network on"
6993
                               " old node: %s" % (msg,))
6994

    
6995
    # if we managed to detach at least one, we update all the disks of
6996
    # the instance to point to the new secondary
6997
    self.lu.LogInfo("Updating instance configuration")
6998
    for dev, _, new_logical_id in iv_names.itervalues():
6999
      dev.logical_id = new_logical_id
7000
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7001

    
7002
    self.cfg.Update(self.instance, feedback_fn)
7003

    
7004
    # and now perform the drbd attach
7005
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7006
                    " (standalone => connected)")
7007
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7008
                                            self.new_node],
7009
                                           self.node_secondary_ip,
7010
                                           self.instance.disks,
7011
                                           self.instance.name,
7012
                                           False)
7013
    for to_node, to_result in result.items():
7014
      msg = to_result.fail_msg
7015
      if msg:
7016
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7017
                           to_node, msg,
7018
                           hint=("please do a gnt-instance info to see the"
7019
                                 " status of disks"))
7020

    
7021
    # Wait for sync
7022
    # This can fail as the old devices are degraded and _WaitForSync
7023
    # does a combined result over all disks, so we don't check its return value
7024
    self.lu.LogStep(5, steps_total, "Sync devices")
7025
    _WaitForSync(self.lu, self.instance)
7026

    
7027
    # Check all devices manually
7028
    self._CheckDevices(self.instance.primary_node, iv_names)
7029

    
7030
    # Step: remove old storage
7031
    self.lu.LogStep(6, steps_total, "Removing old storage")
7032
    self._RemoveOldStorage(self.target_node, iv_names)
7033

    
7034

    
7035
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)

    self.op.node_name = node_name

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
7056
    """Ensure faulty disks abort the opcode or at least warn."""
7057
    try:
7058
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7059
                                  node_name, True):
7060
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7061
                                   " node '%s'" % (instance.name, node_name),
7062
                                   errors.ECODE_STATE)
7063
    except errors.OpPrereqError, err:
7064
      if self.op.ignore_consistency:
7065
        self.proc.LogWarning(str(err.args[0]))
7066
      else:
7067
        raise
7068

    
7069
  def CheckPrereq(self):
7070
    """Check prerequisites.
7071

7072
    """
7073
    storage_type = self.op.storage_type
7074

    
7075
    if (constants.SO_FIX_CONSISTENCY not in
7076
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7077
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7078
                                 " repaired" % storage_type,
7079
                                 errors.ECODE_INVAL)
7080

    
7081
    # Check whether any instance on this node has faulty disks
7082
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7083
      if not inst.admin_up:
7084
        continue
7085
      check_nodes = set(inst.all_nodes)
7086
      check_nodes.discard(self.op.node_name)
7087
      for inst_node_name in check_nodes:
7088
        self._CheckFaultyDisks(inst, inst_node_name)
7089

    
7090
  def Exec(self, feedback_fn):
7091
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7092
                (self.op.name, self.op.node_name))
7093

    
7094
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7095
    result = self.rpc.call_storage_execute(self.op.node_name,
7096
                                           self.op.storage_type, st_args,
7097
                                           self.op.name,
7098
                                           constants.SO_FIX_CONSISTENCY)
7099
    result.Raise("Failed to repair storage unit '%s' on %s" %
7100
                 (self.op.name, self.op.node_name))
7101

    
7102

    
7103
class LUGrowDisk(LogicalUnit):
7104
  """Grow a disk of an instance.
7105

7106
  """
7107
  HPATH = "disk-grow"
7108
  HTYPE = constants.HTYPE_INSTANCE
7109
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7110
  REQ_BGL = False
7111

    
7112
  def ExpandNames(self):
7113
    self._ExpandAndLockInstance()
7114
    self.needed_locks[locking.LEVEL_NODE] = []
7115
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7116

    
7117
  def DeclareLocks(self, level):
7118
    if level == locking.LEVEL_NODE:
7119
      self._LockInstancesNodes()
7120

    
7121
  def BuildHooksEnv(self):
7122
    """Build hooks env.
7123

7124
    This runs on the master, the primary and all the secondaries.
7125

7126
    """
7127
    env = {
7128
      "DISK": self.op.disk,
7129
      "AMOUNT": self.op.amount,
7130
      }
7131
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7132
    nl = [
7133
      self.cfg.GetMasterNode(),
7134
      self.instance.primary_node,
7135
      ]
7136
    return env, nl, nl
7137

    
7138
  def CheckPrereq(self):
7139
    """Check prerequisites.
7140

7141
    This checks that the instance is in the cluster.
7142

7143
    """
7144
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7145
    assert instance is not None, \
7146
      "Cannot retrieve locked instance %s" % self.op.instance_name
7147
    nodenames = list(instance.all_nodes)
7148
    for node in nodenames:
7149
      _CheckNodeOnline(self, node)
7150

    
7151

    
7152
    self.instance = instance
7153

    
7154
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7155
      raise errors.OpPrereqError("Instance's disk layout does not support"
7156
                                 " growing.", errors.ECODE_INVAL)
7157

    
7158
    self.disk = instance.FindDisk(self.op.disk)
7159

    
7160
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
7161
                                       instance.hypervisor)
7162
    for node in nodenames:
7163
      info = nodeinfo[node]
7164
      info.Raise("Cannot get current information from node %s" % node)
7165
      vg_free = info.payload.get('vg_free', None)
7166
      if not isinstance(vg_free, int):
7167
        raise errors.OpPrereqError("Can't compute free disk space on"
7168
                                   " node %s" % node, errors.ECODE_ENVIRON)
7169
      if self.op.amount > vg_free:
7170
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
7171
                                   " %d MiB available, %d MiB required" %
7172
                                   (node, vg_free, self.op.amount),
7173
                                   errors.ECODE_NORES)
7174

    
7175
  def Exec(self, feedback_fn):
7176
    """Execute disk grow.
7177

7178
    """
7179
    instance = self.instance
7180
    disk = self.disk
7181
    for node in instance.all_nodes:
7182
      self.cfg.SetDiskID(disk, node)
7183
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7184
      result.Raise("Grow request failed to node %s" % node)
7185
    disk.RecordGrow(self.op.amount)
7186
    self.cfg.Update(instance, feedback_fn)
7187
    if self.op.wait_for_sync:
7188
      disk_abort = not _WaitForSync(self, instance)
7189
      if disk_abort:
7190
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7191
                             " status.\nPlease check the instance.")
7192

    
7193

    
7194
class LUQueryInstanceData(NoHooksLU):
7195
  """Query runtime instance data.
7196

7197
  """
7198
  _OP_REQP = ["instances", "static"]
7199
  REQ_BGL = False
7200

    
7201
  def ExpandNames(self):
7202
    self.needed_locks = {}
7203
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7204

    
7205
    if not isinstance(self.op.instances, list):
7206
      raise errors.OpPrereqError("Invalid argument type 'instances'",
7207
                                 errors.ECODE_INVAL)
7208

    
7209
    if self.op.instances:
7210
      self.wanted_names = []
7211
      for name in self.op.instances:
7212
        full_name = self.cfg.ExpandInstanceName(name)
7213
        if full_name is None:
7214
          raise errors.OpPrereqError("Instance '%s' not known" % name,
7215
                                     errors.ECODE_NOENT)
7216
        self.wanted_names.append(full_name)
7217
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7218
    else:
7219
      self.wanted_names = None
7220
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7221

    
7222
    self.needed_locks[locking.LEVEL_NODE] = []
7223
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7224

    
7225
  def DeclareLocks(self, level):
7226
    if level == locking.LEVEL_NODE:
7227
      self._LockInstancesNodes()
7228

    
7229
  def CheckPrereq(self):
7230
    """Check prerequisites.
7231

7232
    This only checks the optional instance list against the existing names.
7233

7234
    """
7235
    if self.wanted_names is None:
7236
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7237

    
7238
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7239
                             in self.wanted_names]
7240
    return
7241

    
7242
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7243
    """Returns the status of a block device
7244

7245
    """
7246
    if self.op.static or not node:
7247
      return None
7248

    
7249
    self.cfg.SetDiskID(dev, node)
7250

    
7251
    result = self.rpc.call_blockdev_find(node, dev)
7252
    if result.offline:
7253
      return None
7254

    
7255
    result.Raise("Can't compute disk status for %s" % instance_name)
7256

    
7257
    status = result.payload
7258
    if status is None:
7259
      return None
7260

    
7261
    return (status.dev_path, status.major, status.minor,
7262
            status.sync_percent, status.estimated_time,
7263
            status.is_degraded, status.ldisk_status)
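    # Illustration (added note, values hypothetical): for a healthy, fully
    # synced DRBD device the returned tuple might look like
    #   ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY)
    # i.e. (dev_path, major, minor, sync_percent, estimated_time,
    #       is_degraded, ldisk_status).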
7264

    
7265
  def _ComputeDiskStatus(self, instance, snode, dev):
7266
    """Compute block device status.
7267

7268
    """
7269
    if dev.dev_type in constants.LDS_DRBD:
7270
      # we change the snode then (otherwise we use the one passed in)
7271
      if dev.logical_id[0] == instance.primary_node:
7272
        snode = dev.logical_id[1]
7273
      else:
7274
        snode = dev.logical_id[0]
7275

    
7276
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7277
                                              instance.name, dev)
7278
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7279

    
7280
    if dev.children:
7281
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
7282
                      for child in dev.children]
7283
    else:
7284
      dev_children = []
7285

    
7286
    data = {
7287
      "iv_name": dev.iv_name,
7288
      "dev_type": dev.dev_type,
7289
      "logical_id": dev.logical_id,
7290
      "physical_id": dev.physical_id,
7291
      "pstatus": dev_pstatus,
7292
      "sstatus": dev_sstatus,
7293
      "children": dev_children,
7294
      "mode": dev.mode,
7295
      "size": dev.size,
7296
      }
7297

    
7298
    return data
7299

    
7300
  def Exec(self, feedback_fn):
7301
    """Gather and return data"""
7302
    result = {}
7303

    
7304
    cluster = self.cfg.GetClusterInfo()
7305

    
7306
    for instance in self.wanted_instances:
7307
      if not self.op.static:
7308
        remote_info = self.rpc.call_instance_info(instance.primary_node,
7309
                                                  instance.name,
7310
                                                  instance.hypervisor)
7311
        remote_info.Raise("Error checking node %s" % instance.primary_node)
7312
        remote_info = remote_info.payload
7313
        if remote_info and "state" in remote_info:
7314
          remote_state = "up"
7315
        else:
7316
          remote_state = "down"
7317
      else:
7318
        remote_state = None
7319
      if instance.admin_up:
7320
        config_state = "up"
7321
      else:
7322
        config_state = "down"
7323

    
7324
      disks = [self._ComputeDiskStatus(instance, None, device)
7325
               for device in instance.disks]
7326

    
7327
      idict = {
7328
        "name": instance.name,
7329
        "config_state": config_state,
7330
        "run_state": remote_state,
7331
        "pnode": instance.primary_node,
7332
        "snodes": instance.secondary_nodes,
7333
        "os": instance.os,
7334
        # this happens to be the same format used for hooks
7335
        "nics": _NICListToTuple(self, instance.nics),
7336
        "disks": disks,
7337
        "hypervisor": instance.hypervisor,
7338
        "network_port": instance.network_port,
7339
        "hv_instance": instance.hvparams,
7340
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
7341
        "be_instance": instance.beparams,
7342
        "be_actual": cluster.FillBE(instance),
7343
        "serial_no": instance.serial_no,
7344
        "mtime": instance.mtime,
7345
        "ctime": instance.ctime,
7346
        "uuid": instance.uuid,
7347
        }
7348

    
7349
      result[instance.name] = idict
7350

    
7351
    return result
7352

    
7353

    
7354
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
7356

7357
  """
7358
  HPATH = "instance-modify"
7359
  HTYPE = constants.HTYPE_INSTANCE
7360
  _OP_REQP = ["instance_name"]
7361
  REQ_BGL = False
7362

    
7363
  def CheckArguments(self):
7364
    if not hasattr(self.op, 'nics'):
7365
      self.op.nics = []
7366
    if not hasattr(self.op, 'disks'):
7367
      self.op.disks = []
7368
    if not hasattr(self.op, 'beparams'):
7369
      self.op.beparams = {}
7370
    if not hasattr(self.op, 'hvparams'):
7371
      self.op.hvparams = {}
7372
    self.op.force = getattr(self.op, "force", False)
7373
    if not (self.op.nics or self.op.disks or
7374
            self.op.hvparams or self.op.beparams):
7375
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7376

    
7377
    if self.op.hvparams:
7378
      _CheckGlobalHvParams(self.op.hvparams)
7379

    
7380
    # Disk validation
7381
    disk_addremove = 0
7382
    for disk_op, disk_dict in self.op.disks:
7383
      if disk_op == constants.DDM_REMOVE:
7384
        disk_addremove += 1
7385
        continue
7386
      elif disk_op == constants.DDM_ADD:
7387
        disk_addremove += 1
7388
      else:
7389
        if not isinstance(disk_op, int):
7390
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7391
        if not isinstance(disk_dict, dict):
7392
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7393
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7394

    
7395
      if disk_op == constants.DDM_ADD:
7396
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7397
        if mode not in constants.DISK_ACCESS_SET:
7398
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7399
                                     errors.ECODE_INVAL)
7400
        size = disk_dict.get('size', None)
7401
        if size is None:
7402
          raise errors.OpPrereqError("Required disk parameter size missing",
7403
                                     errors.ECODE_INVAL)
7404
        try:
7405
          size = int(size)
7406
        except ValueError, err:
7407
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7408
                                     str(err), errors.ECODE_INVAL)
7409
        disk_dict['size'] = size
7410
      else:
7411
        # modification of disk
7412
        if 'size' in disk_dict:
7413
          raise errors.OpPrereqError("Disk size change not possible, use"
7414
                                     " grow-disk", errors.ECODE_INVAL)
7415

    
7416
    if disk_addremove > 1:
7417
      raise errors.OpPrereqError("Only one disk add or remove operation"
7418
                                 " supported at a time", errors.ECODE_INVAL)
7419

    
7420
    # NIC validation
7421
    nic_addremove = 0
7422
    for nic_op, nic_dict in self.op.nics:
7423
      if nic_op == constants.DDM_REMOVE:
7424
        nic_addremove += 1
7425
        continue
7426
      elif nic_op == constants.DDM_ADD:
7427
        nic_addremove += 1
7428
      else:
7429
        if not isinstance(nic_op, int):
7430
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7431
        if not isinstance(nic_dict, dict):
7432
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7433
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7434

    
7435
      # nic_dict should be a dict
7436
      nic_ip = nic_dict.get('ip', None)
7437
      if nic_ip is not None:
7438
        if nic_ip.lower() == constants.VALUE_NONE:
7439
          nic_dict['ip'] = None
7440
        else:
7441
          if not utils.IsValidIP(nic_ip):
7442
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7443
                                       errors.ECODE_INVAL)
7444

    
7445
      nic_bridge = nic_dict.get('bridge', None)
7446
      nic_link = nic_dict.get('link', None)
7447
      if nic_bridge and nic_link:
7448
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7449
                                   " at the same time", errors.ECODE_INVAL)
7450
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7451
        nic_dict['bridge'] = None
7452
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7453
        nic_dict['link'] = None
7454

    
7455
      if nic_op == constants.DDM_ADD:
7456
        nic_mac = nic_dict.get('mac', None)
7457
        if nic_mac is None:
7458
          nic_dict['mac'] = constants.VALUE_AUTO
7459

    
7460
      if 'mac' in nic_dict:
7461
        nic_mac = nic_dict['mac']
7462
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7463
          if not utils.IsValidMac(nic_mac):
7464
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac,
7465
                                       errors.ECODE_INVAL)
7466
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7467
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7468
                                     " modifying an existing nic",
7469
                                     errors.ECODE_INVAL)
7470

    
7471
    if nic_addremove > 1:
7472
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7473
                                 " supported at a time", errors.ECODE_INVAL)
7474
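    # Illustration (added note, not part of the original code; values are
    # hypothetical): self.op.disks and self.op.nics are lists of
    # (operation, parameters) pairs, for example
    #   op.disks = [(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]
    #   op.nics  = [(0, {"mac": "aa:00:00:11:22:33"}),
    #               (constants.DDM_REMOVE, {})]
    # i.e. either an index for modifying an existing device, or DDM_ADD /
    # DDM_REMOVE for adding or removing one, which is what the checks above
    # validate.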

    
7475
  def ExpandNames(self):
7476
    self._ExpandAndLockInstance()
7477
    self.needed_locks[locking.LEVEL_NODE] = []
7478
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7479

    
7480
  def DeclareLocks(self, level):
7481
    if level == locking.LEVEL_NODE:
7482
      self._LockInstancesNodes()
7483

    
7484
  def BuildHooksEnv(self):
7485
    """Build hooks env.
7486

7487
    This runs on the master, primary and secondaries.
7488

7489
    """
7490
    args = dict()
7491
    if constants.BE_MEMORY in self.be_new:
7492
      args['memory'] = self.be_new[constants.BE_MEMORY]
7493
    if constants.BE_VCPUS in self.be_new:
7494
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7495
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7496
    # information at all.
7497
    if self.op.nics:
7498
      args['nics'] = []
7499
      nic_override = dict(self.op.nics)
7500
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7501
      for idx, nic in enumerate(self.instance.nics):
7502
        if idx in nic_override:
7503
          this_nic_override = nic_override[idx]
7504
        else:
7505
          this_nic_override = {}
7506
        if 'ip' in this_nic_override:
7507
          ip = this_nic_override['ip']
7508
        else:
7509
          ip = nic.ip
7510
        if 'mac' in this_nic_override:
7511
          mac = this_nic_override['mac']
7512
        else:
7513
          mac = nic.mac
7514
        if idx in self.nic_pnew:
7515
          nicparams = self.nic_pnew[idx]
7516
        else:
7517
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7518
        mode = nicparams[constants.NIC_MODE]
7519
        link = nicparams[constants.NIC_LINK]
7520
        args['nics'].append((ip, mac, mode, link))
7521
      if constants.DDM_ADD in nic_override:
7522
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7523
        mac = nic_override[constants.DDM_ADD]['mac']
7524
        nicparams = self.nic_pnew[constants.DDM_ADD]
7525
        mode = nicparams[constants.NIC_MODE]
7526
        link = nicparams[constants.NIC_LINK]
7527
        args['nics'].append((ip, mac, mode, link))
7528
      elif constants.DDM_REMOVE in nic_override:
7529
        del args['nics'][-1]
7530

    
7531
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7532
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7533
    return env, nl, nl
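    # Illustration (added note, hypothetical values): the override dict built
    # above might look like
    #   {"memory": 1024, "vcpus": 2,
    #    "nics": [("198.51.100.10", "aa:00:00:11:22:33", "bridged", "xen-br0")]}
    # before being merged into the standard instance hook environment.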
7534

    
7535
  def _GetUpdatedParams(self, old_params, update_dict,
7536
                        default_values, parameter_types):
7537
    """Return the new params dict for the given params.
7538

7539
    @type old_params: dict
7540
    @param old_params: old parameters
7541
    @type update_dict: dict
7542
    @param update_dict: dict containing new parameter values,
7543
                        or constants.VALUE_DEFAULT to reset the
7544
                        parameter to its default value
7545
    @type default_values: dict
7546
    @param default_values: default values for the filled parameters
7547
    @type parameter_types: dict
7548
    @param parameter_types: dict mapping target dict keys to types
7549
                            in constants.ENFORCEABLE_TYPES
7550
    @rtype: (dict, dict)
7551
    @return: (new_parameters, filled_parameters)
7552

7553
    """
7554
    params_copy = copy.deepcopy(old_params)
7555
    for key, val in update_dict.iteritems():
7556
      if val == constants.VALUE_DEFAULT:
7557
        try:
7558
          del params_copy[key]
7559
        except KeyError:
7560
          pass
7561
      else:
7562
        params_copy[key] = val
7563
    utils.ForceDictType(params_copy, parameter_types)
7564
    params_filled = objects.FillDict(default_values, params_copy)
7565
    return (params_copy, params_filled)
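    # Usage sketch for the helper above (added note, made-up values):
    #   old_params  = {"memory": 512}
    #   update_dict = {"memory": constants.VALUE_DEFAULT, "vcpus": 2}
    #   defaults    = {"memory": 128, "vcpus": 1}
    # would yield new_parameters {"vcpus": 2} (the "memory" override is
    # dropped back to the default) and filled_parameters
    # {"memory": 128, "vcpus": 2}, assuming parameter_types enforces both
    # keys as integers.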
7566

    
7567
  def CheckPrereq(self):
7568
    """Check prerequisites.
7569

7570
    This only checks the instance list against the existing names.
7571

7572
    """
7573
    self.force = self.op.force
7574

    
7575
    # checking the new params on the primary/secondary nodes
7576

    
7577
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7578
    cluster = self.cluster = self.cfg.GetClusterInfo()
7579
    assert self.instance is not None, \
7580
      "Cannot retrieve locked instance %s" % self.op.instance_name
7581
    pnode = instance.primary_node
7582
    nodelist = list(instance.all_nodes)
7583

    
7584
    # hvparams processing
7585
    if self.op.hvparams:
7586
      i_hvdict, hv_new = self._GetUpdatedParams(
7587
                             instance.hvparams, self.op.hvparams,
7588
                             cluster.hvparams[instance.hypervisor],
7589
                             constants.HVS_PARAMETER_TYPES)
7590
      # local check
7591
      hypervisor.GetHypervisor(
7592
        instance.hypervisor).CheckParameterSyntax(hv_new)
7593
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7594
      self.hv_new = hv_new # the new actual values
7595
      self.hv_inst = i_hvdict # the new dict (without defaults)
7596
    else:
7597
      self.hv_new = self.hv_inst = {}
7598

    
7599
    # beparams processing
7600
    if self.op.beparams:
7601
      i_bedict, be_new = self._GetUpdatedParams(
7602
                             instance.beparams, self.op.beparams,
7603
                             cluster.beparams[constants.PP_DEFAULT],
7604
                             constants.BES_PARAMETER_TYPES)
7605
      self.be_new = be_new # the new actual values
7606
      self.be_inst = i_bedict # the new dict (without defaults)
7607
    else:
7608
      self.be_new = self.be_inst = {}
7609

    
7610
    self.warn = []
7611

    
7612
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7613
      mem_check_list = [pnode]
7614
      if be_new[constants.BE_AUTO_BALANCE]:
7615
        # either we changed auto_balance to yes or it was from before
7616
        mem_check_list.extend(instance.secondary_nodes)
7617
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
7618
                                                  instance.hypervisor)
7619
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7620
                                         instance.hypervisor)
7621
      pninfo = nodeinfo[pnode]
7622
      msg = pninfo.fail_msg
7623
      if msg:
7624
        # Assume the primary node is unreachable and go ahead
7625
        self.warn.append("Can't get info from primary node %s: %s" %
7626
                         (pnode,  msg))
7627
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
7628
        self.warn.append("Node data from primary node %s doesn't contain"
7629
                         " free memory information" % pnode)
7630
      elif instance_info.fail_msg:
7631
        self.warn.append("Can't get instance runtime information: %s" %
7632
                        instance_info.fail_msg)
7633
      else:
7634
        if instance_info.payload:
7635
          current_mem = int(instance_info.payload['memory'])
7636
        else:
7637
          # Assume instance not running
7638
          # (there is a slight race condition here, but it's not very probable,
7639
          # and we have no other way to check)
7640
          current_mem = 0
7641
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7642
                    pninfo.payload['memory_free'])
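        # Worked example (added note, hypothetical numbers): asking for
        # 2048 MB while the instance currently uses 512 MB and the primary
        # node reports 1024 MB free gives miss_mem = 2048 - 512 - 1024 = 512,
        # so the check below aborts with ECODE_NORES.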
7643
        if miss_mem > 0:
7644
          raise errors.OpPrereqError("This change will prevent the instance"
7645
                                     " from starting, due to %d MB of memory"
7646
                                     " missing on its primary node" % miss_mem,
7647
                                     errors.ECODE_NORES)
7648

    
7649
      if be_new[constants.BE_AUTO_BALANCE]:
7650
        for node, nres in nodeinfo.items():
7651
          if node not in instance.secondary_nodes:
7652
            continue
7653
          msg = nres.fail_msg
7654
          if msg:
7655
            self.warn.append("Can't get info from secondary node %s: %s" %
7656
                             (node, msg))
7657
          elif not isinstance(nres.payload.get('memory_free', None), int):
7658
            self.warn.append("Secondary node %s didn't return free"
7659
                             " memory information" % node)
7660
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7661
            self.warn.append("Not enough memory to failover instance to"
7662
                             " secondary node %s" % node)
7663

    
7664
    # NIC processing
7665
    self.nic_pnew = {}
7666
    self.nic_pinst = {}
7667
    for nic_op, nic_dict in self.op.nics:
7668
      if nic_op == constants.DDM_REMOVE:
7669
        if not instance.nics:
7670
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
7671
                                     errors.ECODE_INVAL)
7672
        continue
7673
      if nic_op != constants.DDM_ADD:
7674
        # an existing nic
7675
        if not instance.nics:
7676
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
7677
                                     " no NICs" % nic_op,
7678
                                     errors.ECODE_INVAL)
7679
        if nic_op < 0 or nic_op >= len(instance.nics):
7680
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7681
                                     " are 0 to %d" %
7682
                                     (nic_op, len(instance.nics) - 1),
7683
                                     errors.ECODE_INVAL)
7684
        old_nic_params = instance.nics[nic_op].nicparams
7685
        old_nic_ip = instance.nics[nic_op].ip
7686
      else:
7687
        old_nic_params = {}
7688
        old_nic_ip = None
7689

    
7690
      update_params_dict = dict([(key, nic_dict[key])
7691
                                 for key in constants.NICS_PARAMETERS
7692
                                 if key in nic_dict])
7693

    
7694
      if 'bridge' in nic_dict:
7695
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7696

    
7697
      new_nic_params, new_filled_nic_params = \
7698
          self._GetUpdatedParams(old_nic_params, update_params_dict,
7699
                                 cluster.nicparams[constants.PP_DEFAULT],
7700
                                 constants.NICS_PARAMETER_TYPES)
7701
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7702
      self.nic_pinst[nic_op] = new_nic_params
7703
      self.nic_pnew[nic_op] = new_filled_nic_params
7704
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7705

    
7706
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
7707
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7708
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7709
        if msg:
7710
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7711
          if self.force:
7712
            self.warn.append(msg)
7713
          else:
7714
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
7715
      if new_nic_mode == constants.NIC_MODE_ROUTED:
7716
        if 'ip' in nic_dict:
7717
          nic_ip = nic_dict['ip']
7718
        else:
7719
          nic_ip = old_nic_ip
7720
        if nic_ip is None:
7721
          raise errors.OpPrereqError('Cannot set the nic ip to None'
7722
                                     ' on a routed nic', errors.ECODE_INVAL)
7723
      if 'mac' in nic_dict:
7724
        nic_mac = nic_dict['mac']
7725
        if nic_mac is None:
7726
          raise errors.OpPrereqError('Cannot set the nic mac to None',
7727
                                     errors.ECODE_INVAL)
7728
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7729
          # otherwise generate the mac
7730
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
7731
        else:
7732
          # or validate/reserve the current one
7733
          try:
7734
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
7735
          except errors.ReservationError:
7736
            raise errors.OpPrereqError("MAC address %s already in use"
7737
                                       " in cluster" % nic_mac,
7738
                                       errors.ECODE_NOTUNIQUE)
7739

    
7740
    # DISK processing
7741
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7742
      raise errors.OpPrereqError("Disk operations not supported for"
7743
                                 " diskless instances",
7744
                                 errors.ECODE_INVAL)
7745
    for disk_op, disk_dict in self.op.disks:
7746
      if disk_op == constants.DDM_REMOVE:
7747
        if len(instance.disks) == 1:
7748
          raise errors.OpPrereqError("Cannot remove the last disk of"
7749
                                     " an instance",
7750
                                     errors.ECODE_INVAL)
7751
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7752
        ins_l = ins_l[pnode]
7753
        msg = ins_l.fail_msg
7754
        if msg:
7755
          raise errors.OpPrereqError("Can't contact node %s: %s" %
7756
                                     (pnode, msg), errors.ECODE_ENVIRON)
7757
        if instance.name in ins_l.payload:
7758
          raise errors.OpPrereqError("Instance is running, can't remove"
7759
                                     " disks.", errors.ECODE_STATE)
7760

    
7761
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
7763
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7764
                                   " add more" % constants.MAX_DISKS,
7765
                                   errors.ECODE_STATE)
7766
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7767
        # an existing disk
7768
        if disk_op < 0 or disk_op >= len(instance.disks):
7769
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
7770
                                     " are 0 to %d" %
7771
                                     (disk_op, len(instance.disks)),
7772
                                     errors.ECODE_INVAL)
7773

    
7774
    return
7775

    
7776
  def Exec(self, feedback_fn):
7777
    """Modifies an instance.
7778

7779
    All parameters take effect only at the next restart of the instance.
7780

7781
    """
7782
    # Process here the warnings from CheckPrereq, as we don't have a
7783
    # feedback_fn there.
7784
    for warn in self.warn:
7785
      feedback_fn("WARNING: %s" % warn)
7786

    
7787
    result = []
7788
    instance = self.instance
7789
    cluster = self.cluster
7790
    # disk changes
7791
    for disk_op, disk_dict in self.op.disks:
7792
      if disk_op == constants.DDM_REMOVE:
7793
        # remove the last disk
7794
        device = instance.disks.pop()
7795
        device_idx = len(instance.disks)
7796
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7797
          self.cfg.SetDiskID(disk, node)
7798
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7799
          if msg:
7800
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7801
                            " continuing anyway", device_idx, node, msg)
7802
        result.append(("disk/%d" % device_idx, "remove"))
7803
      elif disk_op == constants.DDM_ADD:
7804
        # add a new disk
7805
        if instance.disk_template == constants.DT_FILE:
7806
          file_driver, file_path = instance.disks[0].logical_id
7807
          file_path = os.path.dirname(file_path)
7808
        else:
7809
          file_driver = file_path = None
7810
        disk_idx_base = len(instance.disks)
7811
        new_disk = _GenerateDiskTemplate(self,
7812
                                         instance.disk_template,
7813
                                         instance.name, instance.primary_node,
7814
                                         instance.secondary_nodes,
7815
                                         [disk_dict],
7816
                                         file_path,
7817
                                         file_driver,
7818
                                         disk_idx_base)[0]
7819
        instance.disks.append(new_disk)
7820
        info = _GetInstanceInfoText(instance)
7821

    
7822
        logging.info("Creating volume %s for instance %s",
7823
                     new_disk.iv_name, instance.name)
7824
        # Note: this needs to be kept in sync with _CreateDisks
7825
        #HARDCODE
7826
        for node in instance.all_nodes:
7827
          f_create = node == instance.primary_node
7828
          try:
7829
            _CreateBlockDev(self, node, instance, new_disk,
7830
                            f_create, info, f_create)
7831
          except errors.OpExecError, err:
7832
            self.LogWarning("Failed to create volume %s (%s) on"
7833
                            " node %s: %s",
7834
                            new_disk.iv_name, new_disk, node, err)
7835
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7836
                       (new_disk.size, new_disk.mode)))
7837
      else:
7838
        # change a given disk
7839
        instance.disks[disk_op].mode = disk_dict['mode']
7840
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7841
    # NIC changes
7842
    for nic_op, nic_dict in self.op.nics:
7843
      if nic_op == constants.DDM_REMOVE:
7844
        # remove the last nic
7845
        del instance.nics[-1]
7846
        result.append(("nic.%d" % len(instance.nics), "remove"))
7847
      elif nic_op == constants.DDM_ADD:
7848
        # mac and bridge should be set by now
7849
        mac = nic_dict['mac']
7850
        ip = nic_dict.get('ip', None)
7851
        nicparams = self.nic_pinst[constants.DDM_ADD]
7852
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7853
        instance.nics.append(new_nic)
7854
        result.append(("nic.%d" % (len(instance.nics) - 1),
7855
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7856
                       (new_nic.mac, new_nic.ip,
7857
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7858
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7859
                       )))
7860
      else:
7861
        for key in 'mac', 'ip':
7862
          if key in nic_dict:
7863
            setattr(instance.nics[nic_op], key, nic_dict[key])
7864
        if nic_op in self.nic_pinst:
7865
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
7866
        for key, val in nic_dict.iteritems():
7867
          result.append(("nic.%s/%d" % (key, nic_op), val))
7868

    
7869
    # hvparams changes
7870
    if self.op.hvparams:
7871
      instance.hvparams = self.hv_inst
7872
      for key, val in self.op.hvparams.iteritems():
7873
        result.append(("hv/%s" % key, val))
7874

    
7875
    # beparams changes
7876
    if self.op.beparams:
7877
      instance.beparams = self.be_inst
7878
      for key, val in self.op.beparams.iteritems():
7879
        result.append(("be/%s" % key, val))
7880

    
7881
    self.cfg.Update(instance, feedback_fn)
7882

    
7883
    return result
7884

    
7885

    
7886
class LUQueryExports(NoHooksLU):
7887
  """Query the exports list
7888

7889
  """
7890
  _OP_REQP = ['nodes']
7891
  REQ_BGL = False
7892

    
7893
  def ExpandNames(self):
7894
    self.needed_locks = {}
7895
    self.share_locks[locking.LEVEL_NODE] = 1
7896
    if not self.op.nodes:
7897
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7898
    else:
7899
      self.needed_locks[locking.LEVEL_NODE] = \
7900
        _GetWantedNodes(self, self.op.nodes)
7901

    
7902
  def CheckPrereq(self):
7903
    """Check prerequisites.
7904

7905
    """
7906
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
7907

    
7908
  def Exec(self, feedback_fn):
7909
    """Compute the list of all the exported system images.
7910

7911
    @rtype: dict
7912
    @return: a dictionary with the structure node->(export-list)
7913
        where export-list is a list of the instances exported on
7914
        that node.
7915

7916
    """
7917
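    # Shape of the result (added illustration, hypothetical node and instance
    # names): a node that answered maps to its export list, a node that
    # failed maps to False, e.g.
    #   {"node1.example.com": ["instance1.example.com"],
    #    "node2.example.com": False}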
    rpcresult = self.rpc.call_export_list(self.nodes)
7918
    result = {}
7919
    for node in rpcresult:
7920
      if rpcresult[node].fail_msg:
7921
        result[node] = False
7922
      else:
7923
        result[node] = rpcresult[node].payload
7924

    
7925
    return result
7926

    
7927

    
7928
class LUExportInstance(LogicalUnit):
7929
  """Export an instance to an image in the cluster.
7930

7931
  """
7932
  HPATH = "instance-export"
7933
  HTYPE = constants.HTYPE_INSTANCE
7934
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
7935
  REQ_BGL = False
7936

    
7937
  def CheckArguments(self):
7938
    """Check the arguments.
7939

7940
    """
7941
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
7942
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
7943

    
7944
  def ExpandNames(self):
7945
    self._ExpandAndLockInstance()
7946
    # FIXME: lock only instance primary and destination node
7947
    #
7948
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
7951
    #  - making a tasklet to search (share-lock all), then create the new one,
7952
    #    then one to remove, after
7953
    #  - removing the removal operation altogether
7954
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7955

    
7956
  def DeclareLocks(self, level):
7957
    """Last minute lock declaration."""
7958
    # All nodes are locked anyway, so nothing to do here.
7959

    
7960
  def BuildHooksEnv(self):
7961
    """Build hooks env.
7962

7963
    This will run on the master, primary node and target node.
7964

7965
    """
7966
    env = {
7967
      "EXPORT_NODE": self.op.target_node,
7968
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
7969
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
7970
      }
7971
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7972
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
7973
          self.op.target_node]
7974
    return env, nl, nl
7975

    
7976
  def CheckPrereq(self):
7977
    """Check prerequisites.
7978

7979
    This checks that the instance and node names are valid.
7980

7981
    """
7982
    instance_name = self.op.instance_name
7983
    self.instance = self.cfg.GetInstanceInfo(instance_name)
7984
    assert self.instance is not None, \
7985
          "Cannot retrieve locked instance %s" % self.op.instance_name
7986
    _CheckNodeOnline(self, self.instance.primary_node)
7987

    
7988
    self.dst_node = self.cfg.GetNodeInfo(
7989
      self.cfg.ExpandNodeName(self.op.target_node))
7990

    
7991
    if self.dst_node is None:
7992
      # This means the node name is wrong, not that the node was left unlocked
7993
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node,
7994
                                 errors.ECODE_NOENT)
7995
    _CheckNodeOnline(self, self.dst_node.name)
7996
    _CheckNodeNotDrained(self, self.dst_node.name)
7997

    
7998
    # instance disk type verification
7999
    for disk in self.instance.disks:
8000
      if disk.dev_type == constants.LD_FILE:
8001
        raise errors.OpPrereqError("Export not supported for instances with"
8002
                                   " file-based disks", errors.ECODE_INVAL)
8003

    
8004
  def Exec(self, feedback_fn):
8005
    """Export an instance to an image in the cluster.
8006

8007
    """
8008
    instance = self.instance
8009
    dst_node = self.dst_node
8010
    src_node = instance.primary_node
8011

    
8012
    if self.op.shutdown:
8013
      # shutdown the instance, but not the disks
8014
      feedback_fn("Shutting down instance %s" % instance.name)
8015
      result = self.rpc.call_instance_shutdown(src_node, instance,
8016
                                               self.shutdown_timeout)
8017
      result.Raise("Could not shutdown instance %s on"
8018
                   " node %s" % (instance.name, src_node))
8019

    
8020
    vgname = self.cfg.GetVGName()
8021

    
8022
    snap_disks = []
8023

    
8024
    # set the disks ID correctly since call_instance_start needs the
8025
    # correct drbd minor to create the symlinks
8026
    for disk in instance.disks:
8027
      self.cfg.SetDiskID(disk, src_node)
8028

    
8029
    activate_disks = (not instance.admin_up)
8030

    
8031
    if activate_disks:
8032
      # Activate the instance disks if we're exporting a stopped instance
8033
      feedback_fn("Activating disks for %s" % instance.name)
8034
      _StartInstanceDisks(self, instance, None)
8035

    
8036
    try:
8037
      # per-disk results
8038
      dresults = []
8039
      try:
8040
        for idx, disk in enumerate(instance.disks):
8041
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
8042
                      (idx, src_node))
8043

    
8044
          # result.payload will be a snapshot of an lvm leaf of the one we
8045
          # passed
8046
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
8047
          msg = result.fail_msg
8048
          if msg:
8049
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8050
                            idx, src_node, msg)
8051
            snap_disks.append(False)
8052
          else:
8053
            disk_id = (vgname, result.payload)
8054
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8055
                                   logical_id=disk_id, physical_id=disk_id,
8056
                                   iv_name=disk.iv_name)
8057
            snap_disks.append(new_dev)
8058

    
8059
      finally:
8060
        if self.op.shutdown and instance.admin_up:
8061
          feedback_fn("Starting instance %s" % instance.name)
8062
          result = self.rpc.call_instance_start(src_node, instance, None, None)
8063
          msg = result.fail_msg
8064
          if msg:
8065
            _ShutdownInstanceDisks(self, instance)
8066
            raise errors.OpExecError("Could not start instance: %s" % msg)
8067

    
8068
      # TODO: check for size
8069

    
8070
      cluster_name = self.cfg.GetClusterName()
8071
      for idx, dev in enumerate(snap_disks):
8072
        feedback_fn("Exporting snapshot %s from %s to %s" %
8073
                    (idx, src_node, dst_node.name))
8074
        if dev:
8075
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8076
                                                 instance, cluster_name, idx)
8077
          msg = result.fail_msg
8078
          if msg:
8079
            self.LogWarning("Could not export disk/%s from node %s to"
8080
                            " node %s: %s", idx, src_node, dst_node.name, msg)
8081
            dresults.append(False)
8082
          else:
8083
            dresults.append(True)
8084
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8085
          if msg:
8086
            self.LogWarning("Could not remove snapshot for disk/%d from node"
8087
                            " %s: %s", idx, src_node, msg)
8088
        else:
8089
          dresults.append(False)
8090

    
8091
      feedback_fn("Finalizing export on %s" % dst_node.name)
8092
      result = self.rpc.call_finalize_export(dst_node.name, instance,
8093
                                             snap_disks)
8094
      fin_resu = True
8095
      msg = result.fail_msg
8096
      if msg:
8097
        self.LogWarning("Could not finalize export for instance %s"
8098
                        " on node %s: %s", instance.name, dst_node.name, msg)
8099
        fin_resu = False
8100

    
8101
    finally:
8102
      if activate_disks:
8103
        feedback_fn("Deactivating disks for %s" % instance.name)
8104
        _ShutdownInstanceDisks(self, instance)
8105

    
8106
    nodelist = self.cfg.GetNodeList()
8107
    nodelist.remove(dst_node.name)
8108

    
8109
    # on one-node clusters nodelist will be empty after the removal
8110
    # if we proceed the backup would be removed because OpQueryExports
8111
    # substitutes an empty list with the full cluster node list.
8112
    iname = instance.name
8113
    if nodelist:
8114
      feedback_fn("Removing old exports for instance %s" % iname)
8115
      exportlist = self.rpc.call_export_list(nodelist)
8116
      for node in exportlist:
8117
        if exportlist[node].fail_msg:
8118
          continue
8119
        if iname in exportlist[node].payload:
8120
          msg = self.rpc.call_export_remove(node, iname).fail_msg
8121
          if msg:
8122
            self.LogWarning("Could not remove older export for instance %s"
8123
                            " on node %s: %s", iname, node, msg)
8124
    return fin_resu, dresults
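    # Illustration (added note, hypothetical values): the LU returns a pair
    # such as
    #   (True, [True, False])
    # meaning the export was finalized on the destination node, disk/0 was
    # exported successfully and disk/1 was not.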
8125

    
8126

    
8127
class LURemoveExport(NoHooksLU):
8128
  """Remove exports related to the named instance.
8129

8130
  """
8131
  _OP_REQP = ["instance_name"]
8132
  REQ_BGL = False
8133

    
8134
  def ExpandNames(self):
8135
    self.needed_locks = {}
8136
    # We need all nodes to be locked in order for RemoveExport to work, but we
8137
    # don't need to lock the instance itself, as nothing will happen to it (and
8138
    # we can remove exports also for a removed instance)
8139
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8140

    
8141
  def CheckPrereq(self):
8142
    """Check prerequisites.
8143
    """
8144
    pass
8145

    
8146
  def Exec(self, feedback_fn):
8147
    """Remove any export.
8148

8149
    """
8150
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8151
    # If the instance was not found we'll try with the name that was passed in.
8152
    # This will only work if it was an FQDN, though.
8153
    fqdn_warn = False
8154
    if not instance_name:
8155
      fqdn_warn = True
8156
      instance_name = self.op.instance_name
8157

    
8158
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8159
    exportlist = self.rpc.call_export_list(locked_nodes)
8160
    found = False
8161
    for node in exportlist:
8162
      msg = exportlist[node].fail_msg
8163
      if msg:
8164
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8165
        continue
8166
      if instance_name in exportlist[node].payload:
8167
        found = True
8168
        result = self.rpc.call_export_remove(node, instance_name)
8169
        msg = result.fail_msg
8170
        if msg:
8171
          logging.error("Could not remove export for instance %s"
8172
                        " on node %s: %s", instance_name, node, msg)
8173

    
8174
    if fqdn_warn and not found:
8175
      feedback_fn("Export not found. If trying to remove an export belonging"
8176
                  " to a deleted instance please use its Fully Qualified"
8177
                  " Domain Name.")
8178

    
8179

    
8180
class TagsLU(NoHooksLU):
8181
  """Generic tags LU.
8182

8183
  This is an abstract class which is the parent of all the other tags LUs.
8184

8185
  """
8186

    
8187
  def ExpandNames(self):
8188
    self.needed_locks = {}
8189
    if self.op.kind == constants.TAG_NODE:
8190
      name = self.cfg.ExpandNodeName(self.op.name)
8191
      if name is None:
8192
        raise errors.OpPrereqError("Invalid node name (%s)" %
8193
                                   (self.op.name,), errors.ECODE_NOENT)
8194
      self.op.name = name
8195
      self.needed_locks[locking.LEVEL_NODE] = name
8196
    elif self.op.kind == constants.TAG_INSTANCE:
8197
      name = self.cfg.ExpandInstanceName(self.op.name)
8198
      if name is None:
8199
        raise errors.OpPrereqError("Invalid instance name (%s)" %
8200
                                   (self.op.name,), errors.ECODE_NOENT)
8201
      self.op.name = name
8202
      self.needed_locks[locking.LEVEL_INSTANCE] = name
8203

    
8204
  def CheckPrereq(self):
8205
    """Check prerequisites.
8206

8207
    """
8208
    if self.op.kind == constants.TAG_CLUSTER:
8209
      self.target = self.cfg.GetClusterInfo()
8210
    elif self.op.kind == constants.TAG_NODE:
8211
      self.target = self.cfg.GetNodeInfo(self.op.name)
8212
    elif self.op.kind == constants.TAG_INSTANCE:
8213
      self.target = self.cfg.GetInstanceInfo(self.op.name)
8214
    else:
8215
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8216
                                 str(self.op.kind), errors.ECODE_INVAL)
8217

    
8218

    
8219
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
8234
  """Searches the tags for a given pattern.
8235

8236
  """
8237
  _OP_REQP = ["pattern"]
8238
  REQ_BGL = False
8239

    
8240
  def ExpandNames(self):
8241
    self.needed_locks = {}
8242

    
8243
  def CheckPrereq(self):
8244
    """Check prerequisites.
8245

8246
    This checks the pattern passed for validity by compiling it.
8247

8248
    """
8249
    try:
8250
      self.re = re.compile(self.op.pattern)
8251
    except re.error, err:
8252
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8253
                                 (self.op.pattern, err), errors.ECODE_INVAL)
8254

    
8255
  def Exec(self, feedback_fn):
    """Returns the list of matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
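
  # Illustrative example (added commentary, not original code): if the
  # cluster and one instance both carry the tag "prod", Exec would return
  # something like the following (the instance name is hypothetical):
  #
  #   [("/cluster", "prod"),
  #    ("/instances/inst1.example.com", "prod")]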


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_data, in_text, out_data, out_text), that
      represent the input (to the external script) in data structure and
      text format, and the output from it, again in both formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]
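
  # Illustrative usage sketch (added commentary, not original code; the
  # concrete values below are hypothetical). Callers such as
  # LUTestAllocator.Exec drive this class roughly as follows:
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com",
  #                    mem_size=512,
  #                    disks=[{"size": 1024, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8,
  #                    os="debootstrap", tags=[], nics=[], vcpus=1,
  #                    hypervisor=constants.HT_XEN_PVM)
  #   ial.Run("hail")             # runs the external script on the master
  #   if ial.success:
  #     target_nodes = ial.nodes  # node names proposed by the allocator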

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)
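
  # For orientation (added commentary, not original code): after
  # _BuildInputData the self.in_data dictionary has roughly this shape,
  # before being serialized into self.in_text. Node and instance names
  # below are hypothetical examples:
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "...",
  #    "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodes": {"node1.example.com": {"tags": [...], "total_memory": ...,
  #                                    "free_memory": ..., ...}, ...},
  #    "instances": {"inst1.example.com": {"memory": ..., "disks": [...],
  #                                        ...}, ...},
  #    "request": {"type": "allocate" or "relocate", ...}}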

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
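
  # A well-formed result accepted by _ValidateResult would look roughly like
  # the hypothetical example below (added commentary, not original code):
  #
  #   {"success": True,
  #    "info": "allocation successful",
  #    "nodes": ["node1.example.com", "node2.example.com"]}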


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name, errors.ECODE_NOENT)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result