Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 5f23e043

History | View | Annotate | Download (309.2 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
  """Base class for all logical units (LUs).

  A concrete subclass is expected to:
    - implement ExpandNames
    - implement CheckPrereq (not needed when tasklets are used)
    - implement Exec (not needed when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine its run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Initialise the common LU state and validate the opcode.

    Every attribute named in the class-level C{_OP_REQP} list must be
    present on the opcode with a value other than None; otherwise
    L{errors.OpPrereqError} is raised. Once that check passes,
    L{CheckArguments} is invoked so that subclasses can do their own
    syntactic validation.

    """
    # collaborators handed in by the caller
    self.proc = processor
    self.op = op
    self.context = context
    self.cfg = context.cfg
    self.rpc = rpc

    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # the SSH runner is created on first use, see the "ssh" property
    self.__ssh = None

    # logging shortcuts borrowed from the processor
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep

    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for required in self._OP_REQP:
      if getattr(op, required, None) is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   required, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Return the SshRunner object, creating it on first access.

    """
    runner = self.__ssh
    if not runner:
      runner = ssh.SshRunner(self.cfg.GetClusterName())
      self.__ssh = runner
    return runner

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This hook is meant for simple, purely syntactic checks of the opcode
    parameters, without any cluster-related checks. The same could be
    done in ExpandNames and/or CheckPrereq, but keeping it separate is
    better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The method is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    The default implementation does nothing.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # Only concurrent LUs (REQ_BGL unset) are forced to provide their own
    # implementation, so that old LUs don't need to be changed all at the
    # same time.
    if not self.REQ_BGL:
      raise NotImplementedError

    self.needed_locks = {} # Exclusive LUs don't need locks.

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """
    pass

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation delegates to the CheckPrereq method of
    each tasklet, in order; without tasklets it raises
    NotImplementedError.

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (pos, tasklet) in enumerate(self.tasklets):
      logging.debug("Checking prerequisites for tasklet %s/%s",
                    pos + 1, len(self.tasklets))
      tasklet.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation runs the Exec method of each tasklet, in
    order, passing on feedback_fn; without tasklets it raises
    NotImplementedError.

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (pos, tasklet) in enumerate(self.tasklets):
      logging.debug("Executing tasklet %s/%s", pos + 1, len(self.tasklets))
      tasklet.Exec(feedback_fn)

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    @raise errors.OpPrereqError: if the instance name can't be expanded

    """
    locks = self.needed_locks
    if locks is None:
      locks = self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in locks, \
        "_ExpandAndLockInstance called with instance-level locks set"

    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if full_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)

    locks[locking.LEVEL_INSTANCE] = full_name
    self.op.instance_name = full_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with the primary (and, unless primary_only is set, secondary) nodes of
    the instances listed in self.acquired_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    node_level = locking.LEVEL_NODE
    assert node_level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    node_names = []
    for inst_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      inst = self.context.cfg.GetInstanceInfo(inst_name)
      node_names.append(inst.primary_node)
      if not primary_only:
        node_names.extend(inst.secondary_nodes)

    # The recalculation mode decides whether the node list replaces or
    # extends what was declared before; any other mode leaves needed_locks
    # untouched.
    mode = self.recalculate_locks[node_level]
    if mode == constants.LOCKS_REPLACE:
      self.needed_locks[node_level] = node_names
    elif mode == constants.LOCKS_APPEND:
      self.needed_locks[node_level].extend(node_names)

    del self.recalculate_locks[node_level]
348

    
349

    
350
class NoHooksLU(LogicalUnit):
  """Base class for logical units that run no hooks.

  Both hook-related class attributes are fixed to None here, so that
  LUs without hooks can simply inherit from this class instead of
  repeating the same definitions.

  """
  HTYPE = None
  HPATH = None
359

    
360

    
361
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Remember the owning LU and copy its cfg and rpc references.

    """
    self.lu = lu

    # Shortcuts to the LU's configuration and RPC objects
    self.cfg, self.rpc = lu.cfg, lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
405

    
406

    
407
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names to expand
  @rtype: list
  @return: the expanded node names, sorted with L{utils.NiceSort}
  @raise errors.OpPrereqError: if the nodes parameter is not a list, or
      if any of the passed names cannot be expanded
  @raise errors.ProgrammerError: if the nodes parameter is an empty list

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  def _Expand(short_name):
    """Expand a single node name, failing if it is unknown."""
    full_name = lu.cfg.ExpandNodeName(short_name)
    if full_name is None:
      raise errors.OpPrereqError("No such node name '%s'" % short_name,
                                 errors.ECODE_NOENT)
    return full_name

  return utils.NiceSort([_Expand(entry) for entry in nodes])
436

    
437

    
438
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names; an empty list selects all
      instances known to the configuration
  @rtype: list
  @return: the expanded names in the order they were passed, or, for an
      empty input list, all instance names sorted with L{utils.NiceSort}
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  # nothing requested explicitly: fall back to every configured instance
  if not instances:
    return utils.NiceSort(lu.cfg.GetInstanceList())

  expanded = []
  for short_name in instances:
    full_name = lu.cfg.ExpandInstanceName(short_name)
    if full_name is None:
      raise errors.OpPrereqError("No such instance name '%s'" % short_name,
                                 errors.ECODE_NOENT)
    expanded.append(full_name)

  return expanded
468

    
469

    
470
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  The static and dynamic field sets are merged, and every selected field
  must match the merged set.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the field names requested by the caller
  @raise errors.OpPrereqError: if any selected field matches neither set

  """
  known = utils.FieldSet()
  for field_set in (static, dynamic):
    known.Extend(field_set)

  unknown = known.NonMatching(selected)
  if not unknown:
    return

  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(unknown), errors.ECODE_INVAL)
487

    
488

    
489
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  The attribute must be either a boolean or None. After the call the
  attribute is guaranteed to exist on the opcode: a missing attribute is
  created with the value None.

  @param op: the opcode to inspect and update
  @param name: the name of the attribute to validate
  @raise errors.OpPrereqError: if the value is neither None nor a boolean

  """
  current = getattr(op, name, None)
  if current is not None and not isinstance(current, bool):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(current)), errors.ECODE_INVAL)
  # written back so that the attribute exists even if it was missing
  setattr(op, name, current)
501

    
502

    
503
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @param params: the hypervisor parameter names to check, intersected
      with L{constants.HVC_GLOBALS}
  @raise errors.OpPrereqError: if any of the parameters is a global one

  """
  offending = constants.HVC_GLOBALS.intersection(params)
  if not offending:
    return

  msg = ("The following hypervisor parameters are global and cannot"
         " be customized at instance level, please modify them at"
         " cluster level: %s" % utils.CommaJoin(offending))
  raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
516

    
517

    
518
def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  The check looks at the C{offline} flag of the node's configuration
  object.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)
529

    
530

    
531
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  The check looks at the C{drained} flag of the node's configuration
  object.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)
542

    
543

    
544
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  str_status = "down"
  if status:
    str_status = "up"

  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  # per-NIC variables; a false value for nics (e.g. None) means "no NICs"
  nic_count = 0
  if nics:
    nic_count = len(nics)
    for idx, nic in enumerate(nics):
      (ip, mac, mode, link) = nic
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      # for bridged NICs the link is additionally exported as the bridge
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  env["INSTANCE_NIC_COUNT"] = nic_count

  # per-disk variables, same convention as for the NICs
  disk_count = 0
  if disks:
    disk_count = len(disks)
    for idx, disk in enumerate(disks):
      (size, mode) = disk
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  env["INSTANCE_DISK_COUNT"] = disk_count

  # backend and hypervisor parameters, each under its own prefix
  for kind, params in (("BE", bep), ("HV", hvp)):
    for key, value in params.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
630

    
631

    
632
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  The mode and link of each NIC are looked up after filling the NIC's own
  parameters on top of the cluster's default NIC parameters.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one (ip, mac, mode, link) tuple per NIC, in input order

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]

  def _Describe(nic):
    """Turn a single NIC object into its hooks tuple."""
    ip = nic.ip
    mac = nic.mac
    merged = objects.FillDict(cluster_defaults, nic.nicparams)
    return (ip, mac, merged[constants.NIC_MODE], merged[constants.NIC_LINK])

  return [_Describe(nic) for nic in nics]
654

    
655

    
656
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  The keyword arguments for L{_BuildInstanceHookEnv} are derived from the
  instance object and from the cluster-filled backend and hypervisor
  parameters.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster_info = lu.cfg.GetClusterInfo()
  be_params = cluster_info.FillBE(instance)
  hv_params = cluster_info.FillHV(instance)

  kwargs = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_up,
    memory=be_params[constants.BE_MEMORY],
    vcpus=be_params[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=be_params,
    hvp=hv_params,
    hypervisor_name=instance.hypervisor,
    )

  # caller-supplied values win over the ones derived from the object
  if override:
    kwargs.update(override)

  return _BuildInstanceHookEnv(**kwargs)
692

    
693

    
694
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Asks the configuration to maintain the candidate pool, re-adds every
  node it reports as modified to the context, and logs a note when there
  are more master candidates than desired.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed through unchanged to
      C{MaintainCandidatePool} and C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    promoted_names = utils.CommaJoin(node.name for node in promoted)
    lu.LogInfo("Promoted nodes to master candidate role: %s", promoted_names)
    # the list holds node objects (see the .name access above)
    for node in promoted:
      lu.context.ReaddNode(node)

  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
708

    
709

    
710
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed through unchanged to
      C{GetMasterCandidateStats}
  @rtype: boolean
  @return: True if the current number of candidates is below the desired
      number, computed as if one more node was present and capped at the
      cluster's candidate pool size

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  current, desired, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase the desired count by one, but never above
  # the configured pool size
  return current < min(desired + 1, pool_size)
719

    
720

    
721
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters are in bridged mode are considered;
  their links are checked on the target node with a single RPC call. If
  no NIC is bridged, no call is made.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist
  @param profile: the key of the cluster NIC parameters used as defaults

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[profile]
  filled_params = [objects.FillDict(cluster_defaults, nic.nicparams)
                   for nic in target_nics]

  bridges = []
  for params in filled_params:
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
735

    
736

    
737
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node when None

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
744

    
745

    
746
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  An OS without supported variants accepts any name. Otherwise the
  variant is whatever follows the first "+" in the name, and it must be
  one of the supported variants.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name carries no variant, or an
      unsupported one

  """
  if not os_obj.supported_variants:
    return

  pieces = name.split("+", 1)
  if len(pieces) < 2:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if pieces[1] not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
765

    
766

    
767
def _GetNodeInstancesInner(cfg, fn):
  """Returns the instance objects for which a predicate holds.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param fn: callable invoked with each instance object; instances for
      which it returns a true value are kept
  @rtype: list
  @return: the selected instance objects

  """
  selected = []
  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      selected.append(inst)
  return selected
769

    
770

    
771
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  An instance is selected when node_name is among its C{all_nodes}.

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
777

    
778

    
779
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  An instance is selected when node_name equals its C{primary_node}.

  """
  def _IsPrimaryHere(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimaryHere)
785

    
786

    
787
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  An instance is selected when node_name is among its
  C{secondary_nodes}.

  """
  def _IsSecondaryHere(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondaryHere)
793

    
794

    
795
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object providing C{GetFileStorageDir}
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: for file storage, a one-element list holding the list of
      storage directories; an empty list for every other type

  """
  args = []

  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    args.append([cfg.GetFileStorageDir()])

  return args
805

    
806

    
807
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of an instance's faulty disks on a node.

  The disk IDs are first set for the given node, then the mirror status
  of all the instance's disks is requested from that node in a single
  RPC call.

  @param cfg: configuration object, used for C{SetDiskID}
  @param rpc: RPC runner providing C{call_blockdev_getmirrorstatus}
  @param instance: the instance whose disks are examined
  @param node_name: the node to query
  @param prereq: passed through as the C{prereq} argument of the RPC
      result's C{Raise} method
  @rtype: list
  @return: positions in the RPC payload whose status is set and whose
      C{ldisk_status} equals L{constants.LDS_FAULTY}

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # entries without a status are skipped rather than treated as faulty
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
822

    
823

    
824
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself does no work: it only provides the hooks environment.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment only carries the cluster name as OP_TARGET; no node
    is listed for the pre phase and only the master node for the post
    phase.

    """
    cluster_name = self.cfg.GetClusterName()
    master_node = self.cfg.GetMasterNode()
    return ({"OP_TARGET": cluster_name}, [], [master_node])

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
851

    
852

    
853
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of the environment (C{OP_TARGET} set to the cluster
        name) and two empty lists

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. the master is the only
    node left and no instances are defined.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master, stops the master role and, if the
    cluster is configured to modify the ssh setup, backs up the ssh key
    files of the Ganeti user.

    @param feedback_fn: feedback function (not used here)
    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except Exception:
      # Hook failures are deliberately non-fatal here; only "Exception" is
      # caught so that KeyboardInterrupt/SystemExit still propagate.
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master
912

    
913

    
914
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  # Kinds of items a problem can be reported against
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # Error codes: each is an (item type, code text) pair, unpacked in that
  # order by _Error
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # Keyword argument naming the severity of a report, and its two values
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
952

    
953
  def ExpandNames(self):
    """Declares the locks needed by the verification.

    All node locks and all instance locks are requested, and every locking
    level is given the value 1 in C{share_locks}.

    """
    wanted = {}
    for level in (locking.LEVEL_NODE, locking.LEVEL_INSTANCE):
      wanted[level] = locking.ALL_SET
    self.needed_locks = wanted
    self.share_locks = dict((level, 1) for level in locking.LEVELS)
959

    
960
  def _Error(self, ecode, item, msg, *args, **kwargs):
961
    """Format an error message.
962

963
    Based on the opcode's error_codes parameter, either format a
964
    parseable error code, or a simpler error string.
965

966
    This must be called only from Exec and functions called from Exec.
967

968
    """
969
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
970
    itype, etxt = ecode
971
    # first complete the msg
972
    if args:
973
      msg = msg % args
974
    # then format the whole message
975
    if self.op.error_codes:
976
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
977
    else:
978
      if item:
979
        item = " " + item
980
      else:
981
        item = ""
982
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
983
    # and finally report it via the feedback_fn
984
    self._feedback_fn("  - %s" % msg)
985

    
986
  def _ErrorIf(self, cond, *args, **kwargs):
987
    """Log an error message if the passed condition is True.
988

989
    """
990
    cond = bool(cond) or self.op.debug_simulate_errors
991
    if cond:
992
      self._Error(*args, **kwargs)
993
    # do not mark the operation as failed for WARN cases only
994
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
995
      self.bad = self.bad or cond
996

    
997
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti protocol and release versions
      - checks vg existence and size (against constants.MIN_VG_SIZE)
      - checks config file checksums
      - checks ssh and tcp connectivity to other nodes
      - checks hypervisor verification results
      - checks the used drbd minors against the expected ones
      - checks the node setup results
      - checks the PV names for invalid characters

    All problems are reported through L{_ErrorIf}; nothing is returned.
    The checks stop early if the node returned no usable data or speaks an
    incompatible protocol version.

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName());
        if None, the LVM and DRBD checks are skipped

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    # the remote version must be a two-element sequence; element 0 is
    # compared to the protocol version, element 1 to the release version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    # (a mismatch here is only reported as a warning)
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size (against constants.MIN_VG_SIZE)
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        # vgstatus doubles as the condition and as the reported message
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        # files listed in master_files are only required on master candidates
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any
    # every entry in the returned mapping is reported as a failure

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    # checks tcp connectivity; again every returned entry is a failure
    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    # hypervisor verification: any non-None per-hypervisor result is a failure
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        # expected minors that must exist but are not in use
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        # minors in use that the configuration does not know about
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    # node setup: the result is a list of problems; a missing result is
    # itself reported as one
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for size, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)
1159

    
1160
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1161
                      node_instance, n_offline):
1162
    """Verify an instance.
1163

1164
    This function checks to see if the required block devices are
1165
    available on the instance's node.
1166

1167
    """
1168
    _ErrorIf = self._ErrorIf
1169
    node_current = instanceconfig.primary_node
1170

    
1171
    node_vol_should = {}
1172
    instanceconfig.MapLVsByNode(node_vol_should)
1173

    
1174
    for node in node_vol_should:
1175
      if node in n_offline:
1176
        # ignore missing volumes on offline nodes
1177
        continue
1178
      for volume in node_vol_should[node]:
1179
        test = node not in node_vol_is or volume not in node_vol_is[node]
1180
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1181
                 "volume %s missing on node %s", volume, node)
1182

    
1183
    if instanceconfig.admin_up:
1184
      test = ((node_current not in node_instance or
1185
               not instance in node_instance[node_current]) and
1186
              node_current not in n_offline)
1187
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1188
               "instance not running on its primary node %s",
1189
               node_current)
1190

    
1191
    for node in node_instance:
1192
      if (not node == node_current):
1193
        test = instance in node_instance[node]
1194
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1195
                 "instance should not run on node %s", node)
1196

    
1197
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1198
    """Verify if there are any unknown volumes in the cluster.
1199

1200
    The .os, .swap and backup volumes are ignored. All other volumes are
1201
    reported as unknown.
1202

1203
    """
1204
    for node in node_vol_is:
1205
      for volume in node_vol_is[node]:
1206
        test = (node not in node_vol_should or
1207
                volume not in node_vol_should[node])
1208
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1209
                      "volume %s is unknown", volume)
1210

    
1211
  def _VerifyOrphanInstances(self, instancelist, node_instance):
1212
    """Verify the list of running instances.
1213

1214
    This checks what instances are running but unknown to the cluster.
1215

1216
    """
1217
    for node in node_instance:
1218
      for o_inst in node_instance[node]:
1219
        test = o_inst not in instancelist
1220
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1221
                      "instance %s on node %s should not exist", o_inst, node)
1222

    
1223
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    @param node_info: per-node dictionary; the C{mfree} and
        C{sinst-by-pnode} entries of each value are used
    @param instance_cfg: dictionary of instance name to instance config

    """
    # The cluster object does not depend on the instance, fetch it only once
    cluster = self.cfg.GetClusterInfo()

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = cluster.FillBE(instance_cfg[instance])
          # only auto-balanced instances count towards the needed memory
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)
1249

    
1250
  def CheckPrereq(self):
    """Validates the list of checks that should be skipped.

    The opcode's skip_checks list is stored as a frozenset in
    C{self.skip_set}; an OpPrereqError is raised if it holds anything that
    is not part of C{constants.VERIFY_OPTIONAL_CHECKS}.

    """
    requested = frozenset(self.op.skip_checks)
    self.skip_set = requested
    all_known = constants.VERIFY_OPTIONAL_CHECKS.issuperset(requested)
    if all_known:
      return
    raise errors.OpPrereqError("Invalid checks to be skipped specified",
                               errors.ECODE_INVAL)
1261

    
1262
  def BuildHooksEnv(self):
1263
    """Build hooks env.
1264

1265
    Cluster-Verify hooks just ran in the post phase and their failure makes
1266
    the output be logged in the verify output and the verification to fail.
1267

1268
    """
1269
    all_nodes = self.cfg.GetNodeList()
1270
    env = {
1271
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1272
      }
1273
    for node in self.cfg.GetAllNodesInfo().values():
1274
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1275

    
1276
    return env, [], all_nodes
1277

    
1278
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    The configuration is verified first, then one node_verify RPC is sent
    to all nodes and its per-node results are checked, then every instance
    is checked against the gathered node data, and finally orphan volumes,
    orphan instances and (unless skipped) N+1 memory are verified.

    @param feedback_fn: function used to send feedback back to the caller
    @return: True if no error-level problem was reported, False otherwise

    """
    self.bad = False
    _ErrorIf = self._ErrorIf
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      if not isinstance(nresult, dict):
        # _VerifyNode has already reported the unusable payload; the lookups
        # below need a dictionary, so there is nothing more to check here
        continue

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist)")
      if test:
        continue

      node_instance[node] = idata

      # node_info
      # (kept under its own name so that it does not shadow the list of
      # node objects this loop iterates over)
      hv_info = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(hv_info, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        # without a valid time there is nothing to compare; report it
        # unconditionally and go on with the next node
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      # The difference is formatted right away, so that the report below
      # never has to format a missing value
      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%0.1fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%0.1fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
        node_info[node] = {
          "mfree": int(hv_info['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes",
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad
1561

    
1562
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyzes the results of the post-phase hooks.

    Communication failures with non-offline nodes and failed hook scripts
    are reported, and the (re-indented) output of failed scripts is sent
    back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: for the post phase, the previous result, replaced by 1 if a
        communication failure or a failed script was found; None for any
        other phase

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase != constants.HOOKS_PHASE_POST:
      return None

    # Used to change hooks' output to proper indentation
    line_start_re = re.compile('^', re.M)
    feedback_fn("* Hooks Results")
    assert hooks_results, "invalid result from hooks"

    report = self._ErrorIf
    for node_name in hooks_results:
      node_res = hooks_results[node_name]
      fail_msg = node_res.fail_msg
      comm_failed = fail_msg and not node_res.offline
      report(comm_failed, self.ENODEHOOKS, node_name,
             "Communication failure in hooks execution: %s", fail_msg)
      if comm_failed:
        # override manually lu_result here as _ErrorIf only
        # overrides self.bad
        lu_result = 1
        continue
      for (script, status, output) in node_res.payload:
        script_failed = status == constants.HKR_FAIL
        report(script_failed, self.ENODEHOOKS, node_name,
               "Script %s failed, output:", script)
        if not script_failed:
          continue
        feedback_fn("%s" % line_start_re.sub('      ', output))
        lu_result = 1

    return lu_result
1607

    
1608

    
1609
class LUVerifyDisks(NoHooksLU):
  """Logical unit checking the status of the cluster disks.

  Only running instances whose disk template is listed in
  L{constants.DTS_NET_MIRROR} are looked at.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared locks on all nodes and all instances.

    """
    wanted = {}
    wanted[locking.LEVEL_NODE] = locking.ALL_SET
    wanted[locking.LEVEL_INSTANCE] = locking.ALL_SET
    self.needed_locks = wanted
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    # the three containers are filled in place below
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # map every expected (node, volume) pair to its owning instance
    nv_dict = {}
    for inst in instances:
      if not inst.admin_up:
        continue
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue
      inst_lvs = {}
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every volume seen on the node is dropped from the expected set
        inst = nv_dict.pop((node, lv_name), None)
        if inst is None or lv_online:
          continue
        if inst.name not in res_instances:
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
1690

    
1691

    
1692
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The disk sizes recorded in the configuration are compared with the
  sizes reported by each instance's primary node; mismatches are
  corrected in the configuration.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed locks.

    With a non-empty instance list only those instances are locked and
    the node locks are recalculated in DeclareLocks; with an empty list
    all nodes and all instances are locked. All locks are shared.

    @raise errors.OpPrereqError: if the 'instances' opcode field is not
        a list, or if one of the given names is not known

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name,
                                     errors.ECODE_NOENT)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Compute the node locks when specific instances were requested.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list: work on every instance we hold a lock for
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: True if the size of the first child (or of one of its own
        first children, recursively) was changed, False otherwise

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: a list of (instance name, disk index, new size) tuples, one
        for each correction done to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies, as SetDiskID modifies the disk objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # use the payload, like all other RPC consumers in this module
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # NOTE(review): presumably converts bytes to the mebibytes used
        # in the configuration - confirm against the node-side call
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
1813

    
1814

    
1815
class LURenameCluster(LogicalUnit):
  """Logical unit renaming the cluster.

  Besides the name, the master IP address is updated to the one the new
  name resolves to.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build the hooks environment.

    Both hook phases use the master node only.

    """
    hooks_env = {}
    hooks_env["OP_TARGET"] = self.cfg.GetClusterName()
    hooks_env["NEW_NAME"] = self.op.name
    master_node = self.cfg.GetMasterNode()
    return hooks_env, [master_node], [master_node]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, compared against the current name and master
    IP, and a changed IP must not answer on the node daemon port. The
    opcode's name is replaced by the resolved name.

    """
    hostinfo = utils.GetHostInfo(self.op.name)

    resolved_name = hostinfo.name
    resolved_ip = hostinfo.ip
    self.ip = resolved_ip

    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()
    ip_changed = resolved_ip != current_ip

    if resolved_name == current_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a new IP that already answers is in use by somebody else
    if ip_changed and utils.TcpPing(resolved_ip,
                                    constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 resolved_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = resolved_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the configuration and the known hosts
    file are updated and distributed, and the master role is started
    again even if one of these steps raised an error.

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      other_nodes = self.cfg.GetNodeList()
      if master in other_nodes:
        other_nodes.remove(master)
      upload_result = self.rpc.call_upload_file(
        other_nodes, constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in upload_result.iteritems():
        msg = to_result.fail_msg
        if not msg:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      msg = start_result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
1896

    
1897

    
1898
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: True if the disk itself or one of its (recursive) children
      has the LD_LV dev_type, False otherwise

  """
  # children are searched first; an empty/missing list means a leaf
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1912

    
1913

    
1914
class LUSetClusterParams(LogicalUnit):
1915
  """Change the parameters of the cluster.
1916

1917
  """
1918
  HPATH = "cluster-modify"
1919
  HTYPE = constants.HTYPE_CLUSTER
1920
  _OP_REQP = []
1921
  REQ_BGL = False
1922

    
1923
  def CheckArguments(self):
1924
    """Check parameters
1925

1926
    """
1927
    if not hasattr(self.op, "candidate_pool_size"):
1928
      self.op.candidate_pool_size = None
1929
    if self.op.candidate_pool_size is not None:
1930
      try:
1931
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1932
      except (ValueError, TypeError), err:
1933
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1934
                                   str(err), errors.ECODE_INVAL)
1935
      if self.op.candidate_pool_size < 1:
1936
        raise errors.OpPrereqError("At least one master candidate needed",
1937
                                   errors.ECODE_INVAL)
1938

    
1939
  def ExpandNames(self):
1940
    # FIXME: in the future maybe other cluster params won't require checking on
1941
    # all nodes to be modified.
1942
    self.needed_locks = {
1943
      locking.LEVEL_NODE: locking.ALL_SET,
1944
    }
1945
    self.share_locks[locking.LEVEL_NODE] = 1
1946

    
1947
  def BuildHooksEnv(self):
1948
    """Build hooks env.
1949

1950
    """
1951
    env = {
1952
      "OP_TARGET": self.cfg.GetClusterName(),
1953
      "NEW_VG_NAME": self.op.vg_name,
1954
      }
1955
    mn = self.cfg.GetMasterNode()
1956
    return env, [mn], [mn]
1957

    
1958
  def CheckPrereq(self):
1959
    """Check prerequisites.
1960

1961
    This checks whether the given params don't conflict and
1962
    if the given volume group is valid.
1963

1964
    """
1965
    if self.op.vg_name is not None and not self.op.vg_name:
1966
      instances = self.cfg.GetAllInstancesInfo().values()
1967
      for inst in instances:
1968
        for disk in inst.disks:
1969
          if _RecursiveCheckIfLVMBased(disk):
1970
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1971
                                       " lvm-based instances exist",
1972
                                       errors.ECODE_INVAL)
1973

    
1974
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1975

    
1976
    # if vg_name not None, checks given volume group on all nodes
1977
    if self.op.vg_name:
1978
      vglist = self.rpc.call_vg_list(node_list)
1979
      for node in node_list:
1980
        msg = vglist[node].fail_msg
1981
        if msg:
1982
          # ignoring down node
1983
          self.LogWarning("Error while gathering data on node %s"
1984
                          " (ignoring node): %s", node, msg)
1985
          continue
1986
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1987
                                              self.op.vg_name,
1988
                                              constants.MIN_VG_SIZE)
1989
        if vgstatus:
1990
          raise errors.OpPrereqError("Error on node '%s': %s" %
1991
                                     (node, vgstatus), errors.ECODE_ENVIRON)
1992

    
1993
    self.cluster = cluster = self.cfg.GetClusterInfo()
1994
    # validate params changes
1995
    if self.op.beparams:
1996
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1997
      self.new_beparams = objects.FillDict(
1998
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1999

    
2000
    if self.op.nicparams:
2001
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2002
      self.new_nicparams = objects.FillDict(
2003
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2004
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2005
      nic_errors = []
2006

    
2007
      # check all instances for consistency
2008
      for instance in self.cfg.GetAllInstancesInfo().values():
2009
        for nic_idx, nic in enumerate(instance.nics):
2010
          params_copy = copy.deepcopy(nic.nicparams)
2011
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2012

    
2013
          # check parameter syntax
2014
          try:
2015
            objects.NIC.CheckParameterSyntax(params_filled)
2016
          except errors.ConfigurationError, err:
2017
            nic_errors.append("Instance %s, nic/%d: %s" %
2018
                              (instance.name, nic_idx, err))
2019

    
2020
          # if we're moving instances to routed, check that they have an ip
2021
          target_mode = params_filled[constants.NIC_MODE]
2022
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2023
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2024
                              (instance.name, nic_idx))
2025
      if nic_errors:
2026
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2027
                                   "\n".join(nic_errors))
2028

    
2029
    # hypervisor list/parameters
2030
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2031
    if self.op.hvparams:
2032
      if not isinstance(self.op.hvparams, dict):
2033
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2034
                                   errors.ECODE_INVAL)
2035
      for hv_name, hv_dict in self.op.hvparams.items():
2036
        if hv_name not in self.new_hvparams:
2037
          self.new_hvparams[hv_name] = hv_dict
2038
        else:
2039
          self.new_hvparams[hv_name].update(hv_dict)
2040

    
2041
    if self.op.enabled_hypervisors is not None:
2042
      self.hv_list = self.op.enabled_hypervisors
2043
      if not self.hv_list:
2044
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2045
                                   " least one member",
2046
                                   errors.ECODE_INVAL)
2047
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2048
      if invalid_hvs:
2049
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2050
                                   " entries: %s" %
2051
                                   utils.CommaJoin(invalid_hvs),
2052
                                   errors.ECODE_INVAL)
2053
    else:
2054
      self.hv_list = cluster.enabled_hypervisors
2055

    
2056
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2057
      # either the enabled list has changed, or the parameters have, validate
2058
      for hv_name, hv_params in self.new_hvparams.items():
2059
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2060
            (self.op.enabled_hypervisors and
2061
             hv_name in self.op.enabled_hypervisors)):
2062
          # either this is a new hypervisor, or its parameters have changed
2063
          hv_class = hypervisor.GetHypervisor(hv_name)
2064
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2065
          hv_class.CheckParameterSyntax(hv_params)
2066
          _CheckHVParams(self, node_list, hv_name, hv_params)
2067

    
2068
  def Exec(self, feedback_fn):
2069
    """Change the parameters of the cluster.
2070

2071
    """
2072
    if self.op.vg_name is not None:
2073
      new_volume = self.op.vg_name
2074
      if not new_volume:
2075
        new_volume = None
2076
      if new_volume != self.cfg.GetVGName():
2077
        self.cfg.SetVGName(new_volume)
2078
      else:
2079
        feedback_fn("Cluster LVM configuration already in desired"
2080
                    " state, not changing")
2081
    if self.op.hvparams:
2082
      self.cluster.hvparams = self.new_hvparams
2083
    if self.op.enabled_hypervisors is not None:
2084
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2085
    if self.op.beparams:
2086
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2087
    if self.op.nicparams:
2088
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2089

    
2090
    if self.op.candidate_pool_size is not None:
2091
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2092
      # we need to update the pool size here, otherwise the save will fail
2093
      _AdjustCandidatePool(self, [])
2094

    
2095
    self.cfg.Update(self.cluster, feedback_fn)
2096

    
2097

    
2098
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the ancillary cluster files to all the other nodes.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This
  function makes sure those are copied; copy failures are only logged as
  warnings.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes: everything but the master itself
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  try:
    targets.remove(master_info.name)
  except ValueError:
    pass

  # 2. Gather files to distribute
  to_send = set((constants.ETC_HOSTS,
                 constants.SSH_KNOWN_HOSTS_FILE,
                 constants.RAPI_CERT_FILE,
                 constants.RAPI_USERS_FILE,
                 constants.HMAC_CLUSTER_KEY,
                 ))

  cluster_info = lu.cfg.GetClusterInfo()
  for hv_name in cluster_info.enabled_hypervisors:
    hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
    to_send.update(hv_files)

  # 3. Perform the files upload, skipping files missing locally
  for fname in to_send:
    if not os.path.exists(fname):
      continue
    upload_result = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in upload_result.items():
      msg = to_result.fail_msg
      if not msg:
        continue
      lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                         (fname, to_node, msg))
2140

    
2141

    
2142
class LURedistributeConfig(NoHooksLU):
  """Logical unit forcing the redistribution of the cluster configuration.

  It only re-saves the cluster object and copies the ancillary files.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare a shared lock on all nodes.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback back to the caller

    """
    cluster_info = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster_info, feedback_fn)
    _RedistributeAncillaryFiles(self)
2168

    
2169

    
2170
def _WaitForSync(lu, instance, oneshot=False):
2171
  """Sleep and poll for an instance's disk to sync.
2172

2173
  """
2174
  if not instance.disks:
2175
    return True
2176

    
2177
  if not oneshot:
2178
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2179

    
2180
  node = instance.primary_node
2181

    
2182
  for dev in instance.disks:
2183
    lu.cfg.SetDiskID(dev, node)
2184

    
2185
  # TODO: Convert to utils.Retry
2186

    
2187
  retries = 0
2188
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2189
  while True:
2190
    max_time = 0
2191
    done = True
2192
    cumul_degraded = False
2193
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2194
    msg = rstats.fail_msg
2195
    if msg:
2196
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2197
      retries += 1
2198
      if retries >= 10:
2199
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2200
                                 " aborting." % node)
2201
      time.sleep(6)
2202
      continue
2203
    rstats = rstats.payload
2204
    retries = 0
2205
    for i, mstat in enumerate(rstats):
2206
      if mstat is None:
2207
        lu.LogWarning("Can't compute data for node %s/%s",
2208
                           node, instance.disks[i].iv_name)
2209
        continue
2210

    
2211
      cumul_degraded = (cumul_degraded or
2212
                        (mstat.is_degraded and mstat.sync_percent is None))
2213
      if mstat.sync_percent is not None:
2214
        done = False
2215
        if mstat.estimated_time is not None:
2216
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2217
          max_time = mstat.estimated_time
2218
        else:
2219
          rem_time = "no time estimate"
2220
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2221
                        (instance.disks[i].iv_name, mstat.sync_percent,
2222
                         rem_time))
2223

    
2224
    # if we're done but degraded, let's do a few small retries, to
2225
    # make sure we see a stable and not transient situation; therefore
2226
    # we force restart of the loop
2227
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2228
      logging.info("Degraded disks found, %d retries left", degr_retries)
2229
      degr_retries -= 1
2230
      time.sleep(1)
2231
      continue
2232

    
2233
    if done or oneshot:
2234
      break
2235

    
2236
    time.sleep(min(60, max_time))
2237

    
2238
  if done:
2239
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2240
  return not cumul_degraded
2241

    
2242

    
2243
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2244
  """Check that mirrors are not degraded.
2245

2246
  The ldisk parameter, if True, will change the test from the
2247
  is_degraded attribute (which represents overall non-ok status for
2248
  the device(s)) to the ldisk (representing the local storage status).
2249

2250
  """
2251
  lu.cfg.SetDiskID(dev, node)
2252

    
2253
  result = True
2254

    
2255
  if on_primary or dev.AssembleOnSecondary():
2256
    rstats = lu.rpc.call_blockdev_find(node, dev)
2257
    msg = rstats.fail_msg
2258
    if msg:
2259
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2260
      result = False
2261
    elif not rstats.payload:
2262
      lu.LogWarning("Can't find disk on node %s", node)
2263
      result = False
2264
    else:
2265
      if ldisk:
2266
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2267
      else:
2268
        result = result and not rstats.payload.is_degraded
2269

    
2270
  if dev.children:
2271
    for child in dev.children:
2272
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2273

    
2274
  return result
2275

    
2276

    
2277
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the query arguments; no locks are acquired.

    @raise errors.OpPrereqError: if specific OS names were requested

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param node_list: a list with the names of all nodes (currently not
        used by the computation)
    @param rlist: a map with node names as keys and RPC results as
        values; the payload of each result is a list of
        (name, path, status, diagnose, variants) entries

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: one row per OS, each row being the list of the values of
        the requested output fields, in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(valid_nodes, node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # an OS is valid only if the first entry on every node is
          # valid; coerce to a real boolean so that the "valid" field
          # never leaks an empty list or a raw status value
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            # only the variants supported on all nodes are reported
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2391

    
2392

    
2393
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @rtype: tuple
    @return: (environment dict, pre-phase node list, post-phase node list);
        both node lists are the result of C{self.cfg.GetNodeList()} with
        the node being removed taken out

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    if self.op.node_name in all_nodes:
      all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    On success C{self.op.node_name} is replaced by the node's name as
    stored in the configuration and the node object is kept in
    C{self.node} for L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
                                 errors.ECODE_NOENT)

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in self.cfg.GetInstanceList():
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    @param feedback_fn: feedback function, unused here

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run the post hooks on the node itself before it is told to leave the
    # cluster; at this point self.context.RemoveNode has already been called
    # for it, hence the explicit hooks run
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except Exception: # pylint: disable-msg=W0703
      # best effort only: a failing hook must not abort the removal, but
      # KeyboardInterrupt/SystemExit are no longer swallowed here
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
2476

    
2477

    
2478
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read straight from the node object via getattr()
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields filled from the call_node_info RPC answer
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  # fields computed from the configuration only
  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    """Validate the output fields and decide which locks are needed.

    Node locks (in shared mode) are only requested when at least one
    selected field is not matched by L{_FIELDS_STATIC} and the opcode
    has C{use_locking} set.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: feedback function, unused here
    @rtype: list of lists
    @return: one row per node (nodes ordered by C{utils.NiceSort}), each
        row holding the values of C{self.op.output_fields} in order
    @raise errors.OpExecError: if, without locking, an explicitly
        requested node is missing from the configuration
    @raise errors.ParameterError: if an unknown output field is requested

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      # without locks a node may have disappeared since ExpandNames
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          # failed or empty answer: dynamic fields will read as None
          live_data[name] = {}
    else:
      # one private dict per node; dict.fromkeys(nodenames, {}) would make
      # all nodes share (and potentially corrupt) a single dict object
      live_data = dict([(name, {}) for name in nodenames])

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    # the instance scan is only done when an instance field is requested
    if inst_fields & frozenset(self.op.output_fields):
      instancelist = self.cfg.GetInstanceList()

      for instance_name in instancelist:
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          # single-letter role; the first matching state wins
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output
2637

    
2638

    
2639
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    All nodes are locked when the opcode names none, otherwise only the
    nodes resolved by C{_GetWantedNodes}.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is verified here; the names of the acquired node locks are
    simply remembered in C{self.nodes}.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes found on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC call failed are
    skipped with a warning.

    @rtype: list of lists
    @return: one row per volume, every value converted with C{str}

    """
    node_names = self.nodes
    vol_results = self.rpc.call_node_volumes(node_names)

    instances = []
    for iname in self.cfg.GetInstanceList():
      instances.append(self.cfg.GetInstanceInfo(iname))

    # instance object -> result of its MapLVsByNode()
    inst_lvs = {}
    for inst in instances:
      inst_lvs[inst] = inst.MapLVsByNode()

    # output fields that are a plain lookup in the volume entry
    plain_keys = {"phys": 'dev', "vg": 'vg', "name": 'name'}

    def _OwnerOf(node_name, vol):
      """Name of the first instance using this volume on the node, or '-'."""
      for inst in instances:
        per_node = inst_lvs[inst]
        if node_name in per_node and vol['name'] in per_node[node_name]:
          return inst.name
      return '-'

    output = []
    for node in node_names:
      nresult = vol_results[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      for vol in sorted(nresult.payload, key=lambda entry: entry['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in plain_keys:
            val = vol[plain_keys[field]]
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = _OwnerOf(node, vol)
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
2723

    
2724

    
2725
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    """Validate storage type and output fields, request shared node locks.

    """
    if self.op.storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" %
                                 self.op.storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Defaults the optional C{name} opcode attribute to None and remembers
    the names of the acquired node locks in C{self.nodes}.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the locked nodes.

    @rtype: list of lists
    @return: one row per storage unit, nodes and unit names both ordered
        by C{utils.NiceSort}

    """
    # Always get name to sort by
    fields = list(self.op.output_fields)
    if constants.SF_NAME not in fields:
      fields.insert(0, constants.SF_NAME)

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    # position of every requested field inside the rows sent by the nodes
    field_idx = {}
    for (idx, fname) in enumerate(fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      # unit name -> raw row, so that units can be emitted in sorted order
      by_name = {}
      for raw in nresult.payload:
        by_name[raw[name_idx]] = raw

      for unit_name in utils.NiceSort(by_name.keys()):
        raw = by_name[unit_name]
        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(raw[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
2820

    
2821

    
2822
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = expanded

    if self.op.storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" %
                                 self.op.storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock of the target node.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the storage type has modifiable fields at all and that
    only such fields are listed in C{self.op.changes}.

    """
    st_type = self.op.storage_type

    if st_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % st_type,
                                 errors.ECODE_INVAL)
    allowed = constants.MODIFIABLE_STORAGE_FIELDS[st_type]

    rejected = set(self.op.changes.keys()) - allowed
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (st_type, list(rejected)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    outcome = self.rpc.call_storage_modify(op.node_name,
                                           op.storage_type, st_args,
                                           op.name, op.changes)
    outcome.Raise("Failed to modify storage unit '%s' on %s" %
                  (op.name, op.node_name))
2877

    
2878

    
2879
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    pre_nodes = self.cfg.GetNodeList()
    post_nodes = pre_nodes + [target]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    hostinfo = utils.GetHostInfo(self.op.node_name)
    node = hostinfo.name

    # the primary IP always comes from name resolution, the secondary one
    # defaults to it when the opcode does not carry one
    self.op.primary_ip = hostinfo.ip
    primary_ip = self.op.primary_ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    known_nodes = self.cfg.GetNodeList()
    already_known = node in known_nodes
    if not self.op.readd and already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and not already_known:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    wanted_ips = (primary_ip, secondary_ip)
    for other_name in known_nodes:
      other = self.cfg.GetNodeInfo(other_name)
      other_ips = (other.primary_ip, other.secondary_ip)

      if self.op.readd and node == other_name:
        # a re-added node must come back with exactly its old addresses
        if other_ips != wanted_ips:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      # none of our addresses may be in use by another node
      if primary_ip in other_ips or secondary_ip in other_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % other.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no private ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a private ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      secondary_ok = utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                                   source=myself.secondary_ip)
      if not secondary_ok:
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    exceptions = []
    if self.op.readd:
      exceptions.append(node)

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if not self.op.readd:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)
    else:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name
    readd = self.op.readd

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if readd:
      new_node.offline = False
      new_node.drained = False
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    version_result = self.rpc.call_version([node])[node]
    version_result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != version_result.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_result.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_result.payload)

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]
      # file contents, in the same order as the paths above
      keyarray = [utils.ReadFile(path) for path in keyfiles]

      add_result = self.rpc.call_node_add(node, *keyarray)
      add_result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      ip_result = self.rpc.call_node_has_ip_address(new_node.name,
                                                    new_node.secondary_ip)
      ip_result.Raise("Failure checking secondary ip on node %s" %
                      new_node.name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
      if not ip_result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # ask the master to verify the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_results = self.rpc.call_node_verify(node_verify_list,
                                               node_verify_param,
                                               self.cfg.GetClusterName())
    for verifier in node_verify_list:
      vresult = verify_results[verifier]
      vresult.Raise("Cannot communicate with node %s" % verifier)
      nl_payload = vresult.payload[constants.NV_NODELIST]
      if not nl_payload:
        continue
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_result = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = demote_result.fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
3097

    
3098

    
3099
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The three flags C{master_candidate}, C{offline} and C{drained} are
  tri-state on the opcode: C{None} means "leave unchanged", C{True} and
  C{False} request the respective new value.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and sanity-check the requested flags.

    @raise errors.OpPrereqError: if the node name cannot be expanded, if
        no modification was passed or if more than one flag is set to True

    """
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      # ECODE_NOENT, as in the other LUs reporting an unknown node name
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock of the node being modified.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the node being modified.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that:
     - the flags of the master node are not being changed
     - demoting, offlining or draining a master candidate does not leave
       the cluster short of candidates (unless forced, see below)
     - an offline or drained node is not promoted to master candidate

    When the node is being de-offlined or un-drained and promotion was
    not explicitly requested, C{self.op.master_candidate} is set from
    C{_DecideSelfPromotion}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    offline_or_drain = self.op.offline == True or self.op.drained == True
    deoffline_or_drain = self.op.offline == False or self.op.drained == False

    if (node.master_candidate and
        (self.op.master_candidate == False or offline_or_drain)):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
      if mc_now <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, mc_now-1))
        # Only allow forcing the operation if it's an offline/drain operation,
        # and we could not possibly promote more nodes.
        # FIXME: this can still lead to issues if in any way another node which
        # could be promoted appears in the meantime.
        if self.op.force and offline_or_drain and mc_should == mc_max:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (deoffline_or_drain and not offline_or_drain and not
        self.op.master_candidate == True):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: feedback function, passed on to C{self.cfg.Update}
    @rtype: list of tuples
    @return: (parameter name, new value or explanation) pairs describing
        every change that was applied

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        # an offline node is neither master candidate nor drained; no
        # demotion RPC is sent to it in this branch
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3252

    
3253

    
3254
class LUPowercycleNode(NoHooksLU):
  """Logical unit that power-cycles a single node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and refuse to hit the master unless forced.

    @raise errors.OpPrereqError: if the node name cannot be expanded,
        or if the node is the master and the force parameter is unset

    """
    requested = self.op.node_name
    expanded = self.cfg.ExpandNodeName(requested)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % requested,
                                 errors.ECODE_NOENT)

    self.op.node_name = expanded

    is_master = (expanded == self.cfg.GetMasterNode())
    if is_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the (empty) lock set for this LU.

    Powercycling is a last-resort operation which must not wait for
    other jobs, hence no locks are requested at all.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Nothing to verify; this LU has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Ask the node to powercycle itself.

    @return: the payload of the powercycle RPC call

    """
    hv_name = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_name)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
3297

    
3298

    
3299
class LUQueryClusterInfo(NoHooksLU):
  """Logical unit returning the cluster-wide configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """This query takes no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites are needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Collect the cluster configuration into a dictionary.

    @rtype: dict
    @return: version constants, the architecture of the machine this
        code runs on, and the settings stored in the cluster object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # only export the parameters of hypervisors which are enabled
    hvparams = {}
    for hv_name in enabled_hvs:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }
3346

    
3347

    
3348
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Take no locks and validate the requested output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the value of each requested configuration field.

    @rtype: list
    @return: one entry per name in C{self.op.output_fields}, in the
        same order as the fields were requested
    @raise errors.ParameterError: if an unknown field is requested

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        # this must be stored like any other field; returning here would
        # hand back a bare value instead of the list and silently drop
        # all the other requested fields
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
3389

    
3390

    
3391
class LUActivateInstanceDisks(NoHooksLU):
  """Logical unit bringing up the disks of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the cluster configuration and the
    node-online check is run on its primary node. A missing
    C{ignore_size} opcode attribute is defaulted to False.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name

    _CheckNodeOnline(self, self.instance.primary_node)

    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (all_ok, dev_info) = _AssembleInstanceDisks(
      self, self.instance, ignore_size=self.op.ignore_size)
    if all_ok:
      return dev_info

    raise errors.OpExecError("Cannot activate block devices")
3431

    
3432

    
3433
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3434
                           ignore_size=False):
3435
  """Prepare the block devices for an instance.
3436

3437
  This sets up the block devices on all nodes.
3438

3439
  @type lu: L{LogicalUnit}
3440
  @param lu: the logical unit on whose behalf we execute
3441
  @type instance: L{objects.Instance}
3442
  @param instance: the instance for whose disks we assemble
3443
  @type ignore_secondaries: boolean
3444
  @param ignore_secondaries: if true, errors on secondary nodes
3445
      won't result in an error return from the function
3446
  @type ignore_size: boolean
3447
  @param ignore_size: if true, the current known size of the disk
3448
      will not be used during the disk activation, useful for cases
3449
      when the size is wrong
3450
  @return: False if the operation failed, otherwise a list of
3451
      (host, instance_visible_name, node_visible_name)
3452
      with the mapping from node devices to instance devices
3453

3454
  """
3455
  device_info = []
3456
  disks_ok = True
3457
  iname = instance.name
3458
  # With the two passes mechanism we try to reduce the window of
3459
  # opportunity for the race condition of switching DRBD to primary
3460
  # before handshaking occured, but we do not eliminate it
3461

    
3462
  # The proper fix would be to wait (with some limits) until the
3463
  # connection has been made and drbd transitions from WFConnection
3464
  # into any other network-connected state (Connected, SyncTarget,
3465
  # SyncSource, etc.)
3466

    
3467
  # 1st pass, assemble on all nodes in secondary mode
3468
  for inst_disk in instance.disks:
3469
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3470
      if ignore_size:
3471
        node_disk = node_disk.Copy()
3472
        node_disk.UnsetSize()
3473
      lu.cfg.SetDiskID(node_disk, node)
3474
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3475
      msg = result.fail_msg
3476
      if msg:
3477
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3478
                           " (is_primary=False, pass=1): %s",
3479
                           inst_disk.iv_name, node, msg)
3480
        if not ignore_secondaries:
3481
          disks_ok = False
3482

    
3483
  # FIXME: race condition on drbd migration to primary
3484

    
3485
  # 2nd pass, do only the primary node
3486
  for inst_disk in instance.disks:
3487
    dev_path = None
3488

    
3489
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3490
      if node != instance.primary_node:
3491
        continue
3492
      if ignore_size:
3493
        node_disk = node_disk.Copy()
3494
        node_disk.UnsetSize()
3495
      lu.cfg.SetDiskID(node_disk, node)
3496
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3497
      msg = result.fail_msg
3498
      if msg:
3499
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3500
                           " (is_primary=True, pass=2): %s",
3501
                           inst_disk.iv_name, node, msg)
3502
        disks_ok = False
3503
      else:
3504
        dev_path = result.payload
3505

    
3506
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3507

    
3508
  # leave the disks configured for the primary node
3509
  # this is a workaround that would be fixed better by
3510
  # improving the logical/physical id handling
3511
  for disk in instance.disks:
3512
    lu.cfg.SetDiskID(disk, instance.primary_node)
3513

    
3514
  return disks_ok, device_info
3515

    
3516

    
3517
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, cleaning up on failure.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed on as C{ignore_secondaries} to
      L{_AssembleInstanceDisks}; if it is None, no hint about using
      '--force' is logged on failure
  @raise errors.OpExecError: if the disks could not be assembled; the
      disks are shut down again before raising

  """
  all_ok = _AssembleInstanceDisks(lu, instance,
                                  ignore_secondaries=force)[0]
  if all_ok:
    return

  _ShutdownInstanceDisks(lu, instance)

  # only suggest '--force' when the caller did pass a (false) force flag
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")

  raise errors.OpExecError("Disk consistency error")
3530

    
3531

    
3532
class LUDeactivateInstanceDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The only requirement is that the instance is known to the cluster
    configuration.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name

  def Exec(self, feedback_fn):
    """Deactivate the disks, unless the instance is running.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3564

    
3565

    
3566
def _SafeShutdownInstanceDisks(lu, instance):
  """Shut down the block devices of an instance which is not running.

  The primary node is asked for its list of running instances; only
  if the instance is not among them is L{_ShutdownInstanceDisks}
  called.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @raise errors.OpExecError: if the instance is running on its
      primary node

  """
  primary = instance.primary_node
  all_lists = lu.rpc.call_instance_list([primary], [instance.hypervisor])
  running = all_lists[primary]
  running.Raise("Can't contact node %s" % primary)

  if instance.name in running.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)
3582

    
3583

    
3584
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3585
  """Shutdown block devices of an instance.
3586

3587
  This does the shutdown on all nodes of the instance.
3588

3589
  If the ignore_primary is false, errors on the primary node are
3590
  ignored.
3591

3592
  """
3593
  all_result = True
3594
  for disk in instance.disks:
3595
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3596
      lu.cfg.SetDiskID(top_disk, node)
3597
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3598
      msg = result.fail_msg
3599
      if msg:
3600
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3601
                      disk.iv_name, node, msg)
3602
        if not ignore_primary or node != instance.primary_node:
3603
          all_result = False
3604
  return all_result
3605

    
3606

    
3607
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested free memory.

  The node is queried for its information and the reported
  'memory_free' value is compared against the requested amount. Both
  a node which has too little memory and a node from which the value
  cannot be obtained result in an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory,
      or we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get('memory_free', None)
  if not isinstance(available, int):
    # covers both a missing value and anything which is not a number
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available),
                               errors.ECODE_ENVIRON)

  if requested > available:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, available),
                               errors.ECODE_NORES)
3642

    
3643

    
3644
class LUStartupInstance(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance to be started.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master and on all nodes of the instance.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return env, nodes, nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Besides checking that the instance is in the cluster, this
    validates the optional one-off backend and hypervisor parameters,
    runs the node-online and bridge checks and, if the instance is not
    reported as running, the free memory check on the primary node.

    """
    inst_name = self.op.instance_name
    self.instance = instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name

    # backend parameters overriding the configured ones for this start
    extra_be = getattr(self.op, "beparams", {})
    self.beparams = extra_be
    if extra_be:
      if not isinstance(extra_be, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(extra_be), ),
                                   errors.ECODE_INVAL)
      # enforce the declared types on the passed values
      utils.ForceDictType(extra_be, constants.BES_PARAMETER_TYPES)
      self.op.beparams = extra_be

    # hypervisor parameters overriding the configured ones for this start
    extra_hv = getattr(self.op, "hvparams", {})
    self.hvparams = extra_hv
    if extra_hv:
      if not isinstance(extra_hv, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(extra_hv), ),
                                   errors.ECODE_INVAL)

      # the syntax check is done locally, on the complete parameter set:
      # cluster defaults, then instance values, then the passed overrides
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(extra_hv, constants.HVS_PARAMETER_TYPES)
      hv_name = instance.hypervisor
      full_hvp = objects.FillDict(cluster.hvparams[hv_name],
                                  instance.hvparams)
      full_hvp.update(extra_hv)
      hypervisor.GetHypervisor(hv_name).CheckParameterSyntax(full_hvp)
      _CheckHVParams(self, instance.all_nodes, hv_name, full_hvp)
      self.op.hvparams = extra_hv

    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    inst_info = self.rpc.call_instance_info(pnode, instance.name,
                                            instance.hypervisor)
    inst_info.Raise("Error checking node %s" % pnode,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
    if inst_info.payload:
      # already running, no memory needs to be available
      return

    _CheckNodeFreeMemory(self, pnode,
                         "starting instance %s" % instance.name,
                         filled_be[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration before its disks
    and the instance itself are started.

    @raise errors.OpExecError: if the instance could not be started;
        its disks are shut down again before raising

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance,
                                                self.hvparams, self.beparams)
    fail_msg = start_result.fail_msg
    if not fail_msg:
      return

    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance: %s" % fail_msg)
3744

    
3745

    
3746
class LURebootInstance(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    A missing C{shutdown_timeout} opcode attribute is replaced by the
    default shutdown timeout.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Validate the reboot type and lock the instance.

    @raise errors.ParameterError: if the reboot type is not one of
        soft, hard or full

    """
    valid_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in valid_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  valid_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master and on all nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return env, nodes, nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and runs the
    node-online and bridge checks for it.

    """
    inst_name = self.op.instance_name
    self.instance = instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are handed to the node as a single reboot
    call. Any other (that is, full) reboot is done from here as a
    shutdown of the instance, a restart of its disks and a fresh start
    of the instance.

    @raise errors.OpExecError: if the instance cannot be started again
        during a full reboot

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    pnode = instance.primary_node

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)

    if reboot_type not in in_place_types:
      stop_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                    self.shutdown_timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      start_result = self.rpc.call_instance_start(pnode, instance, None, None)
      fail_msg = start_result.fail_msg
      if fail_msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % fail_msg)
    else:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, instance,
                                                    reboot_type,
                                                    self.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(instance.name)
3834

    
3835

    
3836
class LUShutdownInstance(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    A missing C{timeout} opcode attribute is replaced by the default
    shutdown timeout.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance to be stopped.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master and on all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return env, nodes, nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and runs the
    node-online check on its primary node.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first. A
    failure of the shutdown call itself is only logged as a warning,
    and the disks are shut down regardless.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.timeout

    self.cfg.MarkInstanceDown(instance.name)

    fail_msg = self.rpc.call_instance_shutdown(pnode, instance,
                                               timeout).fail_msg
    if fail_msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % fail_msg)

    _ShutdownInstanceDisks(self, instance)
3891

    
3892

    
3893
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance to be reinstalled.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master and on all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks, is
    marked as down and is not running on its primary node. If a new OS
    was requested, it is also looked up on the primary node.

    @raise errors.OpPrereqError: if any of the above checks fails

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name,
                                 errors.ECODE_STATE)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node),
                                 errors.ECODE_STATE)

    # both attributes are optional in the opcode
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # the opcode carries no "pnode" attribute, so the node name must
        # be taken from the instance; formatting self.op.pnode would
        # fail with an AttributeError instead of reporting the problem
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node, errors.ECODE_NOENT)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name),
                   prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(result.payload, self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS was requested, it is recorded in the configuration
    before the OS create scripts are run. The disks are started for
    the installation and always shut down afterwards.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # the disks must not stay active, whether the install worked or not
      _ShutdownInstanceDisks(self, inst)
3982

    
3983

    
3984
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit recreating the missing disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The disks parameter must be a list holding only non-negative
    integers.

    @raise errors.OpPrereqError: if the disks parameter is malformed

    """
    disks = self.op.disks
    if not isinstance(disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)

    for item in disks:
      is_valid = isinstance(item, int) and item >= 0
      if not is_valid:
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance whose disks are recreated.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master and on all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return env, nodes, nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks, is
    marked as down and is not running, and that all the passed disk
    indices exist. An empty disks list selects all disks.

    @raise errors.OpPrereqError: if any of the above checks fails

    """
    inst_name = self.op.instance_name
    instance = self.cfg.GetInstanceInfo(inst_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name

    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 inst_name, errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 inst_name, errors.ECODE_STATE)

    inst_info = self.rpc.call_instance_info(pnode, instance.name,
                                            instance.hypervisor)
    inst_info.Raise("Error checking node %s" % pnode,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
    if inst_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (inst_name, pnode), errors.ECODE_STATE)

    if self.op.disks:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      # nothing selected means all disks
      self.op.disks = range(len(instance.disks))

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    All disk indices which were not selected are passed to
    L{_CreateDisks} as the ones to skip.

    """
    selected = self.op.disks
    to_skip = [idx for idx in range(len(self.instance.disks))
               if idx not in selected]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4066

    
4067

    
4068
class LURenameInstance(LogicalUnit):
  """Logical unit renaming an instance which is currently stopped.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list consists of the master node followed by all the nodes
    of the instance; the same list is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration, must not be marked
    up and must not be reported as running by its primary node. The new
    name is resolved through L{utils.GetHostInfo}, must not be used by
    another instance and, unless the opcode carries a true C{ignore_ip},
    its IP must not answer a TCP ping on the node daemon port.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    op = self.op
    full_name = self.cfg.ExpandInstanceName(op.instance_name)
    inst = self.cfg.GetInstanceInfo(full_name)
    if inst is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 op.instance_name, errors.ECODE_NOENT)
    _CheckNodeOnline(self, inst.primary_node)

    if inst.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 op.instance_name, errors.ECODE_STATE)

    # ask the primary node whether the instance is really stopped
    run_info = self.rpc.call_instance_info(inst.primary_node, inst.name,
                                           inst.hypervisor)
    run_info.Raise("Error checking node %s" % inst.primary_node,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
    if run_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (op.instance_name, inst.primary_node),
                                 errors.ECODE_STATE)
    self.instance = inst

    # resolve the new name and store the resolved form in the opcode
    host_info = utils.GetHostInfo(op.new_name)
    new_name = host_info.name
    op.new_name = new_name

    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    ip_check_wanted = not getattr(op, "ignore_ip", False)
    if ip_check_wanted and utils.TcpPing(host_info.ip,
                                         constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host_info.ip, new_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are switched to the new name
    first; for file-based instances the storage directory is renamed
    afterwards, and finally the OS rename script is run with the disks
    started. A failure of the rename script only produces a warning.

    @param feedback_fn: not used by this method

    """
    old_inst = self.instance
    old_name = old_inst.name
    new_name = self.op.new_name

    if old_inst.disk_template == constants.DT_FILE:
      # remember the directory as derived from the first disk
      old_storage_dir = os.path.dirname(old_inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_inst.name, new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, old_name)
    lock_manager.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # the object must be re-read from the configuration after the rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                         old_storage_dir,
                                                         new_storage_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (inst.primary_node, old_storage_dir,
                        new_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(inst.primary_node,
                                                        inst, old_name)
      failure = script_result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, inst.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      # the disks are shut down again whatever the script outcome was
      _ShutdownInstanceDisks(self, inst)
4170

    
4171

    
4172
class LURemoveInstance(LogicalUnit):
  """Logical unit removing an instance from the cluster.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Store the shutdown timeout.

    The opcode's C{shutdown_timeout} is used when present, otherwise
    L{constants.DEFAULT_SHUTDOWN_TIMEOUT}.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list contains only the master node and is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    node_list = [self.cfg.GetMasterNode()]
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance object in the configuration and stores it in
    C{self.instance}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and finally it is
    dropped from the configuration. Shutdown and disk removal failures
    abort the operation unless the opcode's C{ignore_failures} is true,
    in which case they are only reported through C{feedback_fn}.

    @param feedback_fn: callable receiving the warning messages
    @raise errors.OpExecError: on a non-ignored failure

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                               self.shutdown_timeout)
    shutdown_err = shutdown.fail_msg
    if shutdown_err:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, shutdown_err))
      feedback_fn("Warning: can't shutdown instance: %s" % shutdown_err)

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    # register the instance lock for removal as well
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4249

    
4250

    
4251
class LUQueryInstances(NoHooksLU):
  """Logical unit answering instance queries.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  # fields read directly as attributes of the instance object
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  # fields computed from the configuration only
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  # fields which are filled from the data returned by the nodes
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  def ExpandNames(self):
    """Validate the selected fields and compute the needed locks.

    Locks are only declared when at least one selected field is not a
    static one and the opcode's C{use_locking} is true.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    for lock_level in (locking.LEVEL_INSTANCE, locking.LEVEL_NODE):
      self.share_locks[lock_level] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # any non-static field means the nodes have to be queried
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks, if locking was requested at all.

    """
    if self.do_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This logical unit has nothing to verify.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the requested fields for the selected instances.

    @param feedback_fn: not used by this method
    @return: a list with one entry per instance; each entry is the list
        of values for C{self.op.output_fields}, in the same order
    @raise errors.OpExecError: if explicitly requested instances are no
        longer available

    """
    cfg_instances = self.cfg.GetAllInstancesInfo()
    # the usable names are the locked ones or, without locking, all of them
    if self.do_locking:
      available = self.acquired_locks[locking.LEVEL_INSTANCE]
    else:
      available = cfg_instances.keys()

    if self.wanted == locking.ALL_SET:
      # no names were given by the caller: return a nicely sorted listing
      instance_names = utils.NiceSort(available)
    else:
      # names were given by the caller: their ordering has to be kept
      vanished = set(self.wanted).difference(available)
      if vanished:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % vanished)
      instance_names = self.wanted

    instances = [cfg_instances[iname] for iname in instance_names]

    # begin data gathering

    pnodes = frozenset(inst.primary_node for inst in instances)
    hypervisors = list(set(inst.hypervisor for inst in instances))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(pnodes, hypervisors)
      for node_name in pnodes:
        nresult = node_data[node_name]
        if nresult.offline:
          # offline nodes will be in both lists
          off_nodes.append(node_name)
        if nresult.fail_msg:
          bad_nodes.append(node_name)
        elif nresult.payload:
          live_data.update(nresult.payload)
        # an empty payload means no instance is alive on that node
    else:
      live_data = dict((iname, {}) for iname in instance_names)

    # end data gathering

    hv_prefix = "hv/"
    be_prefix = "be/"
    rows = []
    cluster = self.cfg.GetClusterInfo()
    for inst in instances:
      row = []
      filled_hv = cluster.FillHV(inst, skip_globals=True)
      filled_be = cluster.FillBE(inst)
      filled_nics = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nic.nicparams) for nic in inst.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(inst, field)
        elif field == "pnode":
          val = inst.primary_node
        elif field == "snodes":
          val = list(inst.secondary_nodes)
        elif field == "admin_state":
          val = inst.admin_up
        elif field == "oper_state":
          if inst.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(inst.name))
        elif field == "status":
          if inst.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif inst.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          elif live_data.get(inst.name):
            # running; check whether this matches the configured state
            if inst.admin_up:
              val = "running"
            else:
              val = "ERROR_up"
          elif inst.admin_up:
            val = "ERROR_down"
          else:
            val = "ADMIN_down"
        elif field == "oper_ram":
          if inst.primary_node in bad_nodes:
            val = None
          elif inst.name in live_data:
            val = live_data[inst.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = filled_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = inst.disk_template
        elif field == "ip":
          if inst.nics:
            val = inst.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if inst.nics:
            val = filled_nics[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if inst.nics:
            val = filled_nics[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          # only bridged NICs have a bridge to report
          if (inst.nics and
              filled_nics[0][constants.NIC_MODE] ==
              constants.NIC_MODE_BRIDGED):
            val = filled_nics[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if inst.nics:
            val = inst.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          # the third letter of the field name gives the disk index
          disk_idx = ord(field[2]) - ord('a')
          try:
            val = inst.FindDisk(disk_idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          size_specs = [{'size': disk.size} for disk in inst.disks]
          val = _ComputeDiskSize(inst.disk_template, size_specs)
        elif field == "tags":
          val = list(inst.GetTags())
        elif field == "hvparams":
          val = filled_hv
        elif (field.startswith(hv_prefix) and
              field[len(hv_prefix):] in constants.HVS_PARAMETERS and
              field[len(hv_prefix):] not in constants.HVC_GLOBALS):
          val = filled_hv.get(field[len(hv_prefix):], None)
        elif field == "beparams":
          val = filled_be
        elif (field.startswith(be_prefix) and
              field[len(be_prefix):] in constants.BES_PARAMETERS):
          val = filled_be.get(field[len(be_prefix):], None)
        elif st_match and st_match.groups():
          # the field matched one of the patterns with groups
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            disk_attr = st_groups[1]
            if disk_attr == "count":
              val = len(inst.disks)
            elif disk_attr == "sizes":
              val = [disk.size for disk in inst.disks]
            elif disk_attr == "size":
              try:
                val = inst.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            nic_attr = st_groups[1]
            if nic_attr == "count":
              val = len(inst.nics)
            elif nic_attr == "macs":
              val = [nic.mac for nic in inst.nics]
            elif nic_attr == "ips":
              val = [nic.ip for nic in inst.nics]
            elif nic_attr == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in filled_nics]
            elif nic_attr == "links":
              val = [nicp[constants.NIC_LINK] for nicp in filled_nics]
            elif nic_attr == "bridges":
              val = []
              for nicp in filled_nics:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(inst.nics):
                val = None
              elif nic_attr == "mac":
                val = inst.nics[nic_idx].mac
              elif nic_attr == "ip":
                val = inst.nics[nic_idx].ip
              elif nic_attr == "mode":
                val = filled_nics[nic_idx][constants.NIC_MODE]
              elif nic_attr == "link":
                val = filled_nics[nic_idx][constants.NIC_LINK]
              elif nic_attr == "bridge":
                nic_mode = filled_nics[nic_idx][constants.NIC_MODE]
                if nic_mode == constants.NIC_MODE_BRIDGED:
                  val = filled_nics[nic_idx][constants.NIC_LINK]
                else:
                  val = None
              else:
                assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        row.append(val)
      rows.append(row)

    return rows
4529

    
4530

    
4531
class LUFailoverInstance(LogicalUnit):
  """Logical unit failing over an instance to its secondary node.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Store the shutdown timeout.

    The opcode's C{shutdown_timeout} is used when present, otherwise
    L{constants.DEFAULT_SHUTDOWN_TIMEOUT}.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list consists of the master node followed by the secondary
    nodes of the instance; the same list is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    hook_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.secondary_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must use a network mirrored disk template. The first
    secondary node is the failover target; it is passed through the
    online and not-drained checks, the free memory check (only if the
    instance is marked up) and the bridge check.

    @raise errors.OpPrereqError: if the disk template is not mirrored
    @raise errors.ProgrammerError: if there is no secondary node

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    snodes = inst.secondary_nodes
    if not snodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target = snodes[0]
    _CheckNodeOnline(self, target)
    _CheckNodeNotDrained(self, target)
    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started, so the target needs enough memory
      _CheckNodeFreeMemory(self, target, "failing over instance %s" %
                           inst.name, be_params[constants.BE_MEMORY],
                           inst.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=target)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting the instance down on its present
    node and, if it is marked up, starting it on the secondary.

    @param feedback_fn: callable receiving the progress messages
    @raise errors.OpExecError: if a step fails and may not be ignored

    """
    inst = self.instance

    src_node = inst.primary_node
    dst_node = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for dev in inst.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, dev, dst_node, False)
        if consistent or self.op.ignore_consistency:
          continue
        raise errors.OpExecError("Disk %s is degraded on target node,"
                                 " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, src_node)

    shutdown = self.rpc.call_instance_shutdown(src_node, inst,
                                               self.shutdown_timeout)
    shutdown_err = shutdown.fail_msg
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, src_node, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, src_node, src_node, shutdown_err)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = dst_node
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, dst_node)

    disks_ok, _unused = _AssembleInstanceDisks(self, inst,
                                               ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start = self.rpc.call_instance_start(dst_node, inst, None, None)
    start_err = start.fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, dst_node, start_err))
4674

    
4675

    
4676
class LUMigrateInstance(LogicalUnit):
  """Logical unit migrating an instance.

  Unlike the failover, which shuts the instance down, the migration is
  done without shutting the instance down.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    # the actual work is delegated to a single tasklet
    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The instance object is taken from the migration tasklet. The node
    list consists of the master node followed by the secondary nodes of
    the instance; the same list is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    inst = self._migrater.instance
    hook_env = _BuildInstanceHookEnvByObject(self, inst)
    hook_env["MIGRATE_LIVE"] = self.op.live
    hook_env["MIGRATE_CLEANUP"] = self.op.cleanup
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(inst.secondary_nodes)
    return hook_env, node_list, node_list
4715

    
4716

    
4717
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the opcode's C{shutdown_timeout} if present, otherwise
    L{constants.DEFAULT_SHUTDOWN_TIMEOUT}.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Expand the instance and target node names and declare the locks.

    @raise errors.OpPrereqError: if the target node is not known

    """
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                  self.op.target_node, errors.ECODE_NOENT)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master node, the primary node of the
    instance and the target node; the same list is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    The target node must differ from the current primary node and all
    disks must be plain logical volumes or files. The target node is
    then passed through the online and not-drained checks, the free
    memory check (only if the instance is marked up) and the bridge
    check.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks has an unsupported type

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        # the disk index has to be interpolated, otherwise the user is
        # shown a literal "%d"
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: passed on to the configuration update
    @raise errors.OpExecError: if the shutdown fails (and consistency is
        not ignored), if the new disks cannot be created or filled, or
        if the instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # "ignore_consistency" is not a required opcode parameter of this
      # LU, so it must be looked up defensively; otherwise a failed
      # shutdown could end in an AttributeError instead of OpExecError
      if getattr(self.op, "ignore_consistency", False):
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # from here on the instance lives on the target node
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
4900

    
4901

    
4902
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance returned
  by C{_GetNodePrimaryInstances} for the node; this LU itself only
  computes the needed locks and the hooks environment.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, declare the locks and build the tasklets.

    @raise errors.OpPrereqError: if the node name cannot be expanded

    """
    # Validate the expansion result before storing it: overwriting
    # self.op.node_name first would make the error below always report
    # "Node 'None' not known" instead of the name the user passed in.
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      # The last argument is the tasklet's "cleanup" flag: a node
      # migration always performs real migrations
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list used for both the pre and the post phase contains
    only the master node.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
4955

    
4956

    
4957
class TLMigrateInstance(Tasklet):
4958
  def __init__(self, lu, instance_name, live, cleanup):
4959
    """Initializes this class.
4960

4961
    """
4962
    Tasklet.__init__(self, lu)
4963

    
4964
    # Parameters
4965
    self.instance_name = instance_name
4966
    self.live = live
4967
    self.cleanup = cleanup
4968

    
4969
  def CheckPrereq(self):
4970
    """Check prerequisites.
4971

4972
    This checks that the instance is in the cluster.
4973

4974
    """
4975
    instance = self.cfg.GetInstanceInfo(
4976
      self.cfg.ExpandInstanceName(self.instance_name))
4977
    if instance is None:
4978
      raise errors.OpPrereqError("Instance '%s' not known" %
4979
                                 self.instance_name, errors.ECODE_NOENT)
4980

    
4981
    if instance.disk_template != constants.DT_DRBD8:
4982
      raise errors.OpPrereqError("Instance's disk layout is not"
4983
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
4984

    
4985
    secondary_nodes = instance.secondary_nodes
4986
    if not secondary_nodes:
4987
      raise errors.ConfigurationError("No secondary node but using"
4988
                                      " drbd8 disk template")
4989

    
4990
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4991

    
4992
    target_node = secondary_nodes[0]
4993
    # check memory requirements on the secondary node
4994
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4995
                         instance.name, i_be[constants.BE_MEMORY],
4996
                         instance.hypervisor)
4997

    
4998
    # check bridge existance
4999
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5000

    
5001
    if not self.cleanup:
5002
      _CheckNodeNotDrained(self, target_node)
5003
      result = self.rpc.call_instance_migratable(instance.primary_node,
5004
                                                 instance)
5005
      result.Raise("Can't migrate, please use failover",
5006
                   prereq=True, ecode=errors.ECODE_STATE)
5007

    
5008
    self.instance = instance
5009

    
5010
  def _WaitUntilSync(self):
5011
    """Poll with custom rpc for disk sync.
5012

5013
    This uses our own step-based rpc call.
5014

5015
    """
5016
    self.feedback_fn("* wait until resync is done")
5017
    all_done = False
5018
    while not all_done:
5019
      all_done = True
5020
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5021
                                            self.nodes_ip,
5022
                                            self.instance.disks)
5023
      min_percent = 100
5024
      for node, nres in result.items():
5025
        nres.Raise("Cannot resync disks on node %s" % node)
5026
        node_done, node_percent = nres.payload
5027
        all_done = all_done and node_done
5028
        if node_percent is not None:
5029
          min_percent = min(min_percent, node_percent)
5030
      if not all_done:
5031
        if min_percent < 100:
5032
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5033
        time.sleep(2)
5034

    
5035
  def _EnsureSecondary(self, node):
5036
    """Demote a node to secondary.
5037

5038
    """
5039
    self.feedback_fn("* switching node %s to secondary mode" % node)
5040

    
5041
    for dev in self.instance.disks:
5042
      self.cfg.SetDiskID(dev, node)
5043

    
5044
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5045
                                          self.instance.disks)
5046
    result.Raise("Cannot change disk to secondary on node %s" % node)
5047

    
5048
  def _GoStandalone(self):
5049
    """Disconnect from the network.
5050

5051
    """
5052
    self.feedback_fn("* changing into standalone mode")
5053
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5054
                                               self.instance.disks)
5055
    for node, nres in result.items():
5056
      nres.Raise("Cannot disconnect disks node %s" % node)
5057

    
5058
  def _GoReconnect(self, multimaster):
5059
    """Reconnect to the network.
5060

5061
    """
5062
    if multimaster:
5063
      msg = "dual-master"
5064
    else:
5065
      msg = "single-master"
5066
    self.feedback_fn("* changing disks into %s mode" % msg)
5067
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5068
                                           self.instance.disks,
5069
                                           self.instance.name, multimaster)
5070
    for node, nres in result.items():
5071
      nres.Raise("Cannot change disks config on node %s" % node)
5072

    
5073
  def _ExecCleanup(self):
5074
    """Try to cleanup after a failed migration.
5075

5076
    The cleanup is done by:
5077
      - check that the instance is running only on one node
5078
        (and update the config if needed)
5079
      - change disks on its secondary node to secondary
5080
      - wait until disks are fully synchronized
5081
      - disconnect from the network
5082
      - change disks into single-master mode
5083
      - wait again until disks are fully synchronized
5084

5085
    """
5086
    instance = self.instance
5087
    target_node = self.target_node
5088
    source_node = self.source_node
5089

    
5090
    # check running on only one node
5091
    self.feedback_fn("* checking where the instance actually runs"
5092
                     " (if this hangs, the hypervisor might be in"
5093
                     " a bad state)")
5094
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5095
    for node, result in ins_l.items():
5096
      result.Raise("Can't contact node %s" % node)
5097

    
5098
    runningon_source = instance.name in ins_l[source_node].payload
5099
    runningon_target = instance.name in ins_l[target_node].payload
5100

    
5101
    if runningon_source and runningon_target:
5102
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5103
                               " or the hypervisor is confused. You will have"
5104
                               " to ensure manually that it runs only on one"
5105
                               " and restart this operation.")
5106

    
5107
    if not (runningon_source or runningon_target):
5108
      raise errors.OpExecError("Instance does not seem to be running at all."
5109
                               " In this case, it's safer to repair by"
5110
                               " running 'gnt-instance stop' to ensure disk"
5111
                               " shutdown, and then restarting it.")
5112

    
5113
    if runningon_target:
5114
      # the migration has actually succeeded, we need to update the config
5115
      self.feedback_fn("* instance running on secondary node (%s),"
5116
                       " updating config" % target_node)
5117
      instance.primary_node = target_node
5118
      self.cfg.Update(instance, self.feedback_fn)
5119
      demoted_node = source_node
5120
    else:
5121
      self.feedback_fn("* instance confirmed to be running on its"
5122
                       " primary node (%s)" % source_node)
5123
      demoted_node = target_node
5124

    
5125
    self._EnsureSecondary(demoted_node)
5126
    try:
5127
      self._WaitUntilSync()
5128
    except errors.OpExecError:
5129
      # we ignore here errors, since if the device is standalone, it
5130
      # won't be able to sync
5131
      pass
5132
    self._GoStandalone()
5133
    self._GoReconnect(False)
5134
    self._WaitUntilSync()
5135

    
5136
    self.feedback_fn("* done")
5137

    
5138
  def _RevertDiskStatus(self):
5139
    """Try to revert the disk status after a failed migration.
5140

5141
    """
5142
    target_node = self.target_node
5143
    try:
5144
      self._EnsureSecondary(target_node)
5145
      self._GoStandalone()
5146
      self._GoReconnect(False)
5147
      self._WaitUntilSync()
5148
    except errors.OpExecError, err:
5149
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5150
                         " drives: error '%s'\n"
5151
                         "Please look and recover the instance status" %
5152
                         str(err))
5153

    
5154
  def _AbortMigration(self):
5155
    """Call the hypervisor code to abort a started migration.
5156

5157
    """
5158
    instance = self.instance
5159
    target_node = self.target_node
5160
    migration_info = self.migration_info
5161

    
5162
    abort_result = self.rpc.call_finalize_migration(target_node,
5163
                                                    instance,
5164
                                                    migration_info,
5165
                                                    False)
5166
    abort_msg = abort_result.fail_msg
5167
    if abort_msg:
5168
      logging.error("Aborting migration failed on target node %s: %s",
5169
                    target_node, abort_msg)
5170
      # Don't raise an exception here, as we stil have to try to revert the
5171
      # disk status, even if this step failed.
5172

    
5173
  def _ExecMigration(self):
5174
    """Migrate an instance.
5175

5176
    The migrate is done by:
5177
      - change the disks into dual-master mode
5178
      - wait until disks are fully synchronized again
5179
      - migrate the instance
5180
      - change disks on the new secondary node (the old primary) to secondary
5181
      - wait until disks are fully synchronized
5182
      - change disks into single-master mode
5183

5184
    """
5185
    instance = self.instance
5186
    target_node = self.target_node
5187
    source_node = self.source_node
5188

    
5189
    self.feedback_fn("* checking disk consistency between source and target")
5190
    for dev in instance.disks:
5191
      if not _CheckDiskConsistency(self, dev, target_node, False):
5192
        raise errors.OpExecError("Disk %s is degraded or not fully"
5193
                                 " synchronized on target node,"
5194
                                 " aborting migrate." % dev.iv_name)
5195

    
5196
    # First get the migration information from the remote node
5197
    result = self.rpc.call_migration_info(source_node, instance)
5198
    msg = result.fail_msg
5199
    if msg:
5200
      log_err = ("Failed fetching source migration information from %s: %s" %
5201
                 (source_node, msg))
5202
      logging.error(log_err)
5203
      raise errors.OpExecError(log_err)
5204

    
5205
    self.migration_info = migration_info = result.payload
5206

    
5207
    # Then switch the disks to master/master mode
5208
    self._EnsureSecondary(target_node)
5209
    self._GoStandalone()
5210
    self._GoReconnect(True)
5211
    self._WaitUntilSync()
5212

    
5213
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5214
    result = self.rpc.call_accept_instance(target_node,
5215
                                           instance,
5216
                                           migration_info,
5217
                                           self.nodes_ip[target_node])
5218

    
5219
    msg = result.fail_msg
5220
    if msg:
5221
      logging.error("Instance pre-migration failed, trying to revert"
5222
                    " disk status: %s", msg)
5223
      self.feedback_fn("Pre-migration failed, aborting")
5224
      self._AbortMigration()
5225
      self._RevertDiskStatus()
5226
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5227
                               (instance.name, msg))
5228

    
5229
    self.feedback_fn("* migrating instance to %s" % target_node)
5230
    time.sleep(10)
5231
    result = self.rpc.call_instance_migrate(source_node, instance,
5232
                                            self.nodes_ip[target_node],
5233
                                            self.live)
5234
    msg = result.fail_msg
5235
    if msg:
5236
      logging.error("Instance migration failed, trying to revert"
5237
                    " disk status: %s", msg)
5238
      self.feedback_fn("Migration failed, aborting")
5239
      self._AbortMigration()
5240
      self._RevertDiskStatus()
5241
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5242
                               (instance.name, msg))
5243
    time.sleep(10)
5244

    
5245
    instance.primary_node = target_node
5246
    # distribute new instance config to the other nodes
5247
    self.cfg.Update(instance, self.feedback_fn)
5248

    
5249
    result = self.rpc.call_finalize_migration(target_node,
5250
                                              instance,
5251
                                              migration_info,
5252
                                              True)
5253
    msg = result.fail_msg
5254
    if msg:
5255
      logging.error("Instance migration succeeded, but finalization failed:"
5256
                    " %s", msg)
5257
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5258
                               msg)
5259

    
5260
    self._EnsureSecondary(source_node)
5261
    self._WaitUntilSync()
5262
    self._GoStandalone()
5263
    self._GoReconnect(False)
5264
    self._WaitUntilSync()
5265

    
5266
    self.feedback_fn("* done")
5267

    
5268
  def Exec(self, feedback_fn):
5269
    """Perform the migration.
5270

5271
    """
5272
    feedback_fn("Migrating instance %s" % self.instance.name)
5273

    
5274
    self.feedback_fn = feedback_fn
5275

    
5276
    self.source_node = self.instance.primary_node
5277
    self.target_node = self.instance.secondary_nodes[0]
5278
    self.all_nodes = [self.source_node, self.target_node]
5279
    self.nodes_ip = {
5280
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5281
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5282
      }
5283

    
5284
    if self.cleanup:
5285
      return self._ExecCleanup()
5286
    else:
5287
      return self._ExecMigration()
5288

    
5289

    
5290
def _CreateBlockDev(lu, node, instance, device, force_create,
5291
                    info, force_open):
5292
  """Create a tree of block devices on a given node.
5293

5294
  If this device type has to be created on secondaries, create it and
5295
  all its children.
5296

5297
  If not, just recurse to children keeping the same 'force' value.
5298

5299
  @param lu: the lu on whose behalf we execute
5300
  @param node: the node on which to create the device
5301
  @type instance: L{objects.Instance}
5302
  @param instance: the instance which owns the device
5303
  @type device: L{objects.Disk}
5304
  @param device: the device to create
5305
  @type force_create: boolean
5306
  @param force_create: whether to force creation of this device; this
5307
      will be change to True whenever we find a device which has
5308
      CreateOnSecondary() attribute
5309
  @param info: the extra 'metadata' we should attach to the device
5310
      (this will be represented as a LVM tag)
5311
  @type force_open: boolean
5312
  @param force_open: this parameter will be passes to the
5313
      L{backend.BlockdevCreate} function where it specifies
5314
      whether we run on primary or not, and it affects both
5315
      the child assembly and the device own Open() execution
5316

5317
  """
5318
  if device.CreateOnSecondary():
5319
    force_create = True
5320

    
5321
  if device.children:
5322
    for child in device.children:
5323
      _CreateBlockDev(lu, node, instance, child, force_create,
5324
                      info, force_open)
5325

    
5326
  if not force_create:
5327
    return
5328

    
5329
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5330

    
5331

    
5332
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5333
  """Create a single block device on a given node.
5334

5335
  This will not recurse over children of the device, so they must be
5336
  created in advance.
5337

5338
  @param lu: the lu on whose behalf we execute
5339
  @param node: the node on which to create the device
5340
  @type instance: L{objects.Instance}
5341
  @param instance: the instance which owns the device
5342
  @type device: L{objects.Disk}
5343
  @param device: the device to create
5344
  @param info: the extra 'metadata' we should attach to the device
5345
      (this will be represented as a LVM tag)
5346
  @type force_open: boolean
5347
  @param force_open: this parameter will be passes to the
5348
      L{backend.BlockdevCreate} function where it specifies
5349
      whether we run on primary or not, and it affects both
5350
      the child assembly and the device own Open() execution
5351

5352
  """
5353
  lu.cfg.SetDiskID(device, node)
5354
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5355
                                       instance.name, force_open, info)
5356
  result.Raise("Can't create block device %s on"
5357
               " node %s for instance %s" % (device, node, instance.name))
5358
  if device.physical_id is None:
5359
    device.physical_id = result.payload
5360

    
5361

    
5362
def _GenerateUniqueNames(lu, exts):
5363
  """Generate a suitable LV name.
5364

5365
  This will generate a logical volume name for the given instance.
5366

5367
  """
5368
  results = []
5369
  for val in exts:
5370
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5371
    results.append("%s%s" % (new_id, val))
5372
  return results
5373

    
5374

    
5375
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  @param lu: the lu on whose behalf we execute
  @param primary: the first node stored in the drbd logical ID
  @param secondary: the second node stored in the drbd logical ID
  @param size: the size of the data LV and of the drbd device
  @param names: sequence holding the data LV name followed by the
      metadata LV name
  @param iv_name: the iv_name given to the drbd device
  @param p_minor: the minor stored for the primary node
  @param s_minor: the minor stored for the secondary node
  @return: the drbd8 L{objects.Disk}, with the data and the 128-sized
      metadata LV as children

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
5394

    
5395

    
5396
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Build the disk objects of an instance for a given disk template.

  @param lu: the lu on whose behalf we execute
  @param template_name: one of the disk template constants
  @param instance_name: the instance name, used when allocating the
      drbd minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; must have exactly one
      entry for drbd8 and none for the plain and file templates
  @param disk_info: sequence of dicts with the "size" and "mode" of
      each disk
  @param file_storage_dir: the directory holding file-based disks
  @param file_driver: the driver stored in the logical ID of
      file-based disks
  @param base_index: the index given to the first disk
  @return: the list of L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: if the template is unknown or the
      number of secondary nodes does not fit the template

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    lv_names = _GenerateUniqueNames(lu, suffixes)
    for idx, (lv_name, disk) in enumerate(zip(lv_names, disk_info)):
      disks.append(objects.Disk(dev_type=constants.LD_LV,
                                size=disk["size"],
                                logical_id=(vgname, lv_name),
                                iv_name="disk/%d" % (idx + base_index),
                                mode=disk["mode"]))
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: primary node first, then the secondary one
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * disk_count, instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    prefixes = _GenerateUniqueNames(lu, suffixes)
    for idx, disk in enumerate(disk_info):
      pair = [prefixes[idx] + "_data", prefixes[idx] + "_meta"]
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], pair,
                                      "disk/%d" % (idx + base_index),
                                      minors[2 * idx], minors[2 * idx + 1])
      drbd_dev.mode = disk["mode"]
      disks.append(drbd_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      file_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(file_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
5460

    
5461

    
5462
def _GetInstanceInfoText(instance):
5463
  """Compute that text that should be added to the disk's metadata.
5464

5465
  """
5466
  return "originstname+%s" % instance.name
5467

    
5468

    
5469
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks of an instance.

  For file-based instances the storage directory is created first.
  Every disk is then created on all involved nodes; creation (and
  opening) is only forced on the primary node, respectively on the
  target node if one was given.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of disk indices to skip
  @type target_node: string
  @param target_node: if passed, the disks are created on this node
      only, instead of on all the nodes of the instance

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if not to_skip or idx not in to_skip:
      logging.info("Creating volume %s for instance %s",
                   device.iv_name, instance.name)
      #HARDCODE
      for node in all_nodes:
        on_primary = node == pnode
        _CreateBlockDev(lu, node, instance, device, on_primary, info,
                        on_primary)
5512

    
5513

    
5514
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`). For file-based instances the storage
  directory is removed as well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal; False if any device or the
      storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the given node is touched, not the whole node tree
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which is
      # not the primary node when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5561

    
5562

    
5563
def _ComputeDiskSize(disk_template, disks):
  """Compute the space a set of disks needs in the volume group.

  @param disk_template: one of the disk template constants
  @param disks: iterable of dicts holding the "size" of each disk
  @return: the required size, or None for the diskless and the file
      templates
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  plain_size = sum(d["size"] for d in disks)
  # each drbd disk needs 128 MB more for its metadata
  drbd_size = sum(d["size"] + 128 for d in disks)

  # Required free disk space for each of the known templates
  requirements = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_size,
    constants.DT_DRBD8: drbd_size,
    constants.DT_FILE: None,
  }

  try:
    return requirements[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)
5581

    
5582

    
5583
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5584
  """Hypervisor parameter validation.
5585

5586
  This function abstract the hypervisor parameter validation to be
5587
  used in both instance create and instance modify.
5588

5589
  @type lu: L{LogicalUnit}
5590
  @param lu: the logical unit for which we check
5591
  @type nodenames: list
5592
  @param nodenames: the list of nodes on which we should check
5593
  @type hvname: string
5594
  @param hvname: the name of the hypervisor we should use
5595
  @type hvparams: dict
5596
  @param hvparams: the parameters which we need to check
5597
  @raise errors.OpPrereqError: if the parameters are not valid
5598

5599
  """
5600
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5601
                                                  hvname,
5602
                                                  hvparams)
5603
  for node in nodenames:
5604
    info = hvinfo[node]
5605
    if info.offline:
5606
      continue
5607
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5608

    
5609

    
5610
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  The instance is either created from scratch
  (C{constants.INSTANCE_CREATE}) or imported from an existing export
  (C{constants.INSTANCE_IMPORT}), depending on C{self.op.mode}.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    Defaults C{name_check} to True when the opcode does not carry it and
    refuses an IP check that is requested without a name check.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    @param node: the (possibly abbreviated) node name
    @return: the name as expanded by the configuration
    @raise errors.OpPrereqError: if the configuration cannot expand the name

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node, errors.ECODE_NOENT)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    Besides computing the locks, this runs the cheap checks on the opcode
    and prepares C{self.hv_full}, C{self.be_full}, C{self.check_ip},
    C{self.nics} and C{self.disks} for the later phases.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name",
                                 errors.ECODE_INVAL)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    if self.op.name_check:
      hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = instance_name = hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = hostname1.ip
    else:
      instance_name = self.op.instance_name
      self.check_ip = None

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        # hostname1 is only bound when name_check is set (checked just above)
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac, errors.ECODE_INVAL)
        else:
          try:
            self.cfg.ReserveMAC(mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % mac,
                                       errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      # only the explicitly requested values are stored on the NIC; the
      # cluster defaults are merged in solely for the syntax check
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      self.disks.append({"size": size, "mode": mode})

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    # The directory is later joined below the cluster file storage dir (see
    # Exec), so an absolute path is what gets rejected here.
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must not be"
                                 " absolute", errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # the export has to be searched for on the nodes, so lock them all
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success C{self.op.pnode} is set to the first returned node and, when
    the allocator required two nodes, C{self.op.snode} to the second one.

    @raise errors.OpPrereqError: if the allocator run was not successful or
        returned an unexpected number of nodes

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: a tuple of the environment dict and the node list, the latter
        given twice

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    For imports this locates and parses the export; in all modes it then
    generates the missing MAC addresses, runs the allocator if one was
    requested and verifies the selected nodes (state, free disk space,
    hypervisor parameters, OS, bridges and, when starting the instance,
    free memory).

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        # no source node given: look for the export on all locked nodes
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path, errors.ECODE_INVAL)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        # NOTE(review): a broken export is reported as a ProgrammerError
        # (with an error code argument); kept as-is for callers, but an
        # OpPrereqError looks more appropriate - confirm before changing
        raise errors.ProgrammerError("Corrupted export config",
                                     errors.ECODE_ENVIRON)

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if int(ei_version) != constants.EXPORT_VERSION:
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION),
                                   errors.ECODE_ENVIRON)

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          # keep the positions aligned with the disk indices
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          # NIC indices are 0-based, so only idx < exp_nic_count can have a
          # matching 'nic<idx>_mac' entry in the export
          if nic.mac == constants.VALUE_AUTO and exp_nic_count > idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode",
                                 errors.ECODE_INVAL)

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node, errors.ECODE_ENVIRON)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size),
                                     errors.ECODE_NORES)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name),
                 prereq=True, ecode=errors.ECODE_INVAL)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    @param feedback_fn: callable used to report progress messages
    @return: the list of all nodes of the new instance
    @raise errors.OpExecError: if disk creation fails or the disks are
        found degraded after creation

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance is always added as down; admin_up is only switched on
    # further below, when a start was requested
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        # the minors are released and the error propagated even if the
        # disk removal itself failed
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # the source node lock is kept, as the import below still needs it
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          # an import failure is only logged, it does not abort the creation
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
6247

    
6248

    
6249
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare its lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    @param feedback_fn: progress reporting callable (not used here)
    @return: the ssh command, as built by C{self.ssh.BuildCmd}, which
        runs the hypervisor's console command on the primary node
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6300

    
6301

    
6302
class LUReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is carried out by a L{TLReplaceDisks} tasklet; this
  class validates the opcode arguments, declares the needed locks and builds
  the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Lock the instance, declare the node locks and create the tasklet.

    @raise errors.OpPrereqError: if the given new secondary node is unknown

    """
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      # with an allocator, all nodes are requested
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is None:
      # node locks are computed later, in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    else:
      new_secondary = self.cfg.ExpandNodeName(self.op.remote_node)
      if new_secondary is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node, errors.ECODE_NOENT)

      self.op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [new_secondary]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)
    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's nodes unless all nodes are already requested.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master node, the instance's primary node and,
    when one was given, the new secondary node; the same list is returned
    for both node-list slots of the result.

    """
    inst = self.replacer.instance
    env = dict(MODE=self.op.mode,
               NEW_SECONDARY=self.op.remote_node,
               OLD_SECONDARY=inst.secondary_nodes[0])
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return env, hook_nodes, hook_nodes
6378

    
6379

    
6380
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  One L{TLReplaceDisks} tasklet in "change secondary" mode is created for
  every instance returned by C{_GetNodeSecondaryInstances} for the node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Expand node names, declare locks and create one tasklet per instance.

    @raise errors.OpPrereqError: if the node to evacuate or the new secondary
        node is unknown, or if neither an iallocator nor a new secondary
        node was given

    """
    # Expand into a local first: assigning the result straight to
    # self.op.node_name would lose the name the user typed, and the error
    # message below would then always report "Node 'None' not known".
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node, errors.ECODE_NOENT)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the instances' nodes unless all nodes are already requested.

    """
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master node and, when one was given, the new
    secondary node; the same list is returned for both node-list slots of
    the result.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)
6472

    
6473

    
6474
class TLReplaceDisks(Tasklet):
6475
  """Replaces disks for an instance.
6476

6477
  Note: Locking is not within the scope of this class.
6478

6479
  """
6480
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Record the replacement request; nothing is looked up here.

    @param lu: the logical unit owning this tasklet
    @param instance_name: name of the instance whose disks are replaced
    @param mode: the disk replacement mode
    @param iallocator_name: name of the iallocator to use, or None
    @param remote_node: name of the new secondary node, or None
    @param disks: the disks requested for replacement

    """
    Tasklet.__init__(self, lu)

    # What the caller asked for
    self.disks = disks
    self.remote_node = remote_node
    self.iallocator_name = iallocator_name
    self.mode = mode
    self.instance_name = instance_name

    # Runtime state, all unset until computed later
    self.node_secondary_ip = None
    self.remote_node_info = None
    self.other_node = None
    self.target_node = None
    self.new_node = None
    self.instance = None
6501

    
6502
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Validate the mode/new-node/iallocator combination.

    When changing the secondary exactly one of C{remote_node} and
    C{iallocator} must be given; in every other mode neither may be given.

    @param mode: the disk replacement mode
    @param remote_node: the new secondary node, or None
    @param iallocator: the iallocator name, or None
    @raise errors.OpPrereqError: if the combination is not valid

    """
    node_given = remote_node is not None
    ialloc_given = iallocator is not None

    if mode != constants.REPLACE_DISK_CHG:
      # Not replacing the secondary
      if node_given or ialloc_given:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " only be used when changing the"
                                   " secondary node", errors.ECODE_INVAL)
      return

    if not (node_given or ialloc_given):
      raise errors.OpPrereqError("When changing the secondary either an"
                                 " iallocator script must be used or the"
                                 " new node given", errors.ECODE_INVAL)

    if node_given and ialloc_given:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)
6523

    
6524
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    @param lu: the logical unit; its C{cfg} and C{rpc} are handed to the
        allocator and its C{LogInfo} is used to report the selection
    @param iallocator_name: name of the iallocator to run
    @param instance_name: name of the instance to relocate
    @param relocate_from: passed to the allocator as C{relocate_from}
    @return: the first node returned by the allocator
    @raise errors.OpPrereqError: if the allocator fails or returns an
        unexpected number of nodes

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.nodes) != ial.required_nodes:
      # The message has three placeholders, so the allocator name has to be
      # passed too; with only two arguments the formatting itself fails with
      # a TypeError and hides the real problem.
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
6553

    
6554
  def _FindFaultyDisks(self, node_name):
    """Look up the faulty disks of this tasklet's instance on a node.

    @param node_name: the node to check
    @return: whatever C{_FindFaultyInstanceDisks} returns for the instance

    """
    faulty = _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                      node_name, True)
    return faulty
6557

    
6558
  def CheckPrereq(self):
    """Validate the request and work out the nodes and disks involved.

    Checks that the instance is DRBD8-based with exactly one secondary,
    determines the new secondary (directly or via the iallocator) and, based
    on the mode, fills in C{self.target_node}, C{self.other_node},
    C{self.new_node} and C{self.disks}. Finally the involved nodes are
    checked to be online, the disk indices are validated and the secondary
    IP addresses of the involved nodes are stored in
    C{self.node_secondary_ip}.

    @raise errors.OpPrereqError: if any of the checks fails
    @raise errors.ProgrammerError: if the mode is not a known one

    """
    inst = self.cfg.GetInstanceInfo(self.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if inst.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(inst.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(inst.secondary_nodes),
                                 errors.ECODE_FAULT)

    old_secondary = inst.secondary_nodes[0]

    # Either the caller named the new secondary or the allocator picks it
    if self.iallocator_name is not None:
      new_secondary = self._RunAllocator(self.lu, self.iallocator_name,
                                         inst.name, inst.secondary_nodes)
    else:
      new_secondary = self.remote_node

    if new_secondary is None:
      self.remote_node_info = None
    else:
      self.remote_node_info = self.cfg.GetNodeInfo(new_secondary)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % new_secondary

    if new_secondary == inst.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if new_secondary == old_secondary:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    no_disk_choice = (constants.REPLACE_DISK_AUTO, constants.REPLACE_DISK_CHG)
    if self.disks and self.mode in no_disk_choice:
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      bad_on_primary = self._FindFaultyDisks(inst.primary_node)
      bad_on_secondary = self._FindFaultyDisks(old_secondary)

      if bad_on_primary and bad_on_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if bad_on_primary:
        self.disks = bad_on_primary
        self.target_node = inst.primary_node
        self.other_node = old_secondary
        check_nodes = [self.target_node, self.other_node]
      elif bad_on_secondary:
        self.disks = bad_on_secondary
        self.target_node = old_secondary
        self.other_node = inst.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # nothing is faulty, so there is nothing to replace or check
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = inst.primary_node
        self.other_node = old_secondary
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = old_secondary
        self.other_node = inst.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = new_secondary
        self.other_node = inst.primary_node
        self.target_node = old_secondary
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, new_secondary)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(inst.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      inst.FindDisk(disk_idx)

    # Get secondary node IP addresses
    involved = [self.target_node, self.other_node, self.new_node]
    secondary_ips = {}
    for node_name in involved:
      if node_name is None:
        continue
      secondary_ips[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = secondary_ips
6674

    
6675
  def Exec(self, feedback_fn):
    """Run the disk replacement through the matching handler.

    If C{self.new_node} is set, the secondary-replacing handler is used,
    otherwise the disk-only one. For an instance that is not marked up the
    disks are started before and shut down again after the replacement.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the handler's return value, or None if there are no disks to
        replace

    """
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    instance_down = not self.instance.admin_up

    # the disks of a down instance must be activated for the replacement
    if instance_down:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      if self.new_node is None:
        handler = self._ExecDrbd8DiskOnly
      else:
        # a new node was chosen, so the secondary gets replaced
        handler = self._ExecDrbd8Secondary

      return handler(feedback_fn)

    finally:
      # ... and deactivated again, whether the replacement worked or not
      if instance_down:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6708

    
6709
  def _CheckVolumeGroup(self, nodes):
    """Ensure the cluster's volume group is present on all given nodes.

    @param nodes: names of the nodes to check
    @raise errors.OpExecError: if the volume groups can't be listed or the
        volume group is missing on a node

    """
    self.lu.LogInfo("Checking volume groups")

    wanted_vg = self.cfg.GetVGName()

    per_node = self.rpc.call_vg_list(nodes)
    if not per_node:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      node_result = per_node[node]
      node_result.Raise("Error checking node %s" % node)
      if wanted_vg in node_result.payload:
        continue
      raise errors.OpExecError("Volume group '%s' not found on node %s" %
                               (wanted_vg, node))
6725

    
6726
  def _CheckDisksExistence(self, nodes):
    """Verify that every disk selected for replacement exists on the nodes.

    @param nodes: names of the nodes on which to look for the disks
    @raise errors.OpExecError: if a disk can't be found on a node

    """
    for idx, dev in enumerate(self.instance.disks):
      if idx in self.disks:
        for node in nodes:
          self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
          self.cfg.SetDiskID(dev, node)

          found = self.rpc.call_blockdev_find(node, dev)

          failure = found.fail_msg
          if not failure and found.payload:
            continue

          # no RPC error message means the device simply wasn't there
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, failure or "disk not found"))
6744

    
6745
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Verify the consistency of the selected disks on one node.

    @param node_name: the node to check
    @param on_primary: passed through to C{_CheckDiskConsistency}
    @param ldisk: passed through to C{_CheckDiskConsistency}
    @raise errors.OpExecError: if a selected disk is not consistent

    """
    for idx, dev in enumerate(self.instance.disks):
      if idx in self.disks:
        self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                        (idx, node_name))

        consistent = _CheckDiskConsistency(self.lu, dev, node_name,
                                           on_primary, ldisk=ldisk)
        if not consistent:
          raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                   " replace disks for instance %s" %
                                   (node_name, self.instance.name))
6758

    
6759
  def _CreateNewStorage(self, node_name):
    """Create a fresh data/meta LV pair for every selected disk.

    @param node_name: the node on which the new LVs are created
    @return: a dict mapping each disk's C{iv_name} to a tuple of
        (disk, old LVs, new LVs)

    """
    vg = self.cfg.GetVGName()
    replacements = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx in self.disks:
        self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

        self.cfg.SetDiskID(dev, node_name)

        wanted = [".disk%d_data" % idx, ".disk%d_meta" % idx]
        data_name, meta_name = _GenerateUniqueNames(self.lu, wanted)[:2]

        data_lv = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                               logical_id=(vg, data_name))
        meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                               logical_id=(vg, meta_name))

        fresh_lvs = [data_lv, meta_lv]
        replacements[dev.iv_name] = (dev, dev.children, fresh_lvs)

        # we pass force_create=True to force the LVM creation
        for lv in fresh_lvs:
          _CreateBlockDev(self.lu, node_name, self.instance, lv, True,
                          _GetInstanceInfoText(self.instance), False)

    return replacements
6789

    
6790
  def _CheckDevices(self, node_name, iv_names):
    """Verify that all replaced devices exist and are not degraded.

    @param node_name: the node on which to look for the devices
    @param iv_names: dict whose values are tuples starting with the disk
        object; the keys are only used in the error messages
    @raise errors.OpExecError: if a device is missing or degraded

    """
    for name, entry in iv_names.iteritems():
      dev = entry[0]
      self.cfg.SetDiskID(dev, node_name)

      found = self.rpc.call_blockdev_find(node_name, dev)

      failure = found.fail_msg
      if failure or not found.payload:
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, failure or "disk not found"))

      if found.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6805

    
6806
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the old LVs of all replaced disks, warning on failures.

    A failed removal is only logged as a warning; it does not abort the
    operation.

    @param node_name: the node holding the old LVs
    @param iv_names: dict whose values are tuples with the old LVs as second
        element; the keys are only used in the log messages

    """
    for name, entry in iv_names.iteritems():
      stale_lvs = entry[1]
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in stale_lvs:
        self.cfg.SetDiskID(lv, node_name)

        removal = self.rpc.call_blockdev_remove(node_name, lv)
        failure = removal.fail_msg
        if failure:
          self.lu.LogWarning("Can't remove old LV: %s" % failure,
                             hint="remove unused LVs manually")
6817

    
6818
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to <name>_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name <name>_replaced-<time_t>)

    Failures are not very well handled.

    @param feedback_fn: function used to send feedback back to the caller;
        here it is only handed on to C{self.cfg.Update}
    @raise errors.OpExecError: if one of the checks or of the mandatory
        steps fails

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      # Note: dev.children is deliberately left alone at this point; it is
      # only replaced further down, once the new LVs have been attached.

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Make the in-memory objects follow the renames done on the node: the
      # new LVs take over the old IDs ...
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # ... and the old LVs get the "_replaced-<suffix>" names
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        # attaching failed: try to remove the new LVs again before aborting
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
6940

    
6941
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    @param feedback_fn: function used to send feedback back to the caller;
        here it is only handed on to C{self.cfg.Update}
    @raise errors.OpExecError: if one of the checks fails or the primary's
        disks can't be detached from the network

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation (the attach further down)
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # give back the minors allocated above before propagating the error
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        # only a warning: a failed shutdown does not stop the replacement
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
7085

    
7086

    
7087
class LURepairNodeStorage(NoHooksLU):
7088
  """Repairs the volume group on a node.
7089

7090
  """
7091
  _OP_REQP = ["node_name"]
7092
  REQ_BGL = False
7093

    
7094
  def CheckArguments(self):
7095
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
7096
    if node_name is None:
7097
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
7098
                                 errors.ECODE_NOENT)
7099

    
7100
    self.op.node_name = node_name
7101

    
7102
  def ExpandNames(self):
7103
    self.needed_locks = {
7104
      locking.LEVEL_NODE: [self.op.node_name],
7105
      }
7106

    
7107
  def _CheckFaultyDisks(self, instance, node_name):
7108
    """Ensure faulty disks abort the opcode or at least warn."""
7109
    try:
7110
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7111
                                  node_name, True):
7112
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7113
                                   " node '%s'" % (instance.name, node_name),
7114
                                   errors.ECODE_STATE)
7115
    except errors.OpPrereqError, err:
7116
      if self.op.ignore_consistency:
7117
        self.proc.LogWarning(str(err.args[0]))
7118
      else:
7119
        raise
7120

    
7121
  def CheckPrereq(self):
7122
    """Check prerequisites.
7123

7124
    """
7125
    storage_type = self.op.storage_type
7126

    
7127
    if (constants.SO_FIX_CONSISTENCY not in
7128
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7129
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7130
                                 " repaired" % storage_type,
7131
                                 errors.ECODE_INVAL)
7132

    
7133
    # Check whether any instance on this node has faulty disks
7134
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7135
      if not inst.admin_up:
7136
        continue
7137
      check_nodes = set(inst.all_nodes)
7138
      check_nodes.discard(self.op.node_name)
7139
      for inst_node_name in check_nodes:
7140
        self._CheckFaultyDisks(inst, inst_node_name)
7141

    
7142
  def Exec(self, feedback_fn):
7143
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7144
                (self.op.name, self.op.node_name))
7145

    
7146
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7147
    result = self.rpc.call_storage_execute(self.op.node_name,
7148
                                           self.op.storage_type, st_args,
7149
                                           self.op.name,
7150
                                           constants.SO_FIX_CONSISTENCY)
7151
    result.Raise("Failed to repair storage unit '%s' on %s" %
7152
                 (self.op.name, self.op.node_name))
7153

    
7154

    
7155
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are recomputed in DeclareLocks."""
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the instance's nodes once the node level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list used for both hook phases contains the master node and
    the primary node of the instance.

    """
    env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template can be grown and that every node
    reports enough free space in the volume group.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(inst.all_nodes)
    for name in nodenames:
      _CheckNodeOnline(self, name)

    self.instance = inst

    growable = (constants.DT_PLAIN, constants.DT_DRBD8)
    if inst.disk_template not in growable:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       inst.hypervisor)
    for name in nodenames:
      node_result = nodeinfo[name]
      node_result.Raise("Cannot get current information from node %s" % name)
      free_space = node_result.payload.get('vg_free')
      if not isinstance(free_space, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % name, errors.ECODE_ENVIRON)
      if free_space < self.op.amount:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (name, free_space, self.op.amount),
                                   errors.ECODE_NORES)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is sent to each node of the instance in turn, then
    the new size is recorded in the configuration.

    """
    inst = self.instance
    target = self.disk
    for node in inst.all_nodes:
      self.cfg.SetDiskID(target, node)
      grow_result = self.rpc.call_blockdev_grow(node, target, self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    target.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)
    if self.op.wait_for_sync and not _WaitForSync(self, inst):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
7252

    
7253

    
7254
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the (shared) locks needed for the requested instances.

    An empty instance list means all instances are locked.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not requested:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in requested:
        full_name = self.cfg.ExpandInstanceName(short_name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name,
                                     errors.ECODE_NOENT)
        expanded.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = expanded

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the nodes of the locked instances at node level."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This resolves the wanted instance names (all locked instances if none
    were given) into their configuration objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    get_info = self.cfg.GetInstanceInfo
    self.wanted_instances = [get_info(name) for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    C{None} is returned for static queries, when no node is given, when
    the node is offline or when the node reports no status.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    wanted_fields = ("dev_path", "major", "minor",
                     "sync_percent", "estimated_time",
                     "is_degraded", "ldisk_status")
    return tuple([getattr(status, field) for field in wanted_fields])

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Returns a dict describing the device, its status on the primary and
    secondary node, and (recursively) its children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    child_data = [self._ComputeDiskStatus(instance, snode, child)
                  for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_data,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    per_instance = {}
    for inst in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info_result = self.rpc.call_instance_info(inst.primary_node,
                                                  inst.name,
                                                  inst.hypervisor)
        info_result.Raise("Error checking node %s" % inst.primary_node)
        runtime = info_result.payload
        if runtime and "state" in runtime:
          run_state = "up"
        else:
          run_state = "down"

      if inst.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disk_data = [self._ComputeDiskStatus(inst, None, device)
                   for device in inst.disks]

      per_instance[inst.name] = {
        "name": inst.name,
        "config_state": config_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return per_instance
7412

    
7413

    
7414
class LUSetInstanceParams(LogicalUnit):
7415
  """Modifies an instance's parameters.
7416

7417
  """
7418
  HPATH = "instance-modify"
7419
  HTYPE = constants.HTYPE_INSTANCE
7420
  _OP_REQP = ["instance_name"]
7421
  REQ_BGL = False
7422

    
7423
  def CheckArguments(self):
7424
    if not hasattr(self.op, 'nics'):
7425
      self.op.nics = []
7426
    if not hasattr(self.op, 'disks'):
7427
      self.op.disks = []
7428
    if not hasattr(self.op, 'beparams'):
7429
      self.op.beparams = {}
7430
    if not hasattr(self.op, 'hvparams'):
7431
      self.op.hvparams = {}
7432
    self.op.force = getattr(self.op, "force", False)
7433
    if not (self.op.nics or self.op.disks or
7434
            self.op.hvparams or self.op.beparams):
7435
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7436

    
7437
    if self.op.hvparams:
7438
      _CheckGlobalHvParams(self.op.hvparams)
7439

    
7440
    # Disk validation
7441
    disk_addremove = 0
7442
    for disk_op, disk_dict in self.op.disks:
7443
      if disk_op == constants.DDM_REMOVE:
7444
        disk_addremove += 1
7445
        continue
7446
      elif disk_op == constants.DDM_ADD:
7447
        disk_addremove += 1
7448
      else:
7449
        if not isinstance(disk_op, int):
7450
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7451
        if not isinstance(disk_dict, dict):
7452
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7453
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7454

    
7455
      if disk_op == constants.DDM_ADD:
7456
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7457
        if mode not in constants.DISK_ACCESS_SET:
7458
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7459
                                     errors.ECODE_INVAL)
7460
        size = disk_dict.get('size', None)
7461
        if size is None:
7462
          raise errors.OpPrereqError("Required disk parameter size missing",
7463
                                     errors.ECODE_INVAL)
7464
        try:
7465
          size = int(size)
7466
        except ValueError, err:
7467
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7468
                                     str(err), errors.ECODE_INVAL)
7469
        disk_dict['size'] = size
7470
      else:
7471
        # modification of disk
7472
        if 'size' in disk_dict:
7473
          raise errors.OpPrereqError("Disk size change not possible, use"
7474
                                     " grow-disk", errors.ECODE_INVAL)
7475

    
7476
    if disk_addremove > 1:
7477
      raise errors.OpPrereqError("Only one disk add or remove operation"
7478
                                 " supported at a time", errors.ECODE_INVAL)
7479

    
7480
    # NIC validation
7481
    nic_addremove = 0
7482
    for nic_op, nic_dict in self.op.nics:
7483
      if nic_op == constants.DDM_REMOVE:
7484
        nic_addremove += 1
7485
        continue
7486
      elif nic_op == constants.DDM_ADD:
7487
        nic_addremove += 1
7488
      else:
7489
        if not isinstance(nic_op, int):
7490
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7491
        if not isinstance(nic_dict, dict):
7492
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7493
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7494

    
7495
      # nic_dict should be a dict
7496
      nic_ip = nic_dict.get('ip', None)
7497
      if nic_ip is not None:
7498
        if nic_ip.lower() == constants.VALUE_NONE:
7499
          nic_dict['ip'] = None
7500
        else:
7501
          if not utils.IsValidIP(nic_ip):
7502
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7503
                                       errors.ECODE_INVAL)
7504

    
7505
      nic_bridge = nic_dict.get('bridge', None)
7506
      nic_link = nic_dict.get('link', None)
7507
      if nic_bridge and nic_link:
7508
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7509
                                   " at the same time", errors.ECODE_INVAL)
7510
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7511
        nic_dict['bridge'] = None
7512
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7513
        nic_dict['link'] = None
7514

    
7515
      if nic_op == constants.DDM_ADD:
7516
        nic_mac = nic_dict.get('mac', None)
7517
        if nic_mac is None:
7518
          nic_dict['mac'] = constants.VALUE_AUTO
7519

    
7520
      if 'mac' in nic_dict:
7521
        nic_mac = nic_dict['mac']
7522
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7523
          if not utils.IsValidMac(nic_mac):
7524
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac,
7525
                                       errors.ECODE_INVAL)
7526
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7527
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7528
                                     " modifying an existing nic",
7529
                                     errors.ECODE_INVAL)
7530

    
7531
    if nic_addremove > 1:
7532
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7533
                                 " supported at a time", errors.ECODE_INVAL)
7534

    
7535
  def ExpandNames(self):
    """Lock the instance; node locks are recomputed in DeclareLocks."""
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE
7539

    
7540
  def DeclareLocks(self, level):
    """Lock the instance's nodes once the node level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()
7543

    
7544
  def BuildHooksEnv(self):
    """Build hooks env.

    The environment reflects the requested memory, vcpus and NIC changes.
    The node list used for both hook phases is the master node followed
    by all nodes of the instance.

    """
    overrides = dict()
    for be_key, hook_key in ((constants.BE_MEMORY, 'memory'),
                             (constants.BE_VCPUS, 'vcpus')):
      if be_key in self.be_new:
        overrides[hook_key] = self.be_new[be_key]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      hook_nics = overrides['nics'] = []
      nic_changes = dict(self.op.nics)
      cluster_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        # requested values win over the ones currently configured
        change = nic_changes.get(idx, {})
        ip = change.get('ip', nic.ip)
        mac = change.get('mac', nic.mac)
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(cluster_nicparams, nic.nicparams)
        hook_nics.append((ip, mac, nicparams[constants.NIC_MODE],
                          nicparams[constants.NIC_LINK]))
      if constants.DDM_ADD in nic_changes:
        added = nic_changes[constants.DDM_ADD]
        ip = added.get('ip', None)
        mac = added['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        hook_nics.append((ip, mac, nicparams[constants.NIC_MODE],
                          nicparams[constants.NIC_LINK]))
      elif constants.DDM_REMOVE in nic_changes:
        # a removal drops the last NIC of the list
        del hook_nics[-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance,
                                        override=overrides)
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes
7594

    
7595
  def _GetUpdatedParams(self, old_params, update_dict,
                        default_values, parameter_types):
    """Compute the new parameter dicts after applying an update.

    The old parameters are not modified; a deep copy is updated instead.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    new_params = copy.deepcopy(old_params)
    for name, value in update_dict.iteritems():
      if value == constants.VALUE_DEFAULT:
        # resetting to the default means dropping the explicit value
        new_params.pop(name, None)
      else:
        new_params[name] = value
    utils.ForceDictType(new_params, parameter_types)
    filled_params = objects.FillDict(default_values, new_params)
    return (new_params, filled_params)
7626

    
7627
  def CheckPrereq(self):
    """Check prerequisites.

    This computes the new hypervisor, backend and NIC parameters and
    validates the requested changes against the instance and its nodes:
      - hypervisor parameters are syntax-checked locally and then
        checked on all nodes of the instance
      - unless forced, a memory change is checked against the free
        memory of the primary node (and, as warnings only, of the
        secondaries when auto_balance is set)
      - NIC indices, bridges, IP addresses and MAC addresses are checked
      - disk operations are checked against the disk template, the
        number of disks and (for removal) the instance's run state

    Warnings are collected in C{self.warn}.

    @raise errors.OpPrereqError: if a requested change is not possible

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    # be_new is always bound here, as BE_MEMORY being present implies
    # non-empty beparams
    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        # memory currently used by the instance counts as available
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        # problems on secondary nodes only generate warnings
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      # 'bridge' is stored as the NIC link parameter
      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance",
                                     errors.ECODE_INVAL)
        # disks can only be removed while the instance is not running
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg), errors.ECODE_ENVIRON)
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.", errors.ECODE_STATE)

      # the limit applies to the number of disks, not of NICs
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          # the highest valid index is len - 1, as for NICs above
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return
7835

    
7836
  def Exec(self, feedback_fn):
7837
    """Modifies an instance.
7838

7839
    All parameters take effect only at the next restart of the instance.
7840

7841
    """
7842
    # Process here the warnings from CheckPrereq, as we don't have a
7843
    # feedback_fn there.
7844
    for warn in self.warn:
7845
      feedback_fn("WARNING: %s" % warn)
7846

    
7847
    result = []
7848
    instance = self.instance
7849
    cluster = self.cluster
7850
    # disk changes
7851
    for disk_op, disk_dict in self.op.disks:
7852
      if disk_op == constants.DDM_REMOVE:
7853
        # remove the last disk
7854
        device = instance.disks.pop()
7855
        device_idx = len(instance.disks)
7856
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7857
          self.cfg.SetDiskID(disk, node)
7858
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7859
          if msg:
7860
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7861
                            " continuing anyway", device_idx, node, msg)
7862
        result.append(("disk/%d" % device_idx, "remove"))
7863
      elif disk_op == constants.DDM_ADD:
7864
        # add a new disk
7865
        if instance.disk_template == constants.DT_FILE:
7866
          file_driver, file_path = instance.disks[0].logical_id
7867
          file_path = os.path.dirname(file_path)
7868
        else:
7869
          file_driver = file_path = None
7870
        disk_idx_base = len(instance.disks)
7871
        new_disk = _GenerateDiskTemplate(self,
7872
                                         instance.disk_template,
7873
                                         instance.name, instance.primary_node,
7874
                                         instance.secondary_nodes,
7875
                                         [disk_dict],
7876
                                         file_path,
7877
                                         file_driver,
7878
                                         disk_idx_base)[0]
7879
        instance.disks.append(new_disk)
7880
        info = _GetInstanceInfoText(instance)
7881

    
7882
        logging.info("Creating volume %s for instance %s",
7883
                     new_disk.iv_name, instance.name)
7884
        # Note: this needs to be kept in sync with _CreateDisks
7885
        #HARDCODE
7886
        for node in instance.all_nodes:
7887
          f_create = node == instance.primary_node
7888
          try:
7889
            _CreateBlockDev(self, node, instance, new_disk,
7890
                            f_create, info, f_create)
7891
          except errors.OpExecError, err:
7892
            self.LogWarning("Failed to create volume %s (%s) on"
7893
                            " node %s: %s",
7894
                            new_disk.iv_name, new_disk, node, err)
7895
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7896
                       (new_disk.size, new_disk.mode)))
7897
      else:
7898
        # change a given disk
7899
        instance.disks[disk_op].mode = disk_dict['mode']
7900
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7901
    # NIC changes
7902
    for nic_op, nic_dict in self.op.nics:
7903
      if nic_op == constants.DDM_REMOVE:
7904
        # remove the last nic
7905
        del instance.nics[-1]
7906
        result.append(("nic.%d" % len(instance.nics), "remove"))
7907
      elif nic_op == constants.DDM_ADD:
7908
        # mac and bridge should be set, by now
7909
        mac = nic_dict['mac']
7910
        ip = nic_dict.get('ip', None)
7911
        nicparams = self.nic_pinst[constants.DDM_ADD]
7912
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7913
        instance.nics.append(new_nic)
7914
        result.append(("nic.%d" % (len(instance.nics) - 1),
7915
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7916
                       (new_nic.mac, new_nic.ip,
7917
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7918
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7919
                       )))
7920
      else:
7921
        for key in 'mac', 'ip':
7922
          if key in nic_dict:
7923
            setattr(instance.nics[nic_op], key, nic_dict[key])
7924
        if nic_op in self.nic_pinst:
7925
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
7926
        for key, val in nic_dict.iteritems():
7927
          result.append(("nic.%s/%d" % (key, nic_op), val))
7928

    
7929
    # hvparams changes
7930
    if self.op.hvparams:
7931
      instance.hvparams = self.hv_inst
7932
      for key, val in self.op.hvparams.iteritems():
7933
        result.append(("hv/%s" % key, val))
7934

    
7935
    # beparams changes
7936
    if self.op.beparams:
7937
      instance.beparams = self.be_inst
7938
      for key, val in self.op.beparams.iteritems():
7939
        result.append(("be/%s" % key, val))
7940

    
7941
    self.cfg.Update(instance, feedback_fn)
7942

    
7943
    return result
7944

    
7945

    
7946
class LUQueryExports(NoHooksLU):
  """Logical unit listing the exports found on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared node locks for the requested (or all) nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      # an empty node list means that every node is queried
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """Remember the nodes for which the locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary mapping each node name to the payload of its
        export-list RPC answer, or to False if the answer carries a
        failure message

    """
    answers = self.rpc.call_export_list(self.nodes)
    exports = {}
    for node in answers:
      node_answer = answers[node]
      if node_answer.fail_msg:
        exports[node] = False
      else:
        exports[node] = node_answer.payload

    return exports
7986

    
7987

    
7988
class LUExportInstance(LogicalUnit):
  """Logical unit exporting an instance to an image on a cluster node.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the shutdown timeout if the opcode does not carry one.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance and all the nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know
    # where the previous export might be, and in this LU we search for it
    # and remove it from its current node. In the future we could fix this
    # by:
    #  - making a tasklet to search (share-lock all), then create the new
    #    one, then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # Every node is already locked, hence there is nothing left to declare.

  def BuildHooksEnv(self):
    """Build hooks env.

    The returned node list holds the master, the primary node and the
    target node; the same list is returned for both node-list slots.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
    env.update(instance_env)
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the instance and the target node are known, that
    the involved nodes pass the online/drained checks and that no disk of
    the instance is file-based.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    full_node_name = self.cfg.ExpandNodeName(self.op.target_node)
    target = self.cfg.GetNodeInfo(full_node_name)
    self.dst_node = target

    if target is None:
      # all nodes are locked, so this can only be a wrong node name
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node,
                                 errors.ECODE_NOENT)
    _CheckNodeOnline(self, target.name)
    _CheckNodeNotDrained(self, target.name)

    # instance disk type verification
    file_disks = [dsk for dsk in inst.disks
                  if dsk.dev_type == constants.LD_FILE]
    if file_disks:
      raise errors.OpPrereqError("Export not supported for instances with"
                                 " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The disks are snapshotted on the primary node, each snapshot is
    exported to the target node and then removed, the export is finalized
    and exports of the same instance found on the other nodes are removed.

    @param feedback_fn: callable used to report progress to the caller
    @rtype: tuple
    @return: (finalization status, list of per-disk export statuses)

    """
    inst = self.instance
    target = self.dst_node
    source = inst.primary_node

    if self.op.shutdown:
      # only the instance is stopped here, its disks are left alone
      feedback_fn("Shutting down instance %s" % inst.name)
      shutdown_res = self.rpc.call_instance_shutdown(source, inst,
                                                     self.shutdown_timeout)
      shutdown_res.Raise("Could not shutdown instance %s on"
                         " node %s" % (inst.name, source))

    vgname = self.cfg.GetVGName()

    snapshots = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for dsk in inst.disks:
      self.cfg.SetDiskID(dsk, source)

    must_activate = not inst.admin_up

    if must_activate:
      # the disks of a stopped instance have to be activated first
      feedback_fn("Activating disks for %s" % inst.name)
      _StartInstanceDisks(self, inst, None)

    try:
      # per-disk results
      disk_status = []
      try:
        for idx, dsk in enumerate(inst.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, source))

          # the payload will be a snapshot of an lvm leaf of the disk we
          # passed
          snap_res = self.rpc.call_blockdev_snapshot(source, dsk)
          snap_err = snap_res.fail_msg
          if snap_err:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, source, snap_err)
            # keep a placeholder so that the indices stay aligned
            snapshots.append(False)
            continue

          snap_id = (vgname, snap_res.payload)
          snapshots.append(objects.Disk(dev_type=constants.LD_LV,
                                        size=dsk.size,
                                        logical_id=snap_id,
                                        physical_id=snap_id,
                                        iv_name=dsk.iv_name))

      finally:
        # restart the instance if it was stopped only for the export
        if self.op.shutdown and inst.admin_up:
          feedback_fn("Starting instance %s" % inst.name)
          start_res = self.rpc.call_instance_start(source, inst, None, None)
          start_err = start_res.fail_msg
          if start_err:
            _ShutdownInstanceDisks(self, inst)
            raise errors.OpExecError("Could not start instance: %s" %
                                     start_err)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, snap in enumerate(snapshots):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, source, target.name))
        if not snap:
          # the snapshot could not be taken, nothing to export
          disk_status.append(False)
          continue

        export_res = self.rpc.call_snapshot_export(source, snap, target.name,
                                                   inst, cluster_name, idx)
        export_err = export_res.fail_msg
        if export_err:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, source, target.name,
                          export_err)
        disk_status.append(not export_err)

        # the snapshot is dropped whether or not the export succeeded
        remove_err = self.rpc.call_blockdev_remove(source, snap).fail_msg
        if remove_err:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, source, remove_err)

      feedback_fn("Finalizing export on %s" % target.name)
      final_res = self.rpc.call_finalize_export(target.name, inst,
                                                snapshots)
      finalized = True
      final_err = final_res.fail_msg
      if final_err:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", inst.name, target.name, final_err)
        finalized = False

    finally:
      if must_activate:
        feedback_fn("Deactivating disks for %s" % inst.name)
        _ShutdownInstanceDisks(self, inst)

    other_nodes = self.cfg.GetNodeList()
    other_nodes.remove(target.name)

    # on one-node clusters the list is empty after the removal; if we
    # proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = inst.name
    if other_nodes:
      feedback_fn("Removing old exports for instance %s" % iname)
      old_exports = self.rpc.call_export_list(other_nodes)
      for node in old_exports:
        if old_exports[node].fail_msg:
          continue
        if iname not in old_exports[node].payload:
          continue
        stale_err = self.rpc.call_export_remove(node, iname).fail_msg
        if stale_err:
          self.LogWarning("Could not remove older export for instance %s"
                          " on node %s: %s", iname, node, stale_err)
    return finalized, disk_status
8185

    
8186

    
8187
class LURemoveExport(NoHooksLU):
  """Logical unit removing the exports of a named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock all the nodes.

    """
    self.needed_locks = {}
    # All nodes must be locked for the removal to work; the instance itself
    # needs no lock, as nothing happens to it (exports of an already
    # removed instance can be deleted too).
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    @param feedback_fn: callable used to report messages to the caller

    """
    expanded = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed
    # in. This will only work if it was an FQDN, though.
    name_unknown = not expanded
    if name_unknown:
      iname = self.op.instance_name
    else:
      iname = expanded

    nodes = self.acquired_locks[locking.LEVEL_NODE]
    answers = self.rpc.call_export_list(nodes)
    removed_any = False
    for node in answers:
      query_err = answers[node].fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue
      if iname not in answers[node].payload:
        continue
      removed_any = True
      remove_err = self.rpc.call_export_remove(node, iname).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", iname, node, remove_err)

    if name_unknown and not removed_any:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
8238

    
8239

    
8240
class TagsLU(NoHooksLU):
  """Generic tags LU.

  Abstract base class shared by the tag-related logical units.

  """

  def ExpandNames(self):
    """Expand the object name and lock the object, for nodes and instances.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      expand_fn = self.cfg.ExpandNodeName
      err_fmt = "Invalid node name (%s)"
      lock_level = locking.LEVEL_NODE
    elif kind == constants.TAG_INSTANCE:
      expand_fn = self.cfg.ExpandInstanceName
      err_fmt = "Invalid instance name (%s)"
      lock_level = locking.LEVEL_INSTANCE
    else:
      # other kinds need neither name expansion nor locks
      return

    full_name = expand_fn(self.op.name)
    if full_name is None:
      raise errors.OpPrereqError(err_fmt % (self.op.name,),
                                 errors.ECODE_NOENT)
    self.op.name = full_name
    self.needed_locks[lock_level] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the object the tags belong to and stores it as self.target.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
    self.target = target
8277

    
8278

    
8279
class LUGetTags(TagsLU):
  """Logical unit returning the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object as a list.

    """
    tags = self.target.GetTags()
    return list(tags)
8291

    
8292

    
8293
class LUSearchTags(NoHooksLU):
8294
  """Searches the tags for a given pattern.
8295

8296
  """
8297
  _OP_REQP = ["pattern"]
8298
  REQ_BGL = False
8299

    
8300
  def ExpandNames(self):
8301
    self.needed_locks = {}
8302

    
8303
  def CheckPrereq(self):
8304
    """Check prerequisites.
8305

8306
    This checks the pattern passed for validity by compiling it.
8307

8308
    """
8309
    try:
8310
      self.re = re.compile(self.op.pattern)
8311
    except re.error, err:
8312
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8313
                                 (self.op.pattern, err), errors.ECODE_INVAL)
8314

    
8315
  def Exec(self, feedback_fn):
8316
    """Returns the tag list.
8317

8318
    """
8319
    cfg = self.cfg
8320
    tgts = [("/cluster", cfg.GetClusterInfo())]
8321
    ilist = cfg.GetAllInstancesInfo().values()
8322
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8323
    nlist = cfg.GetAllNodesInfo().values()
8324
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8325
    results = []
8326
    for path, target in tgts:
8327
      for tag in target.GetTags():
8328
        if self.re.search(tag):
8329
          results.append((path, tag))
8330
    return results
8331

    
8332

    
8333
class LUAddTags(TagsLU):
8334
  """Sets a tag on a given object.
8335

8336
  """
8337
  _OP_REQP = ["kind", "name", "tags"]
8338
  REQ_BGL = False
8339

    
8340
  def CheckPrereq(self):
8341
    """Check prerequisites.
8342

8343
    This checks the type and length of the tag name and value.
8344

8345
    """
8346
    TagsLU.CheckPrereq(self)
8347
    for tag in self.op.tags:
8348
      objects.TaggableObject.ValidateTag(tag)
8349

    
8350
  def Exec(self, feedback_fn):
8351
    """Sets the tag.
8352

8353
    """
8354
    try:
8355
      for tag in self.op.tags:
8356
        self.target.AddTag(tag)
8357
    except errors.TagError, err:
8358
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
8359
    self.cfg.Update(self.target, feedback_fn)
8360

    
8361

    
8362
class LUDelTags(TagsLU):
  """Logical unit deleting a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every tag is validated and must currently be set on the target.

    """
    TagsLU.CheckPrereq(self)
    validate = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate(old_tag)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()
    if wanted <= present:
      return

    # report the missing tags, quoted and in sorted order
    missing = sorted(["'%s'" % tag for tag in wanted - present])
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (",".join(missing)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the target and save it in the configuration.

    """
    target = self.target
    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)
    self.cfg.Update(target, feedback_fn)
8394

    
8395

    
8396
class LUTestDelay(NoHooksLU):
  """Logical unit sleeping for a given amount of time.

  The sleep is done on the master and/or on a list of nodes.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If a node list was given, it is expanded and the nodes are locked.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    duration = self.op.duration
    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")
    if not self.op.on_nodes:
      return
    answers = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
    for node, answer in answers.items():
      answer.Raise("Failure during rpc call to node %s" % node)
8436

    
8437

    
8438
class IAllocator(object):
8439
  """IAllocator framework.
8440

8441
  An IAllocator instance has four sets of attributes:
8442
    - cfg that is needed to query the cluster
8443
    - input data (all members of the _KEYS class attribute are required)
8444
    - four buffer attributes (in|out_data|text), that represent the
8445
      input (to the external script) in text and data structure format,
8446
      and the output from it, again in two formats
8447
    - the result variables from the script (success, info, nodes) for
8448
      easy usage
8449

8450
  """
8451
  _ALLO_KEYS = [
8452
    "mem_size", "disks", "disk_template",
8453
    "os", "tags", "nics", "vcpus", "hypervisor",
8454
    ]
8455
  _RELO_KEYS = [
8456
    "relocate_from",
8457
    ]
8458

    
8459
  def __init__(self, cfg, rpc, mode, name, **kwargs):
    """Store the request parameters and build the allocator input data.

    @param cfg: stored as self.cfg
    @param rpc: stored as self.rpc
    @param mode: the allocator mode; it selects the set of accepted
        keyword arguments
    @param name: stored as self.name
    @param kwargs: the input fields; all the keys of the mode's key list
        must be given, and no other key is accepted
    @raise errors.ProgrammerError: for an unknown mode, or for an invalid
        or missing keyword argument

    """
    self.cfg = cfg
    self.rpc = rpc
    # buffers: input/output, each as text and as data structure
    self.in_text = None
    self.out_text = None
    self.in_data = None
    self.out_data = None
    # all the input fields are pre-declared so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = None
    self.disks = None
    self.disk_template = None
    self.os = None
    self.tags = None
    self.nics = None
    self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # result fields
    self.success = None
    self.info = None
    self.nodes = None

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      expected = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      expected = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    # reject unknown arguments, store the known ones
    for arg_name in kwargs:
      if arg_name not in expected:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % arg_name)
      setattr(self, arg_name, kwargs[arg_name])

    # every expected argument must have been passed
    for arg_name in expected:
      if arg_name not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % arg_name)
    self._BuildInputData()
8492

    
8493
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation: the
    cluster-level values, one entry per node and one entry per instance.
    The resulting dictionary is stored in self.in_data.

    @raise errors.ProgrammerError: in relocation mode, if self.name is not
        a known instance
    @raise errors.OpExecError: if a node that is neither offline nor
        drained does not return one of the required attributes, or returns
        a non-integer value for it

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    # pair each instance with its filled backend parameters
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      reloc_instance = cfg.GetInstanceInfo(self.name)
      if reloc_instance is None:
        # fail with a clear error instead of an AttributeError on None
        raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                     " IAllocator" % self.name)
      hypervisor_name = reloc_instance.hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      # the dynamic values are added only for nodes that are neither
      # offline nor drained
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            # instances not reported by the node count as using no memory
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            # subtract from the free memory the part of the configured
            # memory that the instance is not using right now
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        # cluster-level default nic parameters, overridden by the nic's own
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        # bridged nics additionally export their link as "bridge"
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
8619

    
8620
  def _AddNewInstance(self):
    """Add the allocation request to the allocator input structure.

    Together with the data computed by _ComputeClusterData, this builds
    the structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    total_space = _ComputeDiskSize(self.disk_template, self.disks)

    # network-mirrored templates need two nodes, the others a single one
    if self.disk_template in constants.DTS_NET_MIRROR:
      node_count = 2
    else:
      node_count = 1
    self.required_nodes = node_count

    self.in_data["request"] = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": total_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
8652

    
8653
  def _AddRelocateInstance(self):
    """Store a "relocate" request in the allocator input structure.

    The request is written under the C{"request"} key of
    C{self.in_data}, and C{self.required_nodes} is set to one.

    The checks for the completeness of the opcode must have already
    been done by the caller.

    @raise errors.ProgrammerError: if C{self.name} is not a known instance
    @raise errors.OpPrereqError: if the instance's disk template is not
        in C{constants.DTS_NET_MIRROR}, or if the instance does not have
        exactly one secondary node

    """
    inst = self.cfg.GetInstanceInfo(self.name)
    if inst is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    secondary_count = len(inst.secondary_nodes)
    if secondary_count != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1

    # only the sizes are handed to the disk space computation
    size_rows = []
    for dsk in inst.disks:
      size_rows.append({'size': dsk.size})
    total_space = _ComputeDiskSize(inst.disk_template, size_rows)

    reloc_request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": total_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = reloc_request
8688

    
8689
  def _BuildInputData(self):
    """Assemble the complete allocator input.

    First the cluster data is computed, then the request matching
    C{self.mode} is added (an allocation request for
    C{constants.IALLOCATOR_MODE_ALLOC}, a relocation request for any
    other mode). Finally the structure is serialized into
    C{self.in_text}.

    """
    self._ComputeClusterData()

    # pick the request builder first, then invoke it
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      add_request = self._AddNewInstance
    else:
      add_request = self._AddRelocateInstance
    add_request()

    self.in_text = serializer.Dump(self.in_data)
8701

    
8702
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and store its output.

    The raw payload of the call is saved in C{self.out_text}; nothing
    is returned to the caller.

    @param name: the allocator name, passed unchanged to the runner
    @param validate: if true, the output is parsed and checked via
        L{_ValidateResult} after it has been stored
    @param call_fn: optional callable used instead of
        C{self.rpc.call_iallocator_runner}; it is invoked with the
        master node, the allocator name and C{self.in_text}

    """
    runner = call_fn
    if runner is None:
      runner = self.rpc.call_iallocator_runner

    master_node = self.cfg.GetMasterNode()
    run_result = runner(master_node, name, self.in_text)
    run_result.Raise("Failure while running the iallocator script")

    self.out_text = run_result.payload
    if not validate:
      return
    self._ValidateResult()
8715

    
8716
  def _ValidateResult(self):
8717
    """Process the allocator results.
8718

8719
    This will process and if successful save the result in
8720
    self.out_data and the other parameters.
8721

8722
    """
8723
    try:
8724
      rdict = serializer.Load(self.out_text)
8725
    except Exception, err:
8726
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8727

    
8728
    if not isinstance(rdict, dict):
8729
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
8730

    
8731
    for key in "success", "info", "nodes":
8732
      if key not in rdict:
8733
        raise errors.OpExecError("Can't parse iallocator results:"
8734
                                 " missing key '%s'" % key)
8735
      setattr(self, key, rdict[key])
8736

    
8737
    if not isinstance(rdict["nodes"], list):
8738
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8739
                               " is not a list")
8740
    self.out_data = rdict
8741

    
8742

    
8743
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the requested direction, this LU either returns the
  input text built for an allocator or runs the named allocator and
  returns its raw, unvalidated output.

  """
  # NOTE(review): presumably the opcode fields the LU framework requires
  # to be present; verify against the base class
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and
    mode of the test.

    In allocation mode the instance attributes must be present, the
    instance name must not exist in the cluster yet, and the C{nics}
    and C{disks} parameters must be well-formed lists of dictionaries;
    a missing hypervisor is replaced by the configured one. In
    relocation mode the instance must exist; its name is expanded and
    its secondary nodes are remembered in C{self.relocate_from}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    op = self.op
    mode = op.mode

    if mode == constants.IALLOCATOR_MODE_ALLOC:
      for field in ("name", "mem_size", "disks", "disk_template",
                    "os", "tags", "nics", "vcpus"):
        if not hasattr(op, field):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     field, errors.ECODE_INVAL)

      existing = self.cfg.ExpandInstanceName(op.name)
      if existing is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   existing, errors.ECODE_EXISTS)

      if not isinstance(op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for nic in op.nics:
        nic_ok = (isinstance(nic, dict) and
                  "mac" in nic and
                  "ip" in nic and
                  "bridge" in nic)
        if not nic_ok:
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)

      if not isinstance(op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for disk in op.disks:
        disk_ok = (isinstance(disk, dict) and
                   "size" in disk and
                   isinstance(disk["size"], int) and
                   "mode" in disk and
                   disk["mode"] in ['r', 'w'])
        if not disk_ok:
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)

      # fall back to the hypervisor type stored in the configuration
      if getattr(op, "hypervisor", None) is None:
        op.hypervisor = self.cfg.GetHypervisorType()

    elif mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      full_name = self.cfg.ExpandInstanceName(op.name)
      if full_name is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   op.name, errors.ECODE_NOENT)
      op.name = full_name
      self.relocate_from = self.cfg.GetInstanceInfo(full_name).secondary_nodes

    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 mode, errors.ECODE_INVAL)

    direction = op.direction
    if direction not in (constants.IALLOCATOR_DIR_OUT,
                         constants.IALLOCATOR_DIR_IN):
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 direction, errors.ECODE_INVAL)

    # only the "out" direction actually runs an allocator and needs its name
    if (direction == constants.IALLOCATOR_DIR_OUT and
        getattr(op, "allocator", None) is None):
      raise errors.OpPrereqError("Missing allocator name",
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: not used by this LU
    @return: for the "in" direction the allocator input text
        (C{in_text} of the L{IAllocator} object); otherwise the raw
        output text of the allocator named by the opcode, which is
        run without validating its result

    """
    op = self.op

    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial_kwargs = {
        "mode": op.mode,
        "name": op.name,
        "mem_size": op.mem_size,
        "disks": op.disks,
        "disk_template": op.disk_template,
        "os": op.os,
        "tags": op.tags,
        "nics": op.nics,
        "vcpus": op.vcpus,
        "hypervisor": op.hypervisor,
        }
    else:
      ial_kwargs = {
        "mode": op.mode,
        "name": op.name,
        "relocate_from": list(self.relocate_from),
        }
    ial = IAllocator(self.cfg, self.rpc, **ial_kwargs)

    if op.direction == constants.IALLOCATOR_DIR_IN:
      return ial.in_text

    ial.Run(op.allocator, validate=False)
    return ial.out_text