lib/cmdlib.py @ f208978a

#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
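

# --- Illustrative sketch (editor's addition, not part of cmdlib.py) ---
# A minimal concurrent LU wired up as the docstrings above describe: the
# instance lock is declared in ExpandNames via _ExpandAndLockInstance, and
# the node locks are computed later in DeclareLocks via _LockInstancesNodes.
# The class name and opcode field are hypothetical.
class _LUExampleInstanceNoop(LogicalUnit):
  """Example LU that locks one instance and its nodes, then does nothing."""
  HPATH = None
  HTYPE = None
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' unknown" %
                                 self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Holding locks for %s, doing nothing" % self.instance.name)
# --- End of illustrative sketch ---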


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
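

# --- Illustrative sketch (editor's addition, not part of cmdlib.py) ---
# How a tasklet-based LU fits together: the LU only handles locking and
# builds self.tasklets; the base LogicalUnit.CheckPrereq and Exec then drive
# each tasklet in order. The class names here are hypothetical.
class _ExampleTasklet(Tasklet):
  """Tasklet that only reports the cluster name."""
  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Cluster is %s" % self.cfg.GetClusterName())


class _LUExampleWithTasklets(LogicalUnit):
  """Example LU composed entirely of tasklets."""
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.tasklets = [_ExampleTasklet(self)]
# --- End of illustrative sketch ---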


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
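

# --- Illustrative sketch (editor's addition, not part of cmdlib.py) ---
# Typical use of the two helpers above from inside an LU: expand whatever
# the opcode carries into canonical names before acting on them. The LU and
# its opcode fields (lu.op.nodes, lu.op.instances) are hypothetical.
def _ExampleExpandOpTargets(lu):
  """Return (nodes, instances) named by a hypothetical opcode."""
  nodes = _GetWantedNodes(lu, lu.op.nodes)  # must be a non-empty list
  instances = _GetWantedInstances(lu, lu.op.instances)  # [] means all
  return nodes, instances
# --- End of illustrative sketch ---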


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
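

# --- Illustrative sketch (editor's addition, not part of cmdlib.py) ---
# The kind of environment _BuildInstanceHookEnv produces for a one-NIC,
# one-disk instance; all values below are made up. The resulting dict
# contains, among others: OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_DISK_COUNT,
# INSTANCE_DISK0_SIZE, and the INSTANCE_BE_* / INSTANCE_HV_* keys.
def _ExampleInstanceHookEnv():
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "debootstrap", True,
                               512, 1,
                               [("192.0.2.10", "aa:00:00:12:34:56",
                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
                               "drbd", [(10240, "rw")],
                               {constants.BE_MEMORY: 512},
                               {}, "xen-pvm")
# --- End of illustrative sketch ---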


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  instances = []

  for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
    if node_name == inst.primary_node:
      instances.append(inst)

  return instances


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  instances = []

  for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
    if node_name in inst.secondary_nodes:
      instances.append(inst)

  return instances


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates (and the file is outdated)" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad
    n_drained = [] # List of nodes being drained
1105
    node_volume = {}
1106
    node_instance = {}
1107
    node_info = {}
1108
    instance_cfg = {}
1109

    
1110
    # FIXME: verify OS list
1111
    # do local checksums
1112
    master_files = [constants.CLUSTER_CONF_FILE]
1113

    
1114
    file_names = ssconf.SimpleStore().GetFileList()
1115
    file_names.append(constants.SSL_CERT_FILE)
1116
    file_names.append(constants.RAPI_CERT_FILE)
1117
    file_names.extend(master_files)
1118

    
1119
    local_checksums = utils.FingerprintFiles(file_names)
1120

    
1121
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1122
    node_verify_param = {
1123
      constants.NV_FILELIST: file_names,
1124
      constants.NV_NODELIST: [node.name for node in nodeinfo
1125
                              if not node.offline],
1126
      constants.NV_HYPERVISOR: hypervisors,
1127
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1128
                                  node.secondary_ip) for node in nodeinfo
1129
                                 if not node.offline],
1130
      constants.NV_INSTANCELIST: hypervisors,
1131
      constants.NV_VERSION: None,
1132
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1133
      }
1134
    if vg_name is not None:
1135
      node_verify_param[constants.NV_VGLIST] = None
1136
      node_verify_param[constants.NV_LVLIST] = vg_name
1137
      node_verify_param[constants.NV_DRBDLIST] = None
1138
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1139
                                           self.cfg.GetClusterName())
1140

    
1141
    cluster = self.cfg.GetClusterInfo()
1142
    master_node = self.cfg.GetMasterNode()
1143
    all_drbd_map = self.cfg.ComputeDRBDMap()
1144

    
1145
    for node_i in nodeinfo:
1146
      node = node_i.name
1147

    
1148
      if node_i.offline:
1149
        feedback_fn("* Skipping offline node %s" % (node,))
1150
        n_offline.append(node)
1151
        continue
1152

    
1153
      if node == master_node:
1154
        ntype = "master"
1155
      elif node_i.master_candidate:
1156
        ntype = "master candidate"
1157
      elif node_i.drained:
1158
        ntype = "drained"
1159
        n_drained.append(node)
1160
      else:
1161
        ntype = "regular"
1162
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1163

    
1164
      msg = all_nvinfo[node].fail_msg
1165
      if msg:
1166
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1167
        bad = True
1168
        continue
1169

    
1170
      nresult = all_nvinfo[node].payload
1171
      node_drbd = {}
1172
      for minor, instance in all_drbd_map[node].items():
1173
        if instance not in instanceinfo:
1174
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1175
                      instance)
1176
          # ghost instance should not be running, but otherwise we
1177
          # don't give double warnings (both ghost instance and
1178
          # unallocated minor in use)
1179
          node_drbd[minor] = (instance, False)
1180
        else:
1181
          instance = instanceinfo[instance]
1182
          node_drbd[minor] = (instance.name, instance.admin_up)
1183
      result = self._VerifyNode(node_i, file_names, local_checksums,
1184
                                nresult, feedback_fn, master_files,
1185
                                node_drbd, vg_name)
1186
      bad = bad or result
1187

    
1188
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1189
      if vg_name is None:
1190
        node_volume[node] = {}
1191
      elif isinstance(lvdata, basestring):
1192
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1193
                    (node, utils.SafeEncode(lvdata)))
1194
        bad = True
1195
        node_volume[node] = {}
1196
      elif not isinstance(lvdata, dict):
1197
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1198
        bad = True
1199
        continue
1200
      else:
1201
        node_volume[node] = lvdata
1202

    
1203
      # node_instance
1204
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1205
      if not isinstance(idata, list):
1206
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1207
                    (node,))
1208
        bad = True
1209
        continue
1210

    
1211
      node_instance[node] = idata
1212

    
1213
      # node_info
1214
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1215
      if not isinstance(nodeinfo, dict):
1216
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1217
        bad = True
1218
        continue
1219

    
1220
      try:
1221
        node_info[node] = {
1222
          "mfree": int(nodeinfo['memory_free']),
1223
          "pinst": [],
1224
          "sinst": [],
1225
          # dictionary holding all instances this node is secondary for,
1226
          # grouped by their primary node. Each key is a cluster node, and each
1227
          # value is a list of instances which have the key as primary and the
1228
          # current node as secondary.  this is handy to calculate N+1 memory
1229
          # availability if you can only failover from a primary to its
1230
          # secondary.
1231
          "sinst-by-pnode": {},
1232
        }
1233
        # FIXME: devise a free space model for file based instances as well
1234
        if vg_name is not None:
1235
          if (constants.NV_VGLIST not in nresult or
1236
              vg_name not in nresult[constants.NV_VGLIST]):
1237
            feedback_fn("  - ERROR: node %s didn't return data for the"
1238
                        " volume group '%s' - it is either missing or broken" %
1239
                        (node, vg_name))
1240
            bad = True
1241
            continue
1242
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1243
      except (ValueError, KeyError):
1244
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1245
                    " from node %s" % (node,))
1246
        bad = True
1247
        continue
1248

    
1249
    node_vol_should = {}
1250

    
1251
    for instance in instancelist:
1252
      feedback_fn("* Verifying instance %s" % instance)
1253
      inst_config = instanceinfo[instance]
1254
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1255
                                     node_instance, feedback_fn, n_offline)
1256
      bad = bad or result
1257
      inst_nodes_offline = []
1258

    
1259
      inst_config.MapLVsByNode(node_vol_should)
1260

    
1261
      instance_cfg[instance] = inst_config
1262

    
1263
      pnode = inst_config.primary_node
1264
      if pnode in node_info:
1265
        node_info[pnode]['pinst'].append(instance)
1266
      elif pnode not in n_offline:
1267
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1268
                    " %s failed" % (instance, pnode))
1269
        bad = True
1270

    
1271
      if pnode in n_offline:
1272
        inst_nodes_offline.append(pnode)
1273

    
1274
      # If the instance is non-redundant we cannot survive losing its primary
1275
      # node, so we are not N+1 compliant. On the other hand we have no disk
1276
      # templates with more than one secondary so that situation is not well
1277
      # supported either.
1278
      # FIXME: does not support file-backed instances
1279
      if len(inst_config.secondary_nodes) == 0:
1280
        i_non_redundant.append(instance)
1281
      elif len(inst_config.secondary_nodes) > 1:
1282
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1283
                    % instance)
1284

    
1285
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1286
        i_non_a_balanced.append(instance)
1287

    
1288
      for snode in inst_config.secondary_nodes:
1289
        if snode in node_info:
1290
          node_info[snode]['sinst'].append(instance)
1291
          if pnode not in node_info[snode]['sinst-by-pnode']:
1292
            node_info[snode]['sinst-by-pnode'][pnode] = []
1293
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1294
        elif snode not in n_offline:
1295
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1296
                      " %s failed" % (instance, snode))
1297
          bad = True
1298
        if snode in n_offline:
1299
          inst_nodes_offline.append(snode)
1300

    
1301
      if inst_nodes_offline:
1302
        # warn that the instance lives on offline nodes, and set bad=True
1303
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1304
                    ", ".join(inst_nodes_offline))
1305
        bad = True
1306

    
1307
    feedback_fn("* Verifying orphan volumes")
1308
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1309
                                       feedback_fn)
1310
    bad = bad or result
1311

    
1312
    feedback_fn("* Verifying remaining instances")
1313
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1314
                                         feedback_fn)
1315
    bad = bad or result
1316

    
1317
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1318
      feedback_fn("* Verifying N+1 Memory redundancy")
1319
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1320
      bad = bad or result
1321

    
1322
    feedback_fn("* Other Notes")
1323
    if i_non_redundant:
1324
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1325
                  % len(i_non_redundant))
1326

    
1327
    if i_non_a_balanced:
1328
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1329
                  % len(i_non_a_balanced))
1330

    
1331
    if n_offline:
1332
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1333

    
1334
    if n_drained:
1335
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1336

    
1337
    return not bad
1338

    
1339
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1340
    """Analyze the post-hooks' result
1341

1342
    This method analyses the hook result, handles it, and sends some
1343
    nicely-formatted feedback back to the user.
1344

1345
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1346
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1347
    @param hooks_results: the results of the multi-node hooks rpc call
1348
    @param feedback_fn: function used send feedback back to the caller
1349
    @param lu_result: previous Exec result
1350
    @return: the new Exec result, based on the previous result
1351
        and hook results
1352

1353
    """
1354
    # We only really run POST phase hooks, and are only interested in
1355
    # their results
1356
    if phase == constants.HOOKS_PHASE_POST:
1357
      # Used to change hooks' output to proper indentation
1358
      indent_re = re.compile('^', re.M)
1359
      feedback_fn("* Hooks Results")
1360
      if not hooks_results:
1361
        feedback_fn("  - ERROR: general communication failure")
1362
        lu_result = 1
1363
      else:
1364
        for node_name in hooks_results:
1365
          show_node_header = True
1366
          res = hooks_results[node_name]
1367
          msg = res.fail_msg
1368
          if msg:
1369
            if res.offline:
1370
              # no need to warn or set fail return value
1371
              continue
1372
            feedback_fn("    Communication failure in hooks execution: %s" %
1373
                        msg)
1374
            lu_result = 1
1375
            continue
1376
          for script, hkr, output in res.payload:
1377
            if hkr == constants.HKR_FAIL:
1378
              # The node header is only shown once, if there are
1379
              # failing hooks on that node
1380
              if show_node_header:
1381
                feedback_fn("  Node %s:" % node_name)
1382
                show_node_header = False
1383
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1384
              output = indent_re.sub('      ', output)
1385
              feedback_fn("%s" % output)
1386
              lu_result = 1
1387

    
1388
      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
1472

    
1473

    
1474
class LURenameCluster(LogicalUnit):
1475
  """Rename the cluster.
1476

1477
  """
1478
  HPATH = "cluster-rename"
1479
  HTYPE = constants.HTYPE_CLUSTER
1480
  _OP_REQP = ["name"]
1481

    
1482
  def BuildHooksEnv(self):
1483
    """Build hooks env.
1484

1485
    """
1486
    env = {
1487
      "OP_TARGET": self.cfg.GetClusterName(),
1488
      "NEW_NAME": self.op.name,
1489
      }
1490
    mn = self.cfg.GetMasterNode()
1491
    return env, [mn], [mn]
1492

    
1493
  def CheckPrereq(self):
1494
    """Verify that the passed name is a valid one.
1495

1496
    """
1497
    hostname = utils.HostInfo(self.op.name)
1498

    
1499
    new_name = hostname.name
1500
    self.ip = new_ip = hostname.ip
1501
    old_name = self.cfg.GetClusterName()
1502
    old_ip = self.cfg.GetMasterIP()
1503
    if new_name == old_name and new_ip == old_ip:
1504
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1505
                                 " cluster has changed")
1506
    if new_ip != old_ip:
1507
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1508
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1509
                                   " reachable on the network. Aborting." %
1510
                                   new_ip)
1511

    
1512
    self.op.name = new_name
1513

    
1514
  def Exec(self, feedback_fn):
1515
    """Rename the cluster.
1516

1517
    """
1518
    clustername = self.op.name
1519
    ip = self.ip
1520

    
1521
    # shutdown the master IP
1522
    master = self.cfg.GetMasterNode()
1523
    result = self.rpc.call_node_stop_master(master, False)
1524
    result.Raise("Could not disable the master role")
1525

    
1526
    try:
1527
      cluster = self.cfg.GetClusterInfo()
1528
      cluster.cluster_name = clustername
1529
      cluster.master_ip = ip
1530
      self.cfg.Update(cluster)
1531

    
1532
      # update the known hosts file
1533
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1534
      node_list = self.cfg.GetNodeList()
1535
      try:
1536
        node_list.remove(master)
1537
      except ValueError:
1538
        pass
1539
      result = self.rpc.call_upload_file(node_list,
1540
                                         constants.SSH_KNOWN_HOSTS_FILE)
1541
      for to_node, to_result in result.iteritems():
1542
        msg = to_result.fail_msg
1543
        if msg:
1544
          msg = ("Copy of file %s to node %s failed: %s" %
1545
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1546
          self.proc.LogWarning(msg)
1547

    
1548
    finally:
1549
      result = self.rpc.call_node_start_master(master, False, False)
1550
      msg = result.fail_msg
1551
      if msg:
1552
        self.LogWarning("Could not re-enable the master role on"
1553
                        " the master, please restart manually: %s", msg)
1554

    
1555

    
1556
def _RecursiveCheckIfLVMBased(disk):
1557
  """Check if the given disk or its children are lvm-based.
1558

1559
  @type disk: L{objects.Disk}
1560
  @param disk: the disk to check
1561
  @rtype: boolean
1562
  @return: boolean indicating whether a LD_LV dev_type was found or not
1563

1564
  """
1565
  if disk.children:
1566
    for chdisk in disk.children:
1567
      if _RecursiveCheckIfLVMBased(chdisk):
1568
        return True
1569
  return disk.dev_type == constants.LD_LV
1570

    
1571

    
1572
class LUSetClusterParams(LogicalUnit):
1573
  """Change the parameters of the cluster.
1574

1575
  """
1576
  HPATH = "cluster-modify"
1577
  HTYPE = constants.HTYPE_CLUSTER
1578
  _OP_REQP = []
1579
  REQ_BGL = False
1580

    
1581
  def CheckArguments(self):
1582
    """Check parameters
1583

1584
    """
1585
    if not hasattr(self.op, "candidate_pool_size"):
1586
      self.op.candidate_pool_size = None
1587
    if self.op.candidate_pool_size is not None:
1588
      try:
1589
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1590
      except (ValueError, TypeError), err:
1591
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1592
                                   str(err))
1593
      if self.op.candidate_pool_size < 1:
1594
        raise errors.OpPrereqError("At least one master candidate needed")
1595

    
1596
  def ExpandNames(self):
1597
    # FIXME: in the future maybe other cluster params won't require checking on
1598
    # all nodes to be modified.
1599
    self.needed_locks = {
1600
      locking.LEVEL_NODE: locking.ALL_SET,
1601
    }
1602
    self.share_locks[locking.LEVEL_NODE] = 1
1603

    
1604
  def BuildHooksEnv(self):
1605
    """Build hooks env.
1606

1607
    """
1608
    env = {
1609
      "OP_TARGET": self.cfg.GetClusterName(),
1610
      "NEW_VG_NAME": self.op.vg_name,
1611
      }
1612
    mn = self.cfg.GetMasterNode()
1613
    return env, [mn], [mn]
1614

    
1615
  def CheckPrereq(self):
1616
    """Check prerequisites.
1617

1618
    This checks whether the given params don't conflict and
1619
    if the given volume group is valid.
1620

1621
    """
1622
    if self.op.vg_name is not None and not self.op.vg_name:
1623
      instances = self.cfg.GetAllInstancesInfo().values()
1624
      for inst in instances:
1625
        for disk in inst.disks:
1626
          if _RecursiveCheckIfLVMBased(disk):
1627
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1628
                                       " lvm-based instances exist")
1629

    
1630
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1631

    
1632
    # if vg_name not None, checks given volume group on all nodes
1633
    if self.op.vg_name:
1634
      vglist = self.rpc.call_vg_list(node_list)
1635
      for node in node_list:
1636
        msg = vglist[node].fail_msg
1637
        if msg:
1638
          # ignoring down node
1639
          self.LogWarning("Error while gathering data on node %s"
1640
                          " (ignoring node): %s", node, msg)
1641
          continue
1642
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1643
                                              self.op.vg_name,
1644
                                              constants.MIN_VG_SIZE)
1645
        if vgstatus:
1646
          raise errors.OpPrereqError("Error on node '%s': %s" %
1647
                                     (node, vgstatus))
1648

    
1649
    self.cluster = cluster = self.cfg.GetClusterInfo()
1650
    # validate params changes
1651
    if self.op.beparams:
1652
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1653
      self.new_beparams = objects.FillDict(
1654
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
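      # FillDict layers the submitted values over the current cluster
      # defaults; illustrative example (hypothetical values):
      #   FillDict({"memory": 128, "vcpus": 1}, {"memory": 512})
      #     -> {"memory": 512, "vcpus": 1}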
1655

    
1656
    if self.op.nicparams:
1657
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1658
      self.new_nicparams = objects.FillDict(
1659
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1660
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1661

    
1662
    # hypervisor list/parameters
1663
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1664
    if self.op.hvparams:
1665
      if not isinstance(self.op.hvparams, dict):
1666
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1667
      for hv_name, hv_dict in self.op.hvparams.items():
1668
        if hv_name not in self.new_hvparams:
1669
          self.new_hvparams[hv_name] = hv_dict
1670
        else:
1671
          self.new_hvparams[hv_name].update(hv_dict)
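          # i.e. a partial dict such as {"kernel_path": "/boot/vmlinuz"}
          # (illustrative) only overrides that key and keeps the remaining
          # parameters of this hypervisor untouched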
1672

    
1673
    if self.op.enabled_hypervisors is not None:
1674
      self.hv_list = self.op.enabled_hypervisors
1675
      if not self.hv_list:
1676
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1677
                                   " least one member")
1678
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1679
      if invalid_hvs:
1680
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1681
                                   " entries: %s" % invalid_hvs)
1682
    else:
1683
      self.hv_list = cluster.enabled_hypervisors
1684

    
1685
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1686
      # either the enabled list has changed, or the parameters have, validate
1687
      for hv_name, hv_params in self.new_hvparams.items():
1688
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1689
            (self.op.enabled_hypervisors and
1690
             hv_name in self.op.enabled_hypervisors)):
1691
          # either this is a new hypervisor, or its parameters have changed
1692
          hv_class = hypervisor.GetHypervisor(hv_name)
1693
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1694
          hv_class.CheckParameterSyntax(hv_params)
1695
          _CheckHVParams(self, node_list, hv_name, hv_params)
1696

    
1697
  def Exec(self, feedback_fn):
1698
    """Change the parameters of the cluster.
1699

1700
    """
1701
    if self.op.vg_name is not None:
1702
      new_volume = self.op.vg_name
1703
      if not new_volume:
1704
        new_volume = None
1705
      if new_volume != self.cfg.GetVGName():
1706
        self.cfg.SetVGName(new_volume)
1707
      else:
1708
        feedback_fn("Cluster LVM configuration already in desired"
1709
                    " state, not changing")
1710
    if self.op.hvparams:
1711
      self.cluster.hvparams = self.new_hvparams
1712
    if self.op.enabled_hypervisors is not None:
1713
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1714
    if self.op.beparams:
1715
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1716
    if self.op.nicparams:
1717
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1718

    
1719
    if self.op.candidate_pool_size is not None:
1720
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1721
      # we need to update the pool size here, otherwise the save will fail
1722
      _AdjustCandidatePool(self)
1723

    
1724
    self.cfg.Update(self.cluster)
1725

    
1726

    
1727
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1728
  """Distribute additional files which are part of the cluster configuration.
1729

1730
  ConfigWriter takes care of distributing the config and ssconf files, but
1731
  there are more files which should be distributed to all nodes. This function
1732
  makes sure those are copied.
1733

1734
  @param lu: calling logical unit
1735
  @param additional_nodes: list of nodes not in the config to distribute to
1736

1737
  """
1738
  # 1. Gather target nodes
1739
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1740
  dist_nodes = lu.cfg.GetNodeList()
1741
  if additional_nodes is not None:
1742
    dist_nodes.extend(additional_nodes)
1743
  if myself.name in dist_nodes:
1744
    dist_nodes.remove(myself.name)
1745
  # 2. Gather files to distribute
1746
  dist_files = set([constants.ETC_HOSTS,
1747
                    constants.SSH_KNOWN_HOSTS_FILE,
1748
                    constants.RAPI_CERT_FILE,
1749
                    constants.RAPI_USERS_FILE,
1750
                    constants.HMAC_CLUSTER_KEY,
1751
                   ])
1752

    
1753
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1754
  for hv_name in enabled_hypervisors:
1755
    hv_class = hypervisor.GetHypervisor(hv_name)
1756
    dist_files.update(hv_class.GetAncillaryFiles())
1757

    
1758
  # 3. Perform the files upload
1759
  for fname in dist_files:
1760
    if os.path.exists(fname):
1761
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1762
      for to_node, to_result in result.items():
1763
        msg = to_result.fail_msg
1764
        if msg:
1765
          msg = ("Copy of file %s to node %s failed: %s" %
1766
                 (fname, to_node, msg))
1767
          lu.proc.LogWarning(msg)
1768

    
1769

    
1770
class LURedistributeConfig(NoHooksLU):
1771
  """Force the redistribution of cluster configuration.
1772

1773
  This is a very simple LU.
1774

1775
  """
1776
  _OP_REQP = []
1777
  REQ_BGL = False
1778

    
1779
  def ExpandNames(self):
1780
    self.needed_locks = {
1781
      locking.LEVEL_NODE: locking.ALL_SET,
1782
    }
1783
    self.share_locks[locking.LEVEL_NODE] = 1
1784

    
1785
  def CheckPrereq(self):
1786
    """Check prerequisites.
1787

1788
    """
1789

    
1790
  def Exec(self, feedback_fn):
1791
    """Redistribute the configuration.
1792

1793
    """
1794
    self.cfg.Update(self.cfg.GetClusterInfo())
1795
    _RedistributeAncillaryFiles(self)
1796

    
1797

    
1798
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1799
  """Sleep and poll for an instance's disk to sync.
1800

1801
  """
1802
  if not instance.disks:
1803
    return True
1804

    
1805
  if not oneshot:
1806
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1807

    
1808
  node = instance.primary_node
1809

    
1810
  for dev in instance.disks:
1811
    lu.cfg.SetDiskID(dev, node)
1812

    
1813
  retries = 0
1814
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1815
  while True:
1816
    max_time = 0
1817
    done = True
1818
    cumul_degraded = False
1819
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1820
    msg = rstats.fail_msg
1821
    if msg:
1822
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1823
      retries += 1
1824
      if retries >= 10:
1825
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1826
                                 " aborting." % node)
1827
      time.sleep(6)
1828
      continue
1829
    rstats = rstats.payload
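    # each entry in rstats mirrors one disk of the instance and is
    # expected to expose at least sync_percent, estimated_time and
    # is_degraded (used below); a None entry means the node could not
    # report on that disk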
1830
    retries = 0
1831
    for i, mstat in enumerate(rstats):
1832
      if mstat is None:
1833
        lu.LogWarning("Can't compute data for node %s/%s",
1834
                      node, instance.disks[i].iv_name)
1835
        continue
1836

    
1837
      cumul_degraded = (cumul_degraded or
1838
                        (mstat.is_degraded and mstat.sync_percent is None))
1839
      if mstat.sync_percent is not None:
1840
        done = False
1841
        if mstat.estimated_time is not None:
1842
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
1843
          max_time = mstat.estimated_time
1844
        else:
1845
          rem_time = "no time estimate"
1846
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1847
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))
1848

    
1849
    # if we're done but degraded, let's do a few small retries, to
1850
    # make sure we see a stable and not transient situation; therefore
1851
    # we force restart of the loop
1852
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1853
      logging.info("Degraded disks found, %d retries left", degr_retries)
1854
      degr_retries -= 1
1855
      time.sleep(1)
1856
      continue
1857

    
1858
    if done or oneshot:
1859
      break
1860

    
1861
    time.sleep(min(60, max_time))
1862

    
1863
  if done:
1864
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1865
  return not cumul_degraded
1866

    
1867

    
1868
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1869
  """Check that mirrors are not degraded.
1870

1871
  The ldisk parameter, if True, will change the test from the
1872
  is_degraded attribute (which represents overall non-ok status for
1873
  the device(s)) to the ldisk (representing the local storage status).
1874

1875
  """
1876
  lu.cfg.SetDiskID(dev, node)
1877

    
1878
  result = True
1879

    
1880
  if on_primary or dev.AssembleOnSecondary():
1881
    rstats = lu.rpc.call_blockdev_find(node, dev)
1882
    msg = rstats.fail_msg
1883
    if msg:
1884
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1885
      result = False
1886
    elif not rstats.payload:
1887
      lu.LogWarning("Can't find disk on node %s", node)
1888
      result = False
1889
    else:
1890
      if ldisk:
1891
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
1892
      else:
1893
        result = result and not rstats.payload.is_degraded
1894

    
1895
  if dev.children:
1896
    for child in dev.children:
1897
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1898

    
1899
  return result
1900

    
1901

    
1902
class LUDiagnoseOS(NoHooksLU):
1903
  """Logical unit for OS diagnose/query.
1904

1905
  """
1906
  _OP_REQP = ["output_fields", "names"]
1907
  REQ_BGL = False
1908
  _FIELDS_STATIC = utils.FieldSet()
1909
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1910

    
1911
  def ExpandNames(self):
1912
    if self.op.names:
1913
      raise errors.OpPrereqError("Selective OS query not supported")
1914

    
1915
    _CheckOutputFields(static=self._FIELDS_STATIC,
1916
                       dynamic=self._FIELDS_DYNAMIC,
1917
                       selected=self.op.output_fields)
1918

    
1919
    # Lock all nodes, in shared mode
1920
    # Temporary removal of locks, should be reverted later
1921
    # TODO: reintroduce locks when they are lighter-weight
1922
    self.needed_locks = {}
1923
    #self.share_locks[locking.LEVEL_NODE] = 1
1924
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1925

    
1926
  def CheckPrereq(self):
1927
    """Check prerequisites.
1928

1929
    """
1930

    
1931
  @staticmethod
1932
  def _DiagnoseByOS(node_list, rlist):
1933
    """Remaps a per-node return list into an a per-os per-node dictionary
1934

1935
    @param node_list: a list with the names of all nodes
1936
    @param rlist: a map with node names as keys and OS objects as values
1937

1938
    @rtype: dict
1939
    @return: a dictionary with osnames as keys and as value another map, with
1940
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1941

1942
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1943
                                     (/srv/..., False, "invalid api")],
1944
                           "node2": [(/srv/..., True, "")]}
1945
          }
1946

1947
    """
1948
    all_os = {}
1949
    # we build here the list of nodes that didn't fail the RPC (at RPC
1950
    # level), so that nodes with a non-responding node daemon don't
1951
    # make all OSes invalid
1952
    good_nodes = [node_name for node_name in rlist
1953
                  if not rlist[node_name].fail_msg]
1954
    for node_name, nr in rlist.items():
1955
      if nr.fail_msg or not nr.payload:
1956
        continue
1957
      for name, path, status, diagnose in nr.payload:
1958
        if name not in all_os:
1959
          # build a list of nodes for this os containing empty lists
1960
          # for each node in node_list
1961
          all_os[name] = {}
1962
          for nname in good_nodes:
1963
            all_os[name][nname] = []
1964
        all_os[name][node_name].append((path, status, diagnose))
1965
    return all_os
1966

    
1967
  def Exec(self, feedback_fn):
1968
    """Compute the list of OSes.
1969

1970
    """
1971
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1972
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1973
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1974
    output = []
1975
    for os_name, os_data in pol.items():
1976
      row = []
1977
      for field in self.op.output_fields:
1978
        if field == "name":
1979
          val = os_name
1980
        elif field == "valid":
1981
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1982
        elif field == "node_status":
1983
          # this is just a copy of the dict
1984
          val = {}
1985
          for node_name, nos_list in os_data.items():
1986
            val[node_name] = nos_list
1987
        else:
1988
          raise errors.ParameterError(field)
1989
        row.append(val)
1990
      output.append(row)
1991

    
1992
    return output
1993

    
1994

    
1995
class LURemoveNode(LogicalUnit):
1996
  """Logical unit for removing a node.
1997

1998
  """
1999
  HPATH = "node-remove"
2000
  HTYPE = constants.HTYPE_NODE
2001
  _OP_REQP = ["node_name"]
2002

    
2003
  def BuildHooksEnv(self):
2004
    """Build hooks env.
2005

2006
    This doesn't run on the target node in the pre phase as a failed
2007
    node would then be impossible to remove.
2008

2009
    """
2010
    env = {
2011
      "OP_TARGET": self.op.node_name,
2012
      "NODE_NAME": self.op.node_name,
2013
      }
2014
    all_nodes = self.cfg.GetNodeList()
2015
    all_nodes.remove(self.op.node_name)
2016
    return env, all_nodes, all_nodes
2017

    
2018
  def CheckPrereq(self):
2019
    """Check prerequisites.
2020

2021
    This checks:
2022
     - the node exists in the configuration
2023
     - it does not have primary or secondary instances
2024
     - it's not the master
2025

2026
    Any errors are signaled by raising errors.OpPrereqError.
2027

2028
    """
2029
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2030
    if node is None:
2031
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2032

    
2033
    instance_list = self.cfg.GetInstanceList()
2034

    
2035
    masternode = self.cfg.GetMasterNode()
2036
    if node.name == masternode:
2037
      raise errors.OpPrereqError("Node is the master node,"
2038
                                 " you need to failover first.")
2039

    
2040
    for instance_name in instance_list:
2041
      instance = self.cfg.GetInstanceInfo(instance_name)
2042
      if node.name in instance.all_nodes:
2043
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2044
                                   " please remove first." % instance_name)
2045
    self.op.node_name = node.name
2046
    self.node = node
2047

    
2048
  def Exec(self, feedback_fn):
2049
    """Removes the node from the cluster.
2050

2051
    """
2052
    node = self.node
2053
    logging.info("Stopping the node daemon and removing configs from node %s",
2054
                 node.name)
2055

    
2056
    self.context.RemoveNode(node.name)
2057

    
2058
    result = self.rpc.call_node_leave_cluster(node.name)
2059
    msg = result.fail_msg
2060
    if msg:
2061
      self.LogWarning("Errors encountered on the remote node while leaving"
2062
                      " the cluster: %s", msg)
2063

    
2064
    # Promote nodes to master candidate as needed
2065
    _AdjustCandidatePool(self)
2066

    
2067

    
2068
class LUQueryNodes(NoHooksLU):
2069
  """Logical unit for querying nodes.
2070

2071
  """
2072
  _OP_REQP = ["output_fields", "names", "use_locking"]
2073
  REQ_BGL = False
2074
  _FIELDS_DYNAMIC = utils.FieldSet(
2075
    "dtotal", "dfree",
2076
    "mtotal", "mnode", "mfree",
2077
    "bootid",
2078
    "ctotal", "cnodes", "csockets",
2079
    )
2080

    
2081
  _FIELDS_STATIC = utils.FieldSet(
2082
    "name", "pinst_cnt", "sinst_cnt",
2083
    "pinst_list", "sinst_list",
2084
    "pip", "sip", "tags",
2085
    "serial_no",
2086
    "master_candidate",
2087
    "master",
2088
    "offline",
2089
    "drained",
2090
    "role",
2091
    )
2092

    
2093
  def ExpandNames(self):
2094
    _CheckOutputFields(static=self._FIELDS_STATIC,
2095
                       dynamic=self._FIELDS_DYNAMIC,
2096
                       selected=self.op.output_fields)
2097

    
2098
    self.needed_locks = {}
2099
    self.share_locks[locking.LEVEL_NODE] = 1
2100

    
2101
    if self.op.names:
2102
      self.wanted = _GetWantedNodes(self, self.op.names)
2103
    else:
2104
      self.wanted = locking.ALL_SET
2105

    
2106
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2107
    self.do_locking = self.do_node_query and self.op.use_locking
2108
    if self.do_locking:
2109
      # if we don't request only static fields, we need to lock the nodes
2110
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2111

    
2112

    
2113
  def CheckPrereq(self):
2114
    """Check prerequisites.
2115

2116
    """
2117
    # The validation of the node list is done in the _GetWantedNodes,
2118
    # if non empty, and if empty, there's no validation to do
2119
    pass
2120

    
2121
  def Exec(self, feedback_fn):
2122
    """Computes the list of nodes and their attributes.
2123

2124
    """
2125
    all_info = self.cfg.GetAllNodesInfo()
2126
    if self.do_locking:
2127
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2128
    elif self.wanted != locking.ALL_SET:
2129
      nodenames = self.wanted
2130
      missing = set(nodenames).difference(all_info.keys())
2131
      if missing:
2132
        raise errors.OpExecError(
2133
          "Some nodes were removed before retrieving their data: %s" % missing)
2134
    else:
2135
      nodenames = all_info.keys()
2136

    
2137
    nodenames = utils.NiceSort(nodenames)
2138
    nodelist = [all_info[name] for name in nodenames]
2139

    
2140
    # begin data gathering
2141

    
2142
    if self.do_node_query:
2143
      live_data = {}
2144
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2145
                                          self.cfg.GetHypervisorType())
2146
      for name in nodenames:
2147
        nodeinfo = node_data[name]
2148
        if not nodeinfo.fail_msg and nodeinfo.payload:
2149
          nodeinfo = nodeinfo.payload
2150
          fn = utils.TryConvert
2151
          live_data[name] = {
2152
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2153
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2154
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2155
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2156
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2157
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2158
            "bootid": nodeinfo.get('bootid', None),
2159
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2160
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2161
            }
2162
        else:
2163
          live_data[name] = {}
2164
    else:
2165
      live_data = dict.fromkeys(nodenames, {})
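      # note: dict.fromkeys makes all names share the same (empty) dict
      # object, which is fine here since it is only ever read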
2166

    
2167
    node_to_primary = dict([(name, set()) for name in nodenames])
2168
    node_to_secondary = dict([(name, set()) for name in nodenames])
2169

    
2170
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2171
                             "sinst_cnt", "sinst_list"))
2172
    if inst_fields & frozenset(self.op.output_fields):
2173
      instancelist = self.cfg.GetInstanceList()
2174

    
2175
      for instance_name in instancelist:
2176
        inst = self.cfg.GetInstanceInfo(instance_name)
2177
        if inst.primary_node in node_to_primary:
2178
          node_to_primary[inst.primary_node].add(inst.name)
2179
        for secnode in inst.secondary_nodes:
2180
          if secnode in node_to_secondary:
2181
            node_to_secondary[secnode].add(inst.name)
2182

    
2183
    master_node = self.cfg.GetMasterNode()
2184

    
2185
    # end data gathering
2186

    
2187
    output = []
2188
    for node in nodelist:
2189
      node_output = []
2190
      for field in self.op.output_fields:
2191
        if field == "name":
2192
          val = node.name
2193
        elif field == "pinst_list":
2194
          val = list(node_to_primary[node.name])
2195
        elif field == "sinst_list":
2196
          val = list(node_to_secondary[node.name])
2197
        elif field == "pinst_cnt":
2198
          val = len(node_to_primary[node.name])
2199
        elif field == "sinst_cnt":
2200
          val = len(node_to_secondary[node.name])
2201
        elif field == "pip":
2202
          val = node.primary_ip
2203
        elif field == "sip":
2204
          val = node.secondary_ip
2205
        elif field == "tags":
2206
          val = list(node.GetTags())
2207
        elif field == "serial_no":
2208
          val = node.serial_no
2209
        elif field == "master_candidate":
2210
          val = node.master_candidate
2211
        elif field == "master":
2212
          val = node.name == master_node
2213
        elif field == "offline":
2214
          val = node.offline
2215
        elif field == "drained":
2216
          val = node.drained
2217
        elif self._FIELDS_DYNAMIC.Matches(field):
2218
          val = live_data[node.name].get(field, None)
2219
        elif field == "role":
2220
          if node.name == master_node:
2221
            val = "M"
2222
          elif node.master_candidate:
2223
            val = "C"
2224
          elif node.drained:
2225
            val = "D"
2226
          elif node.offline:
2227
            val = "O"
2228
          else:
2229
            val = "R"
2230
        else:
2231
          raise errors.ParameterError(field)
2232
        node_output.append(val)
2233
      output.append(node_output)
2234

    
2235
    return output
2236

    
2237

    
2238
class LUQueryNodeVolumes(NoHooksLU):
2239
  """Logical unit for getting volumes on node(s).
2240

2241
  """
2242
  _OP_REQP = ["nodes", "output_fields"]
2243
  REQ_BGL = False
2244
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2245
  _FIELDS_STATIC = utils.FieldSet("node")
2246

    
2247
  def ExpandNames(self):
2248
    _CheckOutputFields(static=self._FIELDS_STATIC,
2249
                       dynamic=self._FIELDS_DYNAMIC,
2250
                       selected=self.op.output_fields)
2251

    
2252
    self.needed_locks = {}
2253
    self.share_locks[locking.LEVEL_NODE] = 1
2254
    if not self.op.nodes:
2255
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2256
    else:
2257
      self.needed_locks[locking.LEVEL_NODE] = \
2258
        _GetWantedNodes(self, self.op.nodes)
2259

    
2260
  def CheckPrereq(self):
2261
    """Check prerequisites.
2262

2263
    This checks that the fields required are valid output fields.
2264

2265
    """
2266
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2267

    
2268
  def Exec(self, feedback_fn):
2269
    """Computes the list of nodes and their attributes.
2270

2271
    """
2272
    nodenames = self.nodes
2273
    volumes = self.rpc.call_node_volumes(nodenames)
2274

    
2275
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2276
             in self.cfg.GetInstanceList()]
2277

    
2278
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2279

    
2280
    output = []
2281
    for node in nodenames:
2282
      nresult = volumes[node]
2283
      if nresult.offline:
2284
        continue
2285
      msg = nresult.fail_msg
2286
      if msg:
2287
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2288
        continue
2289

    
2290
      node_vols = nresult.payload[:]
2291
      node_vols.sort(key=lambda vol: vol['dev'])
2292

    
2293
      for vol in node_vols:
2294
        node_output = []
2295
        for field in self.op.output_fields:
2296
          if field == "node":
2297
            val = node
2298
          elif field == "phys":
2299
            val = vol['dev']
2300
          elif field == "vg":
2301
            val = vol['vg']
2302
          elif field == "name":
2303
            val = vol['name']
2304
          elif field == "size":
2305
            val = int(float(vol['size']))
2306
          elif field == "instance":
2307
            for inst in ilist:
2308
              if node not in lv_by_node[inst]:
2309
                continue
2310
              if vol['name'] in lv_by_node[inst][node]:
2311
                val = inst.name
2312
                break
2313
            else:
2314
              val = '-'
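              # (for/else: this branch runs only when the loop above did
              # not break, i.e. no instance owns this volume)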
2315
          else:
2316
            raise errors.ParameterError(field)
2317
          node_output.append(str(val))
2318

    
2319
        output.append(node_output)
2320

    
2321
    return output
2322

    
2323

    
2324
class LUQueryNodeStorage(NoHooksLU):
2325
  """Logical unit for getting information on storage units on node(s).
2326

2327
  """
2328
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2329
  REQ_BGL = False
2330
  _FIELDS_STATIC = utils.FieldSet("node")
2331

    
2332
  def ExpandNames(self):
2333
    storage_type = self.op.storage_type
2334

    
2335
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2336
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2337

    
2338
    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
2339

    
2340
    _CheckOutputFields(static=self._FIELDS_STATIC,
2341
                       dynamic=utils.FieldSet(*dynamic_fields),
2342
                       selected=self.op.output_fields)
2343

    
2344
    self.needed_locks = {}
2345
    self.share_locks[locking.LEVEL_NODE] = 1
2346

    
2347
    if self.op.nodes:
2348
      self.needed_locks[locking.LEVEL_NODE] = \
2349
        _GetWantedNodes(self, self.op.nodes)
2350
    else:
2351
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2352

    
2353
  def CheckPrereq(self):
2354
    """Check prerequisites.
2355

2356
    This checks that the fields required are valid output fields.
2357

2358
    """
2359
    self.op.name = getattr(self.op, "name", None)
2360

    
2361
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2362

    
2363
  def Exec(self, feedback_fn):
2364
    """Computes the list of nodes and their attributes.
2365

2366
    """
2367
    # Always get name to sort by
2368
    if constants.SF_NAME in self.op.output_fields:
2369
      fields = self.op.output_fields[:]
2370
    else:
2371
      fields = [constants.SF_NAME] + self.op.output_fields
2372

    
2373
    # Never ask for node as it's only known to the LU
2374
    while "node" in fields:
2375
      fields.remove("node")
2376

    
2377
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2378
    name_idx = field_idx[constants.SF_NAME]
2379

    
2380
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2381
    data = self.rpc.call_storage_list(self.nodes,
2382
                                      self.op.storage_type, st_args,
2383
                                      self.op.name, fields)
2384

    
2385
    result = []
2386

    
2387
    for node in utils.NiceSort(self.nodes):
2388
      nresult = data[node]
2389
      if nresult.offline:
2390
        continue
2391

    
2392
      msg = nresult.fail_msg
2393
      if msg:
2394
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2395
        continue
2396

    
2397
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2398

    
2399
      for name in utils.NiceSort(rows.keys()):
2400
        row = rows[name]
2401

    
2402
        out = []
2403

    
2404
        for field in self.op.output_fields:
2405
          if field == "node":
2406
            val = node
2407
          elif field in field_idx:
2408
            val = row[field_idx[field]]
2409
          else:
2410
            raise errors.ParameterError(field)
2411

    
2412
          out.append(val)
2413

    
2414
        result.append(out)
2415

    
2416
    return result
2417

    
2418

    
2419
class LUModifyNodeStorage(NoHooksLU):
2420
  """Logical unit for modifying a storage volume on a node.
2421

2422
  """
2423
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2424
  REQ_BGL = False
2425

    
2426
  def CheckArguments(self):
2427
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2428
    if node_name is None:
2429
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2430

    
2431
    self.op.node_name = node_name
2432

    
2433
    storage_type = self.op.storage_type
2434
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2435
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2436

    
2437
  def ExpandNames(self):
2438
    self.needed_locks = {
2439
      locking.LEVEL_NODE: self.op.node_name,
2440
      }
2441

    
2442
  def CheckPrereq(self):
2443
    """Check prerequisites.
2444

2445
    """
2446
    storage_type = self.op.storage_type
2447

    
2448
    try:
2449
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
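      # modifiable is the set of field names that may be changed for this
      # storage type; for LVM physical volumes this is typically just the
      # "allocatable" flag (illustrative, depends on the constants table)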
2450
    except KeyError:
2451
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2452
                                 " modified" % storage_type)
2453

    
2454
    diff = set(self.op.changes.keys()) - modifiable
2455
    if diff:
2456
      raise errors.OpPrereqError("The following fields can not be modified for"
2457
                                 " storage units of type '%s': %r" %
2458
                                 (storage_type, list(diff)))
2459

    
2460
  def Exec(self, feedback_fn):
2461
    """Computes the list of nodes and their attributes.
2462

2463
    """
2464
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2465
    result = self.rpc.call_storage_modify(self.op.node_name,
2466
                                          self.op.storage_type, st_args,
2467
                                          self.op.name, self.op.changes)
2468
    result.Raise("Failed to modify storage unit '%s' on %s" %
2469
                 (self.op.name, self.op.node_name))
2470

    
2471

    
2472
class LUAddNode(LogicalUnit):
2473
  """Logical unit for adding node to the cluster.
2474

2475
  """
2476
  HPATH = "node-add"
2477
  HTYPE = constants.HTYPE_NODE
2478
  _OP_REQP = ["node_name"]
2479

    
2480
  def BuildHooksEnv(self):
2481
    """Build hooks env.
2482

2483
    This will run on all nodes before, and on all nodes + the new node after.
2484

2485
    """
2486
    env = {
2487
      "OP_TARGET": self.op.node_name,
2488
      "NODE_NAME": self.op.node_name,
2489
      "NODE_PIP": self.op.primary_ip,
2490
      "NODE_SIP": self.op.secondary_ip,
2491
      }
2492
    nodes_0 = self.cfg.GetNodeList()
2493
    nodes_1 = nodes_0 + [self.op.node_name, ]
2494
    return env, nodes_0, nodes_1
2495

    
2496
  def CheckPrereq(self):
2497
    """Check prerequisites.
2498

2499
    This checks:
2500
     - the new node is not already in the config
2501
     - it is resolvable
2502
     - its parameters (single/dual homed) matches the cluster
2503

2504
    Any errors are signaled by raising errors.OpPrereqError.
2505

2506
    """
2507
    node_name = self.op.node_name
2508
    cfg = self.cfg
2509

    
2510
    dns_data = utils.HostInfo(node_name)
2511

    
2512
    node = dns_data.name
2513
    primary_ip = self.op.primary_ip = dns_data.ip
2514
    secondary_ip = getattr(self.op, "secondary_ip", None)
2515
    if secondary_ip is None:
2516
      secondary_ip = primary_ip
2517
    if not utils.IsValidIP(secondary_ip):
2518
      raise errors.OpPrereqError("Invalid secondary IP given")
2519
    self.op.secondary_ip = secondary_ip
2520

    
2521
    node_list = cfg.GetNodeList()
2522
    if not self.op.readd and node in node_list:
2523
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2524
                                 node)
2525
    elif self.op.readd and node not in node_list:
2526
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2527

    
2528
    for existing_node_name in node_list:
2529
      existing_node = cfg.GetNodeInfo(existing_node_name)
2530

    
2531
      if self.op.readd and node == existing_node_name:
2532
        if (existing_node.primary_ip != primary_ip or
2533
            existing_node.secondary_ip != secondary_ip):
2534
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2535
                                     " address configuration as before")
2536
        continue
2537

    
2538
      if (existing_node.primary_ip == primary_ip or
2539
          existing_node.secondary_ip == primary_ip or
2540
          existing_node.primary_ip == secondary_ip or
2541
          existing_node.secondary_ip == secondary_ip):
2542
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2543
                                   " existing node %s" % existing_node.name)
2544

    
2545
    # check that the type of the node (single versus dual homed) is the
2546
    # same as for the master
2547
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2548
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2549
    newbie_singlehomed = secondary_ip == primary_ip
2550
    if master_singlehomed != newbie_singlehomed:
2551
      if master_singlehomed:
2552
        raise errors.OpPrereqError("The master has no private ip but the"
2553
                                   " new node has one")
2554
      else:
2555
        raise errors.OpPrereqError("The master has a private ip but the"
2556
                                   " new node doesn't have one")
2557

    
2558
    # checks reachability
2559
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2560
      raise errors.OpPrereqError("Node not reachable by ping")
2561

    
2562
    if not newbie_singlehomed:
2563
      # check reachability from my secondary ip to newbie's secondary ip
2564
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2565
                           source=myself.secondary_ip):
2566
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2567
                                   " based ping to noded port")
2568

    
2569
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2570
    if self.op.readd:
2571
      exceptions = [node]
2572
    else:
2573
      exceptions = []
2574
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2575
    # the new node will increase mc_max with one, so:
2576
    mc_max = min(mc_max + 1, cp_size)
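    # illustrative example: with cp_size=10 and a three-node cluster whose
    # nodes are all candidates, mc_now=3 and mc_max is raised to 4 here,
    # so the node being added will become a master candidate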
2577
    self.master_candidate = mc_now < mc_max
2578

    
2579
    if self.op.readd:
2580
      self.new_node = self.cfg.GetNodeInfo(node)
2581
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2582
    else:
2583
      self.new_node = objects.Node(name=node,
2584
                                   primary_ip=primary_ip,
2585
                                   secondary_ip=secondary_ip,
2586
                                   master_candidate=self.master_candidate,
2587
                                   offline=False, drained=False)
2588

    
2589
  def Exec(self, feedback_fn):
2590
    """Adds the new node to the cluster.
2591

2592
    """
2593
    new_node = self.new_node
2594
    node = new_node.name
2595

    
2596
    # for re-adds, reset the offline/drained/master-candidate flags;
2597
    # we need to reset here, otherwise offline would prevent RPC calls
2598
    # later in the procedure; this also means that if the re-add
2599
    # fails, we are left with a non-offlined, broken node
2600
    if self.op.readd:
2601
      new_node.drained = new_node.offline = False
2602
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2603
      # if we demote the node, we do cleanup later in the procedure
2604
      new_node.master_candidate = self.master_candidate
2605

    
2606
    # notify the user about any possible mc promotion
2607
    if new_node.master_candidate:
2608
      self.LogInfo("Node will be a master candidate")
2609

    
2610
    # check connectivity
2611
    result = self.rpc.call_version([node])[node]
2612
    result.Raise("Can't get version information from node %s" % node)
2613
    if constants.PROTOCOL_VERSION == result.payload:
2614
      logging.info("Communication to node %s fine, sw version %s match",
2615
                   node, result.payload)
2616
    else:
2617
      raise errors.OpExecError("Version mismatch master version %s,"
2618
                               " node version %s" %
2619
                               (constants.PROTOCOL_VERSION, result.payload))
2620

    
2621
    # setup ssh on node
2622
    logging.info("Copy ssh key to node %s", node)
2623
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2624
    keyarray = []
2625
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2626
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2627
                priv_key, pub_key]
2628

    
2629
    for i in keyfiles:
2630
      f = open(i, 'r')
2631
      try:
2632
        keyarray.append(f.read())
2633
      finally:
2634
        f.close()
2635

    
2636
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2637
                                    keyarray[2],
2638
                                    keyarray[3], keyarray[4], keyarray[5])
2639
    result.Raise("Cannot transfer ssh keys to the new node")
2640

    
2641
    # Add node to our /etc/hosts, and add key to known_hosts
2642
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2643
      utils.AddHostToEtcHosts(new_node.name)
2644

    
2645
    if new_node.secondary_ip != new_node.primary_ip:
2646
      result = self.rpc.call_node_has_ip_address(new_node.name,
2647
                                                 new_node.secondary_ip)
2648
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2649
                   prereq=True)
2650
      if not result.payload:
2651
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2652
                                 " you gave (%s). Please fix and re-run this"
2653
                                 " command." % new_node.secondary_ip)
2654

    
2655
    node_verify_list = [self.cfg.GetMasterNode()]
2656
    node_verify_param = {
2657
      'nodelist': [node],
2658
      # TODO: do a node-net-test as well?
2659
    }
2660

    
2661
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2662
                                       self.cfg.GetClusterName())
2663
    for verifier in node_verify_list:
2664
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2665
      nl_payload = result[verifier].payload['nodelist']
2666
      if nl_payload:
2667
        for failed in nl_payload:
2668
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2669
                      (verifier, nl_payload[failed]))
2670
        raise errors.OpExecError("ssh/hostname verification failed.")
2671

    
2672
    if self.op.readd:
2673
      _RedistributeAncillaryFiles(self)
2674
      self.context.ReaddNode(new_node)
2675
      # make sure we redistribute the config
2676
      self.cfg.Update(new_node)
2677
      # and make sure the new node will not have old files around
2678
      if not new_node.master_candidate:
2679
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2680
        msg = result.fail_msg
2681
        if msg:
2682
          self.LogWarning("Node failed to demote itself from master"
2683
                          " candidate status: %s" % msg)
2684
    else:
2685
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2686
      self.context.AddNode(new_node)
2687

    
2688

    
2689
class LUSetNodeParams(LogicalUnit):
2690
  """Modifies the parameters of a node.
2691

2692
  """
2693
  HPATH = "node-modify"
2694
  HTYPE = constants.HTYPE_NODE
2695
  _OP_REQP = ["node_name"]
2696
  REQ_BGL = False
2697

    
2698
  def CheckArguments(self):
2699
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2700
    if node_name is None:
2701
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2702
    self.op.node_name = node_name
2703
    _CheckBooleanOpField(self.op, 'master_candidate')
2704
    _CheckBooleanOpField(self.op, 'offline')
2705
    _CheckBooleanOpField(self.op, 'drained')
2706
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2707
    if all_mods.count(None) == 3:
2708
      raise errors.OpPrereqError("Please pass at least one modification")
2709
    if all_mods.count(True) > 1:
2710
      raise errors.OpPrereqError("Can't set the node into more than one"
2711
                                 " state at the same time")
2712

    
2713
  def ExpandNames(self):
2714
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2715

    
2716
  def BuildHooksEnv(self):
2717
    """Build hooks env.
2718

2719
    This runs on the master node.
2720

2721
    """
2722
    env = {
2723
      "OP_TARGET": self.op.node_name,
2724
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2725
      "OFFLINE": str(self.op.offline),
2726
      "DRAINED": str(self.op.drained),
2727
      }
2728
    nl = [self.cfg.GetMasterNode(),
2729
          self.op.node_name]
2730
    return env, nl, nl
2731

    
2732
  def CheckPrereq(self):
2733
    """Check prerequisites.
2734

2735
    This only checks the instance list against the existing names.
2736

2737
    """
2738
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2739

    
2740
    if ((self.op.master_candidate == False or self.op.offline == True or
2741
         self.op.drained == True) and node.master_candidate):
2742
      # we will demote the node from master_candidate
2743
      if self.op.node_name == self.cfg.GetMasterNode():
2744
        raise errors.OpPrereqError("The master node has to be a"
2745
                                   " master candidate, online and not drained")
2746
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2747
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2748
      if num_candidates <= cp_size:
2749
        msg = ("Not enough master candidates (desired"
2750
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2751
        if self.op.force:
2752
          self.LogWarning(msg)
2753
        else:
2754
          raise errors.OpPrereqError(msg)
2755

    
2756
    if (self.op.master_candidate == True and
2757
        ((node.offline and not self.op.offline == False) or
2758
         (node.drained and not self.op.drained == False))):
2759
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2760
                                 " to master_candidate" % node.name)
2761

    
2762
    return
2763

    
2764
  def Exec(self, feedback_fn):
2765
    """Modifies a node.
2766

2767
    """
2768
    node = self.node
2769

    
2770
    result = []
2771
    changed_mc = False
2772

    
2773
    if self.op.offline is not None:
2774
      node.offline = self.op.offline
2775
      result.append(("offline", str(self.op.offline)))
2776
      if self.op.offline == True:
2777
        if node.master_candidate:
2778
          node.master_candidate = False
2779
          changed_mc = True
2780
          result.append(("master_candidate", "auto-demotion due to offline"))
2781
        if node.drained:
2782
          node.drained = False
2783
          result.append(("drained", "clear drained status due to offline"))
2784

    
2785
    if self.op.master_candidate is not None:
2786
      node.master_candidate = self.op.master_candidate
2787
      changed_mc = True
2788
      result.append(("master_candidate", str(self.op.master_candidate)))
2789
      if self.op.master_candidate == False:
2790
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2791
        msg = rrc.fail_msg
2792
        if msg:
2793
          self.LogWarning("Node failed to demote itself: %s" % msg)
2794

    
2795
    if self.op.drained is not None:
2796
      node.drained = self.op.drained
2797
      result.append(("drained", str(self.op.drained)))
2798
      if self.op.drained == True:
2799
        if node.master_candidate:
2800
          node.master_candidate = False
2801
          changed_mc = True
2802
          result.append(("master_candidate", "auto-demotion due to drain"))
2803
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2804
          msg = rrc.fail_msg
2805
          if msg:
2806
            self.LogWarning("Node failed to demote itself: %s" % msg)
2807
        if node.offline:
2808
          node.offline = False
2809
          result.append(("offline", "clear offline status due to drain"))
2810

    
2811
    # this will trigger configuration file update, if needed
2812
    self.cfg.Update(node)
2813
    # this will trigger job queue propagation or cleanup
2814
    if changed_mc:
2815
      self.context.ReaddNode(node)
2816

    
2817
    return result
2818

    
2819

    
2820
class LUPowercycleNode(NoHooksLU):
2821
  """Powercycles a node.
2822

2823
  """
2824
  _OP_REQP = ["node_name", "force"]
2825
  REQ_BGL = False
2826

    
2827
  def CheckArguments(self):
2828
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2829
    if node_name is None:
2830
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2831
    self.op.node_name = node_name
2832
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2833
      raise errors.OpPrereqError("The node is the master and the force"
2834
                                 " parameter was not set")
2835

    
2836
  def ExpandNames(self):
2837
    """Locking for PowercycleNode.
2838

2839
    This is a last-resort option and shouldn't block on other
2840
    jobs. Therefore, we grab no locks.
2841

2842
    """
2843
    self.needed_locks = {}
2844

    
2845
  def CheckPrereq(self):
2846
    """Check prerequisites.
2847

2848
    This LU has no prereqs.
2849

2850
    """
2851
    pass
2852

    
2853
  def Exec(self, feedback_fn):
2854
    """Reboots a node.
2855

2856
    """
2857
    result = self.rpc.call_node_powercycle(self.op.node_name,
2858
                                           self.cfg.GetHypervisorType())
2859
    result.Raise("Failed to schedule the reboot")
2860
    return result.payload
2861

    
2862

    
2863
class LUQueryClusterInfo(NoHooksLU):
2864
  """Query cluster configuration.
2865

2866
  """
2867
  _OP_REQP = []
2868
  REQ_BGL = False
2869

    
2870
  def ExpandNames(self):
2871
    self.needed_locks = {}
2872

    
2873
  def CheckPrereq(self):
2874
    """No prerequsites needed for this LU.
2875

2876
    """
2877
    pass
2878

    
2879
  def Exec(self, feedback_fn):
2880
    """Return cluster config.
2881

2882
    """
2883
    cluster = self.cfg.GetClusterInfo()
2884
    result = {
2885
      "software_version": constants.RELEASE_VERSION,
2886
      "protocol_version": constants.PROTOCOL_VERSION,
2887
      "config_version": constants.CONFIG_VERSION,
2888
      "os_api_version": max(constants.OS_API_VERSIONS),
2889
      "export_version": constants.EXPORT_VERSION,
2890
      "architecture": (platform.architecture()[0], platform.machine()),
2891
      "name": cluster.cluster_name,
2892
      "master": cluster.master_node,
2893
      "default_hypervisor": cluster.enabled_hypervisors[0],
2894
      "enabled_hypervisors": cluster.enabled_hypervisors,
2895
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
2896
                        for hypervisor_name in cluster.enabled_hypervisors]),
2897
      "beparams": cluster.beparams,
2898
      "nicparams": cluster.nicparams,
2899
      "candidate_pool_size": cluster.candidate_pool_size,
2900
      "master_netdev": cluster.master_netdev,
2901
      "volume_group_name": cluster.volume_group_name,
2902
      "file_storage_dir": cluster.file_storage_dir,
2903
      }
2904

    
2905
    return result
2906

    
2907

    
2908
class LUQueryConfigValues(NoHooksLU):
2909
  """Return configuration values.
2910

2911
  """
2912
  _OP_REQP = []
2913
  REQ_BGL = False
2914
  _FIELDS_DYNAMIC = utils.FieldSet()
2915
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2916

    
2917
  def ExpandNames(self):
2918
    self.needed_locks = {}
2919

    
2920
    _CheckOutputFields(static=self._FIELDS_STATIC,
2921
                       dynamic=self._FIELDS_DYNAMIC,
2922
                       selected=self.op.output_fields)
2923

    
2924
  def CheckPrereq(self):
2925
    """No prerequisites.
2926

2927
    """
2928
    pass
2929

    
2930
  def Exec(self, feedback_fn):
2931
    """Dump a representation of the cluster config to the standard output.
2932

2933
    """
2934
    values = []
2935
    for field in self.op.output_fields:
2936
      if field == "cluster_name":
2937
        entry = self.cfg.GetClusterName()
2938
      elif field == "master_node":
2939
        entry = self.cfg.GetMasterNode()
2940
      elif field == "drain_flag":
2941
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2942
      else:
2943
        raise errors.ParameterError(field)
2944
      values.append(entry)
2945
    return values
2946

    
2947

    
2948
class LUActivateInstanceDisks(NoHooksLU):
2949
  """Bring up an instance's disks.
2950

2951
  """
2952
  _OP_REQP = ["instance_name"]
2953
  REQ_BGL = False
2954

    
2955
  def ExpandNames(self):
2956
    self._ExpandAndLockInstance()
2957
    self.needed_locks[locking.LEVEL_NODE] = []
2958
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2959

    
2960
  def DeclareLocks(self, level):
2961
    if level == locking.LEVEL_NODE:
2962
      self._LockInstancesNodes()
2963

    
2964
  def CheckPrereq(self):
2965
    """Check prerequisites.
2966

2967
    This checks that the instance is in the cluster.
2968

2969
    """
2970
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2971
    assert self.instance is not None, \
2972
      "Cannot retrieve locked instance %s" % self.op.instance_name
2973
    _CheckNodeOnline(self, self.instance.primary_node)
2974

    
2975
  def Exec(self, feedback_fn):
2976
    """Activate the disks.
2977

2978
    """
2979
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2980
    if not disks_ok:
2981
      raise errors.OpExecError("Cannot activate block devices")
2982

    
2983
    return disks_info
2984

    
2985

    
2986
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2987
  """Prepare the block devices for an instance.
2988

2989
  This sets up the block devices on all nodes.
2990

2991
  @type lu: L{LogicalUnit}
2992
  @param lu: the logical unit on whose behalf we execute
2993
  @type instance: L{objects.Instance}
2994
  @param instance: the instance for whose disks we assemble
2995
  @type ignore_secondaries: boolean
2996
  @param ignore_secondaries: if true, errors on secondary nodes
2997
      won't result in an error return from the function
2998
  @return: a tuple of (disks_ok, device_info), where device_info is a list of
2999
      (host, instance_visible_name, node_visible_name) tuples
3000
      with the mapping from node devices to instance devices
3001

3002
  """
3003
  device_info = []
3004
  disks_ok = True
3005
  iname = instance.name
3006
  # With the two passes mechanism we try to reduce the window of
3007
  # opportunity for the race condition of switching DRBD to primary
3008
  # before handshaking occurred, but we do not eliminate it
3009

    
3010
  # The proper fix would be to wait (with some limits) until the
3011
  # connection has been made and drbd transitions from WFConnection
3012
  # into any other network-connected state (Connected, SyncTarget,
3013
  # SyncSource, etc.)
3014

    
3015
  # 1st pass, assemble on all nodes in secondary mode
3016
  for inst_disk in instance.disks:
3017
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3018
      lu.cfg.SetDiskID(node_disk, node)
3019
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3020
      msg = result.fail_msg
3021
      if msg:
3022
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3023
                           " (is_primary=False, pass=1): %s",
3024
                           inst_disk.iv_name, node, msg)
3025
        if not ignore_secondaries:
3026
          disks_ok = False
3027

    
3028
  # FIXME: race condition on drbd migration to primary
3029

    
3030
  # 2nd pass, do only the primary node
3031
  for inst_disk in instance.disks:
3032
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3033
      if node != instance.primary_node:
3034
        continue
3035
      lu.cfg.SetDiskID(node_disk, node)
3036
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3037
      msg = result.fail_msg
3038
      if msg:
3039
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3040
                           " (is_primary=True, pass=2): %s",
3041
                           inst_disk.iv_name, node, msg)
3042
        disks_ok = False
3043
    device_info.append((instance.primary_node, inst_disk.iv_name,
3044
                        result.payload))
3045

    
3046
  # leave the disks configured for the primary node
3047
  # this is a workaround that would be fixed better by
3048
  # improving the logical/physical id handling
3049
  for disk in instance.disks:
3050
    lu.cfg.SetDiskID(disk, instance.primary_node)
3051

    
3052
  return disks_ok, device_info
3053
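
# Illustrative sketch, not part of cmdlib.py: the two-pass ordering used by
# _AssembleInstanceDisks above, with the RPC layer replaced by a
# caller-supplied callback. The names _TwoPassAssemble and assemble_fn are
# hypothetical.
def _TwoPassAssemble(disks_per_node, primary_node, assemble_fn):
  """Assemble disks on all nodes as secondary first, then on the primary.

  @param disks_per_node: dict mapping node name to a list of disk objects
  @param primary_node: the node whose devices must end up in primary mode
  @param assemble_fn: callable(node, disk, as_primary); may raise on error

  """
  # 1st pass: every node in secondary mode, so that the DRBD peers can
  # connect before any device is switched to primary
  for node, disks in disks_per_node.items():
    for disk in disks:
      assemble_fn(node, disk, False)
  # 2nd pass: only the primary node, now switching its devices to primary
  for disk in disks_per_node.get(primary_node, []):
    assemble_fn(primary_node, disk, True)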

    
3054

    
3055
def _StartInstanceDisks(lu, instance, force):
3056
  """Start the disks of an instance.
3057

3058
  """
3059
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3060
                                           ignore_secondaries=force)
3061
  if not disks_ok:
3062
    _ShutdownInstanceDisks(lu, instance)
3063
    if force is not None and not force:
3064
      lu.proc.LogWarning("", hint="If the message above refers to a"
3065
                         " secondary node,"
3066
                         " you can retry the operation using '--force'.")
3067
    raise errors.OpExecError("Disk consistency error")
3068

    
3069

    
3070
class LUDeactivateInstanceDisks(NoHooksLU):
3071
  """Shutdown an instance's disks.
3072

3073
  """
3074
  _OP_REQP = ["instance_name"]
3075
  REQ_BGL = False
3076

    
3077
  def ExpandNames(self):
3078
    self._ExpandAndLockInstance()
3079
    self.needed_locks[locking.LEVEL_NODE] = []
3080
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3081

    
3082
  def DeclareLocks(self, level):
3083
    if level == locking.LEVEL_NODE:
3084
      self._LockInstancesNodes()
3085

    
3086
  def CheckPrereq(self):
3087
    """Check prerequisites.
3088

3089
    This checks that the instance is in the cluster.
3090

3091
    """
3092
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3093
    assert self.instance is not None, \
3094
      "Cannot retrieve locked instance %s" % self.op.instance_name
3095

    
3096
  def Exec(self, feedback_fn):
3097
    """Deactivate the disks
3098

3099
    """
3100
    instance = self.instance
3101
    _SafeShutdownInstanceDisks(self, instance)
3102

    
3103

    
3104
def _SafeShutdownInstanceDisks(lu, instance):
3105
  """Shutdown block devices of an instance.
3106

3107
  This function checks if an instance is running, before calling
3108
  _ShutdownInstanceDisks.
3109

3110
  """
3111
  pnode = instance.primary_node
3112
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3113
  ins_l.Raise("Can't contact node %s" % pnode)
3114

    
3115
  if instance.name in ins_l.payload:
3116
    raise errors.OpExecError("Instance is running, can't shutdown"
3117
                             " block devices.")
3118

    
3119
  _ShutdownInstanceDisks(lu, instance)
3120

    
3121

    
3122
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3123
  """Shutdown block devices of an instance.
3124

3125
  This does the shutdown on all nodes of the instance.
3126

3127
  If ignore_primary is true, errors on the primary node are
3128
  ignored.
3129

3130
  """
3131
  all_result = True
3132
  for disk in instance.disks:
3133
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3134
      lu.cfg.SetDiskID(top_disk, node)
3135
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3136
      msg = result.fail_msg
3137
      if msg:
3138
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3139
                      disk.iv_name, node, msg)
3140
        if not ignore_primary or node != instance.primary_node:
3141
          all_result = False
3142
  return all_result
3143
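
# Illustrative sketch, not part of cmdlib.py: the error accounting done by
# _ShutdownInstanceDisks above. Failures on secondary nodes always count;
# failures on the primary node count only when ignore_primary is false.
# The name _ShutdownResultOk is hypothetical.
def _ShutdownResultOk(failed_nodes, primary_node, ignore_primary=False):
  """Return True if the given per-node shutdown failures are acceptable.

  """
  for node in failed_nodes:
    if not ignore_primary or node != primary_node:
      return False
  return True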

    
3144

    
3145
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3146
  """Checks if a node has enough free memory.
3147

3148
  This function checks if a given node has the needed amount of free
3149
  memory. In case the node has less memory or we cannot get the
3150
  information from the node, this function raises an OpPrereqError
3151
  exception.
3152

3153
  @type lu: C{LogicalUnit}
3154
  @param lu: a logical unit from which we get configuration data
3155
  @type node: C{str}
3156
  @param node: the node to check
3157
  @type reason: C{str}
3158
  @param reason: string to use in the error message
3159
  @type requested: C{int}
3160
  @param requested: the amount of memory in MiB to check for
3161
  @type hypervisor_name: C{str}
3162
  @param hypervisor_name: the hypervisor to ask for memory stats
3163
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3164
      we cannot check the node
3165

3166
  """
3167
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3168
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3169
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3170
  if not isinstance(free_mem, int):
3171
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3172
                               " was '%s'" % (node, free_mem))
3173
  if requested > free_mem:
3174
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3175
                               " needed %s MiB, available %s MiB" %
3176
                               (node, reason, requested, free_mem))
3177
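
# Illustrative sketch, not part of cmdlib.py: the decision taken by
# _CheckNodeFreeMemory above, with the node RPC replaced by a plain value.
# The name _EnoughMemory is hypothetical and ValueError stands in for
# errors.OpPrereqError.
def _EnoughMemory(free_mem, requested, node, reason):
  """Raise ValueError if the reported free memory is unusable or too low.

  """
  if not isinstance(free_mem, int):
    raise ValueError("Can't compute free memory on node %s, result was '%s'" %
                     (node, free_mem))
  if requested > free_mem:
    raise ValueError("Not enough memory on node %s for %s: needed %s MiB,"
                     " available %s MiB" % (node, reason, requested, free_mem))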

    
3178

    
3179
class LUStartupInstance(LogicalUnit):
3180
  """Starts an instance.
3181

3182
  """
3183
  HPATH = "instance-start"
3184
  HTYPE = constants.HTYPE_INSTANCE
3185
  _OP_REQP = ["instance_name", "force"]
3186
  REQ_BGL = False
3187

    
3188
  def ExpandNames(self):
3189
    self._ExpandAndLockInstance()
3190

    
3191
  def BuildHooksEnv(self):
3192
    """Build hooks env.
3193

3194
    This runs on master, primary and secondary nodes of the instance.
3195

3196
    """
3197
    env = {
3198
      "FORCE": self.op.force,
3199
      }
3200
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3201
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3202
    return env, nl, nl
3203

    
3204
  def CheckPrereq(self):
3205
    """Check prerequisites.
3206

3207
    This checks that the instance is in the cluster.
3208

3209
    """
3210
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3211
    assert self.instance is not None, \
3212
      "Cannot retrieve locked instance %s" % self.op.instance_name
3213

    
3214
    # extra beparams
3215
    self.beparams = getattr(self.op, "beparams", {})
3216
    if self.beparams:
3217
      if not isinstance(self.beparams, dict):
3218
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3219
                                   " dict" % (type(self.beparams), ))
3220
      # fill the beparams dict
3221
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3222
      self.op.beparams = self.beparams
3223

    
3224
    # extra hvparams
3225
    self.hvparams = getattr(self.op, "hvparams", {})
3226
    if self.hvparams:
3227
      if not isinstance(self.hvparams, dict):
3228
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3229
                                   " dict" % (type(self.hvparams), ))
3230

    
3231
      # check hypervisor parameter syntax (locally)
3232
      cluster = self.cfg.GetClusterInfo()
3233
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3234
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3235
                                    instance.hvparams)
3236
      filled_hvp.update(self.hvparams)
3237
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3238
      hv_type.CheckParameterSyntax(filled_hvp)
3239
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3240
      self.op.hvparams = self.hvparams
3241

    
3242
    _CheckNodeOnline(self, instance.primary_node)
3243

    
3244
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3245
    # check bridges existence
3246
    _CheckInstanceBridgesExist(self, instance)
3247

    
3248
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3249
                                              instance.name,
3250
                                              instance.hypervisor)
3251
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3252
                      prereq=True)
3253
    if not remote_info.payload: # not running already
3254
      _CheckNodeFreeMemory(self, instance.primary_node,
3255
                           "starting instance %s" % instance.name,
3256
                           bep[constants.BE_MEMORY], instance.hypervisor)
3257

    
3258
  def Exec(self, feedback_fn):
3259
    """Start the instance.
3260

3261
    """
3262
    instance = self.instance
3263
    force = self.op.force
3264

    
3265
    self.cfg.MarkInstanceUp(instance.name)
3266

    
3267
    node_current = instance.primary_node
3268

    
3269
    _StartInstanceDisks(self, instance, force)
3270

    
3271
    result = self.rpc.call_instance_start(node_current, instance,
3272
                                          self.hvparams, self.beparams)
3273
    msg = result.fail_msg
3274
    if msg:
3275
      _ShutdownInstanceDisks(self, instance)
3276
      raise errors.OpExecError("Could not start instance: %s" % msg)
3277
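
# Illustrative sketch, not part of cmdlib.py: the hvparams overlay performed
# in LUStartupInstance.CheckPrereq above, with objects.FillDict replaced by a
# plain dict merge. The names _FillDictSketch and _EffectiveHvParams are
# hypothetical, as are the parameter names in the example below.
def _FillDictSketch(defaults, custom):
  """Return a copy of defaults with the custom values layered on top.

  """
  filled = dict(defaults)
  filled.update(custom)
  return filled


def _EffectiveHvParams(cluster_defaults, instance_params, op_params):
  """Cluster defaults, overridden per instance, overridden per opcode.

  """
  return _FillDictSketch(_FillDictSketch(cluster_defaults, instance_params),
                         op_params)

# For example, _EffectiveHvParams({"acpi": True, "boot_order": "cd"},
#                                 {"boot_order": "dc"}, {"acpi": False})
# yields {"acpi": False, "boot_order": "dc"}.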

    
3278

    
3279
class LURebootInstance(LogicalUnit):
3280
  """Reboot an instance.
3281

3282
  """
3283
  HPATH = "instance-reboot"
3284
  HTYPE = constants.HTYPE_INSTANCE
3285
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3286
  REQ_BGL = False
3287

    
3288
  def ExpandNames(self):
3289
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3290
                                   constants.INSTANCE_REBOOT_HARD,
3291
                                   constants.INSTANCE_REBOOT_FULL]:
3292
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3293
                                  (constants.INSTANCE_REBOOT_SOFT,
3294
                                   constants.INSTANCE_REBOOT_HARD,
3295
                                   constants.INSTANCE_REBOOT_FULL))
3296
    self._ExpandAndLockInstance()
3297

    
3298
  def BuildHooksEnv(self):
3299
    """Build hooks env.
3300

3301
    This runs on master, primary and secondary nodes of the instance.
3302

3303
    """
3304
    env = {
3305
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3306
      "REBOOT_TYPE": self.op.reboot_type,
3307
      }
3308
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3309
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3310
    return env, nl, nl
3311

    
3312
  def CheckPrereq(self):
3313
    """Check prerequisites.
3314

3315
    This checks that the instance is in the cluster.
3316

3317
    """
3318
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3319
    assert self.instance is not None, \
3320
      "Cannot retrieve locked instance %s" % self.op.instance_name
3321

    
3322
    _CheckNodeOnline(self, instance.primary_node)
3323

    
3324
    # check bridges existence
3325
    _CheckInstanceBridgesExist(self, instance)
3326

    
3327
  def Exec(self, feedback_fn):
3328
    """Reboot the instance.
3329

3330
    """
3331
    instance = self.instance
3332
    ignore_secondaries = self.op.ignore_secondaries
3333
    reboot_type = self.op.reboot_type
3334

    
3335
    node_current = instance.primary_node
3336

    
3337
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3338
                       constants.INSTANCE_REBOOT_HARD]:
3339
      for disk in instance.disks:
3340
        self.cfg.SetDiskID(disk, node_current)
3341
      result = self.rpc.call_instance_reboot(node_current, instance,
3342
                                             reboot_type)
3343
      result.Raise("Could not reboot instance")
3344
    else:
3345
      result = self.rpc.call_instance_shutdown(node_current, instance)
3346
      result.Raise("Could not shutdown instance for full reboot")
3347
      _ShutdownInstanceDisks(self, instance)
3348
      _StartInstanceDisks(self, instance, ignore_secondaries)
3349
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3350
      msg = result.fail_msg
3351
      if msg:
3352
        _ShutdownInstanceDisks(self, instance)
3353
        raise errors.OpExecError("Could not start instance for"
3354
                                 " full reboot: %s" % msg)
3355

    
3356
    self.cfg.MarkInstanceUp(instance.name)
3357
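
# Illustrative sketch, not part of cmdlib.py: the branching done by
# LURebootInstance.Exec above, expressed as a pure planning function. The
# name _RebootPlan and the step strings are hypothetical.
def _RebootPlan(reboot_type, soft="soft", hard="hard", full="full"):
  """Return the list of steps a given reboot type requires.

  """
  if reboot_type in (soft, hard):
    # soft and hard reboots are a single hypervisor-level reboot call
    return ["instance_reboot"]
  if reboot_type == full:
    # a full reboot is a stop/start cycle, including the block devices
    return ["instance_shutdown", "shutdown_disks",
            "start_disks", "instance_start"]
  raise ValueError("reboot type not in [%s, %s, %s]" % (soft, hard, full))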

    
3358

    
3359
class LUShutdownInstance(LogicalUnit):
3360
  """Shutdown an instance.
3361

3362
  """
3363
  HPATH = "instance-stop"
3364
  HTYPE = constants.HTYPE_INSTANCE
3365
  _OP_REQP = ["instance_name"]
3366
  REQ_BGL = False
3367

    
3368
  def ExpandNames(self):
3369
    self._ExpandAndLockInstance()
3370

    
3371
  def BuildHooksEnv(self):
3372
    """Build hooks env.
3373

3374
    This runs on master, primary and secondary nodes of the instance.
3375

3376
    """
3377
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3378
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3379
    return env, nl, nl
3380

    
3381
  def CheckPrereq(self):
3382
    """Check prerequisites.
3383

3384
    This checks that the instance is in the cluster.
3385

3386
    """
3387
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3388
    assert self.instance is not None, \
3389
      "Cannot retrieve locked instance %s" % self.op.instance_name
3390
    _CheckNodeOnline(self, self.instance.primary_node)
3391

    
3392
  def Exec(self, feedback_fn):
3393
    """Shutdown the instance.
3394

3395
    """
3396
    instance = self.instance
3397
    node_current = instance.primary_node
3398
    self.cfg.MarkInstanceDown(instance.name)
3399
    result = self.rpc.call_instance_shutdown(node_current, instance)
3400
    msg = result.fail_msg
3401
    if msg:
3402
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3403

    
3404
    _ShutdownInstanceDisks(self, instance)
3405

    
3406

    
3407
class LUReinstallInstance(LogicalUnit):
3408
  """Reinstall an instance.
3409

3410
  """
3411
  HPATH = "instance-reinstall"
3412
  HTYPE = constants.HTYPE_INSTANCE
3413
  _OP_REQP = ["instance_name"]
3414
  REQ_BGL = False
3415

    
3416
  def ExpandNames(self):
3417
    self._ExpandAndLockInstance()
3418

    
3419
  def BuildHooksEnv(self):
3420
    """Build hooks env.
3421

3422
    This runs on master, primary and secondary nodes of the instance.
3423

3424
    """
3425
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3426
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3427
    return env, nl, nl
3428

    
3429
  def CheckPrereq(self):
3430
    """Check prerequisites.
3431

3432
    This checks that the instance is in the cluster and is not running.
3433

3434
    """
3435
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3436
    assert instance is not None, \
3437
      "Cannot retrieve locked instance %s" % self.op.instance_name
3438
    _CheckNodeOnline(self, instance.primary_node)
3439

    
3440
    if instance.disk_template == constants.DT_DISKLESS:
3441
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3442
                                 self.op.instance_name)
3443
    if instance.admin_up:
3444
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3445
                                 self.op.instance_name)
3446
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3447
                                              instance.name,
3448
                                              instance.hypervisor)
3449
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3450
                      prereq=True)
3451
    if remote_info.payload:
3452
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3453
                                 (self.op.instance_name,
3454
                                  instance.primary_node))
3455

    
3456
    self.op.os_type = getattr(self.op, "os_type", None)
3457
    if self.op.os_type is not None:
3458
      # OS verification
3459
      pnode = self.cfg.GetNodeInfo(
3460
        self.cfg.ExpandNodeName(instance.primary_node))
3461
      if pnode is None:
3462
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3463
                                   instance.primary_node)
3464
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3465
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3466
                   (self.op.os_type, pnode.name), prereq=True)
3467

    
3468
    self.instance = instance
3469

    
3470
  def Exec(self, feedback_fn):
3471
    """Reinstall the instance.
3472

3473
    """
3474
    inst = self.instance
3475

    
3476
    if self.op.os_type is not None:
3477
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3478
      inst.os = self.op.os_type
3479
      self.cfg.Update(inst)
3480

    
3481
    _StartInstanceDisks(self, inst, None)
3482
    try:
3483
      feedback_fn("Running the instance OS create scripts...")
3484
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3485
      result.Raise("Could not install OS for instance %s on node %s" %
3486
                   (inst.name, inst.primary_node))
3487
    finally:
3488
      _ShutdownInstanceDisks(self, inst)
3489

    
3490

    
3491
class LURenameInstance(LogicalUnit):
3492
  """Rename an instance.
3493

3494
  """
3495
  HPATH = "instance-rename"
3496
  HTYPE = constants.HTYPE_INSTANCE
3497
  _OP_REQP = ["instance_name", "new_name"]
3498

    
3499
  def BuildHooksEnv(self):
3500
    """Build hooks env.
3501

3502
    This runs on master, primary and secondary nodes of the instance.
3503

3504
    """
3505
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3506
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3507
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3508
    return env, nl, nl
3509

    
3510
  def CheckPrereq(self):
3511
    """Check prerequisites.
3512

3513
    This checks that the instance is in the cluster and is not running.
3514

3515
    """
3516
    instance = self.cfg.GetInstanceInfo(
3517
      self.cfg.ExpandInstanceName(self.op.instance_name))
3518
    if instance is None:
3519
      raise errors.OpPrereqError("Instance '%s' not known" %
3520
                                 self.op.instance_name)
3521
    _CheckNodeOnline(self, instance.primary_node)
3522

    
3523
    if instance.admin_up:
3524
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3525
                                 self.op.instance_name)
3526
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3527
                                              instance.name,
3528
                                              instance.hypervisor)
3529
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3530
                      prereq=True)
3531
    if remote_info.payload:
3532
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3533
                                 (self.op.instance_name,
3534
                                  instance.primary_node))
3535
    self.instance = instance
3536

    
3537
    # new name verification
3538
    name_info = utils.HostInfo(self.op.new_name)
3539

    
3540
    self.op.new_name = new_name = name_info.name
3541
    instance_list = self.cfg.GetInstanceList()
3542
    if new_name in instance_list:
3543
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3544
                                 new_name)
3545

    
3546
    if not getattr(self.op, "ignore_ip", False):
3547
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3548
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3549
                                   (name_info.ip, new_name))
3550

    
3551

    
3552
  def Exec(self, feedback_fn):
3553
    """Reinstall the instance.
3554

3555
    """
3556
    inst = self.instance
3557
    old_name = inst.name
3558

    
3559
    if inst.disk_template == constants.DT_FILE:
3560
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3561

    
3562
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3563
    # Change the instance lock. This is definitely safe while we hold the BGL
3564
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3565
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3566

    
3567
    # re-read the instance from the configuration after rename
3568
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3569

    
3570
    if inst.disk_template == constants.DT_FILE:
3571
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3572
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3573
                                                     old_file_storage_dir,
3574
                                                     new_file_storage_dir)
3575
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3576
                   " (but the instance has been renamed in Ganeti)" %
3577
                   (inst.primary_node, old_file_storage_dir,
3578
                    new_file_storage_dir))
3579

    
3580
    _StartInstanceDisks(self, inst, None)
3581
    try:
3582
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3583
                                                 old_name)
3584
      msg = result.fail_msg
3585
      if msg:
3586
        msg = ("Could not run OS rename script for instance %s on node %s"
3587
               " (but the instance has been renamed in Ganeti): %s" %
3588
               (inst.name, inst.primary_node, msg))
3589
        self.proc.LogWarning(msg)
3590
    finally:
3591
      _ShutdownInstanceDisks(self, inst)
3592

    
3593

    
3594
class LURemoveInstance(LogicalUnit):
3595
  """Remove an instance.
3596

3597
  """
3598
  HPATH = "instance-remove"
3599
  HTYPE = constants.HTYPE_INSTANCE
3600
  _OP_REQP = ["instance_name", "ignore_failures"]
3601
  REQ_BGL = False
3602

    
3603
  def ExpandNames(self):
3604
    self._ExpandAndLockInstance()
3605
    self.needed_locks[locking.LEVEL_NODE] = []
3606
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3607

    
3608
  def DeclareLocks(self, level):
3609
    if level == locking.LEVEL_NODE:
3610
      self._LockInstancesNodes()
3611

    
3612
  def BuildHooksEnv(self):
3613
    """Build hooks env.
3614

3615
    This runs on master, primary and secondary nodes of the instance.
3616

3617
    """
3618
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3619
    nl = [self.cfg.GetMasterNode()]
3620
    return env, nl, nl
3621

    
3622
  def CheckPrereq(self):
3623
    """Check prerequisites.
3624

3625
    This checks that the instance is in the cluster.
3626

3627
    """
3628
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3629
    assert self.instance is not None, \
3630
      "Cannot retrieve locked instance %s" % self.op.instance_name
3631

    
3632
  def Exec(self, feedback_fn):
3633
    """Remove the instance.
3634

3635
    """
3636
    instance = self.instance
3637
    logging.info("Shutting down instance %s on node %s",
3638
                 instance.name, instance.primary_node)
3639

    
3640
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3641
    msg = result.fail_msg
3642
    if msg:
3643
      if self.op.ignore_failures:
3644
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3645
      else:
3646
        raise errors.OpExecError("Could not shutdown instance %s on"
3647
                                 " node %s: %s" %
3648
                                 (instance.name, instance.primary_node, msg))
3649

    
3650
    logging.info("Removing block devices for instance %s", instance.name)
3651

    
3652
    if not _RemoveDisks(self, instance):
3653
      if self.op.ignore_failures:
3654
        feedback_fn("Warning: can't remove instance's disks")
3655
      else:
3656
        raise errors.OpExecError("Can't remove instance's disks")
3657

    
3658
    logging.info("Removing instance %s out of cluster config", instance.name)
3659

    
3660
    self.cfg.RemoveInstance(instance.name)
3661
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3662

    
3663

    
3664
class LUQueryInstances(NoHooksLU):
3665
  """Logical unit for querying instances.
3666

3667
  """
3668
  _OP_REQP = ["output_fields", "names", "use_locking"]
3669
  REQ_BGL = False
3670
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3671
                                    "admin_state",
3672
                                    "disk_template", "ip", "mac", "bridge",
3673
                                    "nic_mode", "nic_link",
3674
                                    "sda_size", "sdb_size", "vcpus", "tags",
3675
                                    "network_port", "beparams",
3676
                                    r"(disk)\.(size)/([0-9]+)",
3677
                                    r"(disk)\.(sizes)", "disk_usage",
3678
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3679
                                    r"(nic)\.(bridge)/([0-9]+)",
3680
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3681
                                    r"(disk|nic)\.(count)",
3682
                                    "serial_no", "hypervisor", "hvparams",] +
3683
                                  ["hv/%s" % name
3684
                                   for name in constants.HVS_PARAMETERS] +
3685
                                  ["be/%s" % name
3686
                                   for name in constants.BES_PARAMETERS])
3687
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3688

    
3689

    
3690
  def ExpandNames(self):
3691
    _CheckOutputFields(static=self._FIELDS_STATIC,
3692
                       dynamic=self._FIELDS_DYNAMIC,
3693
                       selected=self.op.output_fields)
3694

    
3695
    self.needed_locks = {}
3696
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3697
    self.share_locks[locking.LEVEL_NODE] = 1
3698

    
3699
    if self.op.names:
3700
      self.wanted = _GetWantedInstances(self, self.op.names)
3701
    else:
3702
      self.wanted = locking.ALL_SET
3703

    
3704
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3705
    self.do_locking = self.do_node_query and self.op.use_locking
3706
    if self.do_locking:
3707
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3708
      self.needed_locks[locking.LEVEL_NODE] = []
3709
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3710

    
3711
  def DeclareLocks(self, level):
3712
    if level == locking.LEVEL_NODE and self.do_locking:
3713
      self._LockInstancesNodes()
3714

    
3715
  def CheckPrereq(self):
3716
    """Check prerequisites.
3717

3718
    """
3719
    pass
3720

    
3721
  def Exec(self, feedback_fn):
3722
    """Computes the list of nodes and their attributes.
3723

3724
    """
3725
    all_info = self.cfg.GetAllInstancesInfo()
3726
    if self.wanted == locking.ALL_SET:
3727
      # caller didn't specify instance names, so ordering is not important
3728
      if self.do_locking:
3729
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3730
      else:
3731
        instance_names = all_info.keys()
3732
      instance_names = utils.NiceSort(instance_names)
3733
    else:
3734
      # caller did specify names, so we must keep the ordering
3735
      if self.do_locking:
3736
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3737
      else:
3738
        tgt_set = all_info.keys()
3739
      missing = set(self.wanted).difference(tgt_set)
3740
      if missing:
3741
        raise errors.OpExecError("Some instances were removed before"
3742
                                 " retrieving their data: %s" % missing)
3743
      instance_names = self.wanted
3744

    
3745
    instance_list = [all_info[iname] for iname in instance_names]
3746

    
3747
    # begin data gathering
3748

    
3749
    nodes = frozenset([inst.primary_node for inst in instance_list])
3750
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3751

    
3752
    bad_nodes = []
3753
    off_nodes = []
3754
    if self.do_node_query:
3755
      live_data = {}
3756
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3757
      for name in nodes:
3758
        result = node_data[name]
3759
        if result.offline:
3760
          # offline nodes will be in both lists
3761
          off_nodes.append(name)
3762
        if result.failed or result.fail_msg:
3763
          bad_nodes.append(name)
3764
        else:
3765
          if result.payload:
3766
            live_data.update(result.payload)
3767
          # else no instance is alive
3768
    else:
3769
      live_data = dict([(name, {}) for name in instance_names])
3770

    
3771
    # end data gathering
3772

    
3773
    HVPREFIX = "hv/"
3774
    BEPREFIX = "be/"
3775
    output = []
3776
    cluster = self.cfg.GetClusterInfo()
3777
    for instance in instance_list:
3778
      iout = []
3779
      i_hv = cluster.FillHV(instance)
3780
      i_be = cluster.FillBE(instance)
3781
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
3782
                                 nic.nicparams) for nic in instance.nics]
3783
      for field in self.op.output_fields:
3784
        st_match = self._FIELDS_STATIC.Matches(field)
3785
        if field == "name":
3786
          val = instance.name
3787
        elif field == "os":
3788
          val = instance.os
3789
        elif field == "pnode":
3790
          val = instance.primary_node
3791
        elif field == "snodes":
3792
          val = list(instance.secondary_nodes)
3793
        elif field == "admin_state":
3794
          val = instance.admin_up
3795
        elif field == "oper_state":
3796
          if instance.primary_node in bad_nodes:
3797
            val = None
3798
          else:
3799
            val = bool(live_data.get(instance.name))
3800
        elif field == "status":
3801
          if instance.primary_node in off_nodes:
3802
            val = "ERROR_nodeoffline"
3803
          elif instance.primary_node in bad_nodes:
3804
            val = "ERROR_nodedown"
3805
          else:
3806
            running = bool(live_data.get(instance.name))
3807
            if running:
3808
              if instance.admin_up:
3809
                val = "running"
3810
              else:
3811
                val = "ERROR_up"
3812
            else:
3813
              if instance.admin_up:
3814
                val = "ERROR_down"
3815
              else:
3816
                val = "ADMIN_down"
3817
        elif field == "oper_ram":
3818
          if instance.primary_node in bad_nodes:
3819
            val = None
3820
          elif instance.name in live_data:
3821
            val = live_data[instance.name].get("memory", "?")
3822
          else:
3823
            val = "-"
3824
        elif field == "vcpus":
3825
          val = i_be[constants.BE_VCPUS]
3826
        elif field == "disk_template":
3827
          val = instance.disk_template
3828
        elif field == "ip":
3829
          if instance.nics:
3830
            val = instance.nics[0].ip
3831
          else:
3832
            val = None
3833
        elif field == "nic_mode":
3834
          if instance.nics:
3835
            val = i_nicp[0][constants.NIC_MODE]
3836
          else:
3837
            val = None
3838
        elif field == "nic_link":
3839
          if instance.nics:
3840
            val = i_nicp[0][constants.NIC_LINK]
3841
          else:
3842
            val = None
3843
        elif field == "bridge":
3844
          if (instance.nics and
3845
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
3846
            val = i_nicp[0][constants.NIC_LINK]
3847
          else:
3848
            val = None
3849
        elif field == "mac":
3850
          if instance.nics:
3851
            val = instance.nics[0].mac
3852
          else:
3853
            val = None
3854
        elif field == "sda_size" or field == "sdb_size":
3855
          idx = ord(field[2]) - ord('a')
3856
          try:
3857
            val = instance.FindDisk(idx).size
3858
          except errors.OpPrereqError:
3859
            val = None
3860
        elif field == "disk_usage": # total disk usage per node
3861
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3862
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3863
        elif field == "tags":
3864
          val = list(instance.GetTags())
3865
        elif field == "serial_no":
3866
          val = instance.serial_no
3867
        elif field == "network_port":
3868
          val = instance.network_port
3869
        elif field == "hypervisor":
3870
          val = instance.hypervisor
3871
        elif field == "hvparams":
3872
          val = i_hv
3873
        elif (field.startswith(HVPREFIX) and
3874
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3875
          val = i_hv.get(field[len(HVPREFIX):], None)
3876
        elif field == "beparams":
3877
          val = i_be
3878
        elif (field.startswith(BEPREFIX) and
3879
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3880
          val = i_be.get(field[len(BEPREFIX):], None)
3881
        elif st_match and st_match.groups():
3882
          # matches a variable list
3883
          st_groups = st_match.groups()
3884
          if st_groups and st_groups[0] == "disk":
3885
            if st_groups[1] == "count":
3886
              val = len(instance.disks)
3887
            elif st_groups[1] == "sizes":
3888
              val = [disk.size for disk in instance.disks]
3889
            elif st_groups[1] == "size":
3890
              try:
3891
                val = instance.FindDisk(st_groups[2]).size
3892
              except errors.OpPrereqError:
3893
                val = None
3894
            else:
3895
              assert False, "Unhandled disk parameter"
3896
          elif st_groups[0] == "nic":
3897
            if st_groups[1] == "count":
3898
              val = len(instance.nics)
3899
            elif st_groups[1] == "macs":
3900
              val = [nic.mac for nic in instance.nics]
3901
            elif st_groups[1] == "ips":
3902
              val = [nic.ip for nic in instance.nics]
3903
            elif st_groups[1] == "modes":
3904
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
3905
            elif st_groups[1] == "links":
3906
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
3907
            elif st_groups[1] == "bridges":
3908
              val = []
3909
              for nicp in i_nicp:
3910
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3911
                  val.append(nicp[constants.NIC_LINK])
3912
                else:
3913
                  val.append(None)
3914
            else:
3915
              # index-based item
3916
              nic_idx = int(st_groups[2])
3917
              if nic_idx >= len(instance.nics):
3918
                val = None
3919
              else:
3920
                if st_groups[1] == "mac":
3921
                  val = instance.nics[nic_idx].mac
3922
                elif st_groups[1] == "ip":
3923
                  val = instance.nics[nic_idx].ip
3924
                elif st_groups[1] == "mode":
3925
                  val = i_nicp[nic_idx][constants.NIC_MODE]
3926
                elif st_groups[1] == "link":
3927
                  val = i_nicp[nic_idx][constants.NIC_LINK]
3928
                elif st_groups[1] == "bridge":
3929
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
3930
                  if nic_mode == constants.NIC_MODE_BRIDGED:
3931
                    val = i_nicp[nic_idx][constants.NIC_LINK]
3932
                  else:
3933
                    val = None
3934
                else:
3935
                  assert False, "Unhandled NIC parameter"
3936
          else:
3937
            assert False, ("Declared but unhandled variable parameter '%s'" %
3938
                           field)
3939
        else:
3940
          assert False, "Declared but unhandled parameter '%s'" % field
3941
        iout.append(val)
3942
      output.append(iout)
3943

    
3944
    return output
3945
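
# Illustrative sketch, not part of cmdlib.py: how the parameterized field
# names accepted by LUQueryInstances above decompose into the groups that
# Exec reads via st_match.groups(). Plain stdlib re is used, anchored for the
# example; the names _EXAMPLE_FIELD_RE and _ExampleFieldGroups are
# hypothetical.
import re

_EXAMPLE_FIELD_RE = re.compile(r"^(nic)\.(mac|ip|mode|link)/([0-9]+)$")


def _ExampleFieldGroups(field):
  """Return e.g. ("nic", "mac", "0") for "nic.mac/0", or None if no match.

  """
  m = _EXAMPLE_FIELD_RE.match(field)
  if m:
    return m.groups()
  return None

# _ExampleFieldGroups("nic.mac/0") == ("nic", "mac", "0"); the last group is
# the index used to select instance.nics[0] in the Exec method above.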

    
3946

    
3947
class LUFailoverInstance(LogicalUnit):
3948
  """Failover an instance.
3949

3950
  """
3951
  HPATH = "instance-failover"
3952
  HTYPE = constants.HTYPE_INSTANCE
3953
  _OP_REQP = ["instance_name", "ignore_consistency"]
3954
  REQ_BGL = False
3955

    
3956
  def ExpandNames(self):
3957
    self._ExpandAndLockInstance()
3958
    self.needed_locks[locking.LEVEL_NODE] = []
3959
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3960

    
3961
  def DeclareLocks(self, level):
3962
    if level == locking.LEVEL_NODE:
3963
      self._LockInstancesNodes()
3964

    
3965
  def BuildHooksEnv(self):
3966
    """Build hooks env.
3967

3968
    This runs on master, primary and secondary nodes of the instance.
3969

3970
    """
3971
    env = {
3972
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3973
      }
3974
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3975
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3976
    return env, nl, nl
3977

    
3978
  def CheckPrereq(self):
3979
    """Check prerequisites.
3980

3981
    This checks that the instance is in the cluster.
3982

3983
    """
3984
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3985
    assert self.instance is not None, \
3986
      "Cannot retrieve locked instance %s" % self.op.instance_name
3987

    
3988
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3989
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3990
      raise errors.OpPrereqError("Instance's disk layout is not"
3991
                                 " network mirrored, cannot failover.")
3992

    
3993
    secondary_nodes = instance.secondary_nodes
3994
    if not secondary_nodes:
3995
      raise errors.ProgrammerError("no secondary node but using "
3996
                                   "a mirrored disk template")
3997

    
3998
    target_node = secondary_nodes[0]
3999
    _CheckNodeOnline(self, target_node)
4000
    _CheckNodeNotDrained(self, target_node)
4001
    if instance.admin_up:
4002
      # check memory requirements on the secondary node
4003
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4004
                           instance.name, bep[constants.BE_MEMORY],
4005
                           instance.hypervisor)
4006
    else:
4007
      self.LogInfo("Not checking memory on the secondary node as"
4008
                   " instance will not be started")
4009

    
4010
    # check bridge existence
4011
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4012

    
4013
  def Exec(self, feedback_fn):
4014
    """Failover an instance.
4015

4016
    The failover is done by shutting it down on its present node and
4017
    starting it on the secondary.
4018

4019
    """
4020
    instance = self.instance
4021

    
4022
    source_node = instance.primary_node
4023
    target_node = instance.secondary_nodes[0]
4024

    
4025
    feedback_fn("* checking disk consistency between source and target")
4026
    for dev in instance.disks:
4027
      # for drbd, these are drbd over lvm
4028
      if not _CheckDiskConsistency(self, dev, target_node, False):
4029
        if instance.admin_up and not self.op.ignore_consistency:
4030
          raise errors.OpExecError("Disk %s is degraded on target node,"
4031
                                   " aborting failover." % dev.iv_name)
4032

    
4033
    feedback_fn("* shutting down instance on source node")
4034
    logging.info("Shutting down instance %s on node %s",
4035
                 instance.name, source_node)
4036

    
4037
    result = self.rpc.call_instance_shutdown(source_node, instance)
4038
    msg = result.fail_msg
4039
    if msg:
4040
      if self.op.ignore_consistency:
4041
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4042
                             " Proceeding anyway. Please make sure node"
4043
                             " %s is down. Error details: %s",
4044
                             instance.name, source_node, source_node, msg)
4045
      else:
4046
        raise errors.OpExecError("Could not shutdown instance %s on"
4047
                                 " node %s: %s" %
4048
                                 (instance.name, source_node, msg))
4049

    
4050
    feedback_fn("* deactivating the instance's disks on source node")
4051
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4052
      raise errors.OpExecError("Can't shut down the instance's disks.")
4053

    
4054
    instance.primary_node = target_node
4055
    # distribute new instance config to the other nodes
4056
    self.cfg.Update(instance)
4057

    
4058
    # Only start the instance if it's marked as up
4059
    if instance.admin_up:
4060
      feedback_fn("* activating the instance's disks on target node")
4061
      logging.info("Starting instance %s on node %s",
4062
                   instance.name, target_node)
4063

    
4064
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4065
                                               ignore_secondaries=True)
4066
      if not disks_ok:
4067
        _ShutdownInstanceDisks(self, instance)
4068
        raise errors.OpExecError("Can't activate the instance's disks")
4069

    
4070
      feedback_fn("* starting the instance on the target node")
4071
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4072
      msg = result.fail_msg
4073
      if msg:
4074
        _ShutdownInstanceDisks(self, instance)
4075
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4076
                                 (instance.name, target_node, msg))
4077

    
4078

    
4079
class LUMigrateInstance(LogicalUnit):
4080
  """Migrate an instance.
4081

4082
  This is migration without shutting down, compared to the failover,
4083
  which is done with shutdown.
4084

4085
  """
4086
  HPATH = "instance-migrate"
4087
  HTYPE = constants.HTYPE_INSTANCE
4088
  _OP_REQP = ["instance_name", "live", "cleanup"]
4089

    
4090
  REQ_BGL = False
4091

    
4092
  def ExpandNames(self):
4093
    self._ExpandAndLockInstance()
4094

    
4095
    self.needed_locks[locking.LEVEL_NODE] = []
4096
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4097

    
4098
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4099
                                       self.op.live, self.op.cleanup)
4100
    self.tasklets = [self._migrater]
4101

    
4102
  def DeclareLocks(self, level):
4103
    if level == locking.LEVEL_NODE:
4104
      self._LockInstancesNodes()
4105

    
4106
  def BuildHooksEnv(self):
4107
    """Build hooks env.
4108

4109
    This runs on master, primary and secondary nodes of the instance.
4110

4111
    """
4112
    instance = self._migrater.instance
4113
    env = _BuildInstanceHookEnvByObject(self, instance)
4114
    env["MIGRATE_LIVE"] = self.op.live
4115
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4116
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4117
    return env, nl, nl
4118

    
4119

    
4120
class LUMigrateNode(LogicalUnit):
4121
  """Migrate all instances from a node.
4122

4123
  """
4124
  HPATH = "node-migrate"
4125
  HTYPE = constants.HTYPE_NODE
4126
  _OP_REQP = ["node_name", "live"]
4127
  REQ_BGL = False
4128

    
4129
  def ExpandNames(self):
4130
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
4131
    if self.op.node_name is None:
4132
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
4133

    
4134
    self.needed_locks = {
4135
      locking.LEVEL_NODE: [self.op.node_name],
4136
      }
4137

    
4138
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4139

    
4140
    # Create tasklets for migrating instances for all instances on this node
4141
    names = []
4142
    tasklets = []
4143

    
4144
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4145
      logging.debug("Migrating instance %s", inst.name)
4146
      names.append(inst.name)
4147

    
4148
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4149

    
4150
    self.tasklets = tasklets
4151

    
4152
    # Declare instance locks
4153
    self.needed_locks[locking.LEVEL_INSTANCE] = names
4154

    
4155
  def DeclareLocks(self, level):
4156
    if level == locking.LEVEL_NODE:
4157
      self._LockInstancesNodes()
4158

    
4159
  def BuildHooksEnv(self):
4160
    """Build hooks env.
4161

4162
    This runs on the master, the primary and all the secondaries.
4163

4164
    """
4165
    env = {
4166
      "NODE_NAME": self.op.node_name,
4167
      }
4168

    
4169
    nl = [self.cfg.GetMasterNode()]
4170

    
4171
    return (env, nl, nl)
4172

    
4173

    
4174
class TLMigrateInstance(Tasklet):
4175
  def __init__(self, lu, instance_name, live, cleanup):
4176
    """Initializes this class.
4177

4178
    """
4179
    Tasklet.__init__(self, lu)
4180

    
4181
    # Parameters
4182
    self.instance_name = instance_name
4183
    self.live = live
4184
    self.cleanup = cleanup
4185

    
4186
  def CheckPrereq(self):
4187
    """Check prerequisites.
4188

4189
    This checks that the instance is in the cluster.
4190

4191
    """
4192
    instance = self.cfg.GetInstanceInfo(
4193
      self.cfg.ExpandInstanceName(self.instance_name))
4194
    if instance is None:
4195
      raise errors.OpPrereqError("Instance '%s' not known" %
4196
                                 self.instance_name)
4197

    
4198
    if instance.disk_template != constants.DT_DRBD8:
4199
      raise errors.OpPrereqError("Instance's disk layout is not"
4200
                                 " drbd8, cannot migrate.")
4201

    
4202
    secondary_nodes = instance.secondary_nodes
4203
    if not secondary_nodes:
4204
      raise errors.ConfigurationError("No secondary node but using"
4205
                                      " drbd8 disk template")
4206

    
4207
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4208

    
4209
    target_node = secondary_nodes[0]
4210
    # check memory requirements on the secondary node
4211
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4212
                         instance.name, i_be[constants.BE_MEMORY],
4213
                         instance.hypervisor)
4214

    
4215
    # check bridge existence
4216
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4217

    
4218
    if not self.cleanup:
4219
      _CheckNodeNotDrained(self, target_node)
4220
      result = self.rpc.call_instance_migratable(instance.primary_node,
4221
                                                 instance)
4222
      result.Raise("Can't migrate, please use failover", prereq=True)
4223

    
4224
    self.instance = instance
4225

    
4226
  def _WaitUntilSync(self):
4227
    """Poll with custom rpc for disk sync.
4228

4229
    This uses our own step-based rpc call.
4230

4231
    """
4232
    self.feedback_fn("* wait until resync is done")
4233
    all_done = False
4234
    while not all_done:
4235
      all_done = True
4236
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4237
                                            self.nodes_ip,
4238
                                            self.instance.disks)
4239
      min_percent = 100
4240
      for node, nres in result.items():
4241
        nres.Raise("Cannot resync disks on node %s" % node)
4242
        node_done, node_percent = nres.payload
4243
        all_done = all_done and node_done
4244
        if node_percent is not None:
4245
          min_percent = min(min_percent, node_percent)
4246
      if not all_done:
4247
        if min_percent < 100:
4248
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
4249
        time.sleep(2)
4250

    
4251
  def _EnsureSecondary(self, node):
4252
    """Demote a node to secondary.
4253

4254
    """
4255
    self.feedback_fn("* switching node %s to secondary mode" % node)
4256

    
4257
    for dev in self.instance.disks:
4258
      self.cfg.SetDiskID(dev, node)
4259

    
4260
    result = self.rpc.call_blockdev_close(node, self.instance.name,
4261
                                          self.instance.disks)
4262
    result.Raise("Cannot change disk to secondary on node %s" % node)
4263

    
4264
  def _GoStandalone(self):
4265
    """Disconnect from the network.
4266

4267
    """
4268
    self.feedback_fn("* changing into standalone mode")
4269
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4270
                                               self.instance.disks)
4271
    for node, nres in result.items():
4272
      nres.Raise("Cannot disconnect disks node %s" % node)
4273

    
4274
  def _GoReconnect(self, multimaster):
4275
    """Reconnect to the network.
4276

4277
    """
4278
    if multimaster:
4279
      msg = "dual-master"
4280
    else:
4281
      msg = "single-master"
4282
    self.feedback_fn("* changing disks into %s mode" % msg)
4283
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4284
                                           self.instance.disks,
4285
                                           self.instance.name, multimaster)
4286
    for node, nres in result.items():
4287
      nres.Raise("Cannot change disks config on node %s" % node)
4288

    
4289
  def _ExecCleanup(self):
4290
    """Try to cleanup after a failed migration.
4291

4292
    The cleanup is done by:
4293
      - check that the instance is running only on one node
4294
        (and update the config if needed)
4295
      - change disks on its secondary node to secondary
4296
      - wait until disks are fully synchronized
4297
      - disconnect from the network
4298
      - change disks into single-master mode
4299
      - wait again until disks are fully synchronized
4300

4301
    """
4302
    instance = self.instance
4303
    target_node = self.target_node
4304
    source_node = self.source_node
4305

    
4306
    # check running on only one node
4307
    self.feedback_fn("* checking where the instance actually runs"
4308
                     " (if this hangs, the hypervisor might be in"
4309
                     " a bad state)")
4310
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4311
    for node, result in ins_l.items():
4312
      result.Raise("Can't contact node %s" % node)
4313

    
4314
    runningon_source = instance.name in ins_l[source_node].payload
4315
    runningon_target = instance.name in ins_l[target_node].payload
4316

    
4317
    if runningon_source and runningon_target:
4318
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4319
                               " or the hypervisor is confused. You will have"
4320
                               " to ensure manually that it runs only on one"
4321
                               " and restart this operation.")
4322

    
4323
    if not (runningon_source or runningon_target):
4324
      raise errors.OpExecError("Instance does not seem to be running at all."
4325
                               " In this case, it's safer to repair by"
4326
                               " running 'gnt-instance stop' to ensure disk"
4327
                               " shutdown, and then restarting it.")
4328

    
4329
    if runningon_target:
4330
      # the migration has actually succeeded, we need to update the config
4331
      self.feedback_fn("* instance running on secondary node (%s),"
4332
                       " updating config" % target_node)
4333
      instance.primary_node = target_node
4334
      self.cfg.Update(instance)
4335
      demoted_node = source_node
4336
    else:
4337
      self.feedback_fn("* instance confirmed to be running on its"
4338
                       " primary node (%s)" % source_node)
4339
      demoted_node = target_node
4340

    
4341
    self._EnsureSecondary(demoted_node)
4342
    try:
4343
      self._WaitUntilSync()
4344
    except errors.OpExecError:
4345
      # we ignore here errors, since if the device is standalone, it
4346
      # won't be able to sync
4347
      pass
4348
    self._GoStandalone()
4349
    self._GoReconnect(False)
4350
    self._WaitUntilSync()
4351

    
4352
    self.feedback_fn("* done")
4353

    
4354
  def _RevertDiskStatus(self):
4355
    """Try to revert the disk status after a failed migration.
4356

4357
    """
4358
    target_node = self.target_node
4359
    try:
4360
      self._EnsureSecondary(target_node)
4361
      self._GoStandalone()
4362
      self._GoReconnect(False)
4363
      self._WaitUntilSync()
4364
    except errors.OpExecError, err:
4365
      self.lu.LogWarning("Migration failed and I can't reconnect the"
4366
                         " drives: error '%s'\n"
4367
                         "Please look and recover the instance status" %
4368
                         str(err))
4369

    
4370
  def _AbortMigration(self):
4371
    """Call the hypervisor code to abort a started migration.
4372

4373
    """
4374
    instance = self.instance
4375
    target_node = self.target_node
4376
    migration_info = self.migration_info
4377

    
4378
    abort_result = self.rpc.call_finalize_migration(target_node,
4379
                                                    instance,
4380
                                                    migration_info,
4381
                                                    False)
4382
    abort_msg = abort_result.fail_msg
4383
    if abort_msg:
4384
      logging.error("Aborting migration failed on target node %s: %s" %
4385
                    (target_node, abort_msg))
4386
      # Don't raise an exception here, as we still have to try to revert the
4387
      # disk status, even if this step failed.
4388

    
4389
  def _ExecMigration(self):
4390
    """Migrate an instance.
4391

4392
    The migrate is done by:
4393
      - change the disks into dual-master mode
4394
      - wait until disks are fully synchronized again
4395
      - migrate the instance
4396
      - change disks on the new secondary node (the old primary) to secondary
4397
      - wait until disks are fully synchronized
4398
      - change disks into single-master mode
4399

4400
    """
4401
    instance = self.instance
4402
    target_node = self.target_node
4403
    source_node = self.source_node
4404

    
4405
    self.feedback_fn("* checking disk consistency between source and target")
4406
    for dev in instance.disks:
4407
      if not _CheckDiskConsistency(self, dev, target_node, False):
4408
        raise errors.OpExecError("Disk %s is degraded or not fully"
4409
                                 " synchronized on target node,"
4410
                                 " aborting migrate." % dev.iv_name)
4411

    
4412
    # First get the migration information from the remote node
4413
    result = self.rpc.call_migration_info(source_node, instance)
4414
    msg = result.fail_msg
4415
    if msg:
4416
      log_err = ("Failed fetching source migration information from %s: %s" %
4417
                 (source_node, msg))
4418
      logging.error(log_err)
4419
      raise errors.OpExecError(log_err)
4420

    
4421
    self.migration_info = migration_info = result.payload
4422

    
4423
    # Then switch the disks to master/master mode
4424
    self._EnsureSecondary(target_node)
4425
    self._GoStandalone()
4426
    self._GoReconnect(True)
4427
    self._WaitUntilSync()
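    # the disks are now connected in dual-master mode, so they can be primary
    # on both nodes while the instance is migrated below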
4428

    
4429
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4430
    result = self.rpc.call_accept_instance(target_node,
4431
                                           instance,
4432
                                           migration_info,
4433
                                           self.nodes_ip[target_node])
4434

    
4435
    msg = result.fail_msg
4436
    if msg:
4437
      logging.error("Instance pre-migration failed, trying to revert"
4438
                    " disk status: %s", msg)
4439
      self._AbortMigration()
4440
      self._RevertDiskStatus()
4441
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4442
                               (instance.name, msg))
4443

    
4444
    self.feedback_fn("* migrating instance to %s" % target_node)
4445
    time.sleep(10)
4446
    result = self.rpc.call_instance_migrate(source_node, instance,
4447
                                            self.nodes_ip[target_node],
4448
                                            self.live)
4449
    msg = result.fail_msg
4450
    if msg:
4451
      logging.error("Instance migration failed, trying to revert"
4452
                    " disk status: %s", msg)
4453
      self._AbortMigration()
4454
      self._RevertDiskStatus()
4455
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4456
                               (instance.name, msg))
4457
    time.sleep(10)
4458

    
4459
    instance.primary_node = target_node
4460
    # distribute new instance config to the other nodes
4461
    self.cfg.Update(instance)
4462

    
4463
    result = self.rpc.call_finalize_migration(target_node,
4464
                                              instance,
4465
                                              migration_info,
4466
                                              True)
4467
    msg = result.fail_msg
4468
    if msg:
4469
      logging.error("Instance migration succeeded, but finalization failed:"
4470
                    " %s" % msg)
4471
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4472
                               msg)
4473

    
4474
    self._EnsureSecondary(source_node)
4475
    self._WaitUntilSync()
4476
    self._GoStandalone()
4477
    self._GoReconnect(False)
4478
    self._WaitUntilSync()
4479

    
4480
    self.feedback_fn("* done")
4481

    
4482
  def Exec(self, feedback_fn):
4483
    """Perform the migration.
4484

4485
    """
4486
    feedback_fn("Migrating instance %s" % self.instance.name)
4487

    
4488
    self.feedback_fn = feedback_fn
4489

    
4490
    self.source_node = self.instance.primary_node
4491
    self.target_node = self.instance.secondary_nodes[0]
4492
    self.all_nodes = [self.source_node, self.target_node]
4493
    self.nodes_ip = {
4494
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4495
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4496
      }
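    # the disk (DRBD) and migration RPCs below use the nodes' secondary IPs,
    # i.e. the addresses on the replication network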
4497

    
4498
    if self.cleanup:
4499
      return self._ExecCleanup()
4500
    else:
4501
      return self._ExecMigration()
4502

    
4503

    
4504
def _CreateBlockDev(lu, node, instance, device, force_create,
4505
                    info, force_open):
4506
  """Create a tree of block devices on a given node.
4507

4508
  If this device type has to be created on secondaries, create it and
4509
  all its children.
4510

4511
  If not, just recurse to children keeping the same 'force' value.
4512

4513
  @param lu: the lu on whose behalf we execute
4514
  @param node: the node on which to create the device
4515
  @type instance: L{objects.Instance}
4516
  @param instance: the instance which owns the device
4517
  @type device: L{objects.Disk}
4518
  @param device: the device to create
4519
  @type force_create: boolean
4520
  @param force_create: whether to force creation of this device; this
4521
      will be changed to True whenever we find a device which has
4522
      CreateOnSecondary() attribute
4523
  @param info: the extra 'metadata' we should attach to the device
4524
      (this will be represented as a LVM tag)
4525
  @type force_open: boolean
4526
  @param force_open: this parameter will be passed to the
4527
      L{backend.BlockdevCreate} function where it specifies
4528
      whether we run on primary or not, and it affects both
4529
      the child assembly and the device's own Open() execution
4530

4531
  """
4532
  if device.CreateOnSecondary():
4533
    force_create = True
4534

    
4535
  if device.children:
4536
    for child in device.children:
4537
      _CreateBlockDev(lu, node, instance, child, force_create,
4538
                      info, force_open)
4539

    
4540
  if not force_create:
4541
    return
4542

    
4543
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
4544

    
4545

    
4546
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
4547
  """Create a single block device on a given node.
4548

4549
  This will not recurse over children of the device, so they must be
4550
  created in advance.
4551

4552
  @param lu: the lu on whose behalf we execute
4553
  @param node: the node on which to create the device
4554
  @type instance: L{objects.Instance}
4555
  @param instance: the instance which owns the device
4556
  @type device: L{objects.Disk}
4557
  @param device: the device to create
4558
  @param info: the extra 'metadata' we should attach to the device
4559
      (this will be represented as a LVM tag)
4560
  @type force_open: boolean
4561
  @param force_open: this parameter will be passed to the
4562
      L{backend.BlockdevCreate} function where it specifies
4563
      whether we run on primary or not, and it affects both
4564
      the child assembly and the device's own Open() execution
4565

4566
  """
4567
  lu.cfg.SetDiskID(device, node)
4568
  result = lu.rpc.call_blockdev_create(node, device, device.size,
4569
                                       instance.name, force_open, info)
4570
  result.Raise("Can't create block device %s on"
4571
               " node %s for instance %s" % (device, node, instance.name))
4572
  if device.physical_id is None:
4573
    device.physical_id = result.payload
4574

    
4575

    
4576
def _GenerateUniqueNames(lu, exts):
4577
  """Generate a suitable LV name.
4578

4579
  This will generate a logical volume name for the given instance.
4580

4581
  """
4582
  results = []
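  # e.g. for exts == [".disk0", ".disk1"] this returns names of the form
  # "<unique-id>.disk0", "<unique-id>.disk1"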
4583
  for val in exts:
4584
    new_id = lu.cfg.GenerateUniqueID()
4585
    results.append("%s%s" % (new_id, val))
4586
  return results
4587

    
4588

    
4589
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
4590
                         p_minor, s_minor):
4591
  """Generate a drbd8 device complete with its children.
4592

4593
  """
4594
  port = lu.cfg.AllocatePort()
4595
  vgname = lu.cfg.GetVGName()
4596
  shared_secret = lu.cfg.GenerateDRBDSecret()
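  # a DRBD8 disk consists of a data LV of the requested size plus a fixed
  # 128 MB metadata LV, with both ends sharing the port and secret allocated
  # above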
4597
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4598
                          logical_id=(vgname, names[0]))
4599
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4600
                          logical_id=(vgname, names[1]))
4601
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
4602
                          logical_id=(primary, secondary, port,
4603
                                      p_minor, s_minor,
4604
                                      shared_secret),
4605
                          children=[dev_data, dev_meta],
4606
                          iv_name=iv_name)
4607
  return drbd_dev
4608

    
4609

    
4610
def _GenerateDiskTemplate(lu, template_name,
4611
                          instance_name, primary_node,
4612
                          secondary_nodes, disk_info,
4613
                          file_storage_dir, file_driver,
4614
                          base_index):
4615
  """Generate the entire disk layout for a given template type.
4616

4617
  """
4618
  #TODO: compute space requirements
4619

    
4620
  vgname = lu.cfg.GetVGName()
4621
  disk_count = len(disk_info)
4622
  disks = []
4623
  if template_name == constants.DT_DISKLESS:
4624
    pass
4625
  elif template_name == constants.DT_PLAIN:
4626
    if len(secondary_nodes) != 0:
4627
      raise errors.ProgrammerError("Wrong template configuration")
4628

    
4629
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
4630
                                      for i in range(disk_count)])
4631
    for idx, disk in enumerate(disk_info):
4632
      disk_index = idx + base_index
4633
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
4634
                              logical_id=(vgname, names[idx]),
4635
                              iv_name="disk/%d" % disk_index,
4636
                              mode=disk["mode"])
4637
      disks.append(disk_dev)
4638
  elif template_name == constants.DT_DRBD8:
4639
    if len(secondary_nodes) != 1:
4640
      raise errors.ProgrammerError("Wrong template configuration")
4641
    remote_node = secondary_nodes[0]
4642
    minors = lu.cfg.AllocateDRBDMinor(
4643
      [primary_node, remote_node] * len(disk_info), instance_name)
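    # two minors are allocated per disk, one for each end of the DRBD pair;
    # the matching <name>_data and <name>_meta LV names are generated below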
4644

    
4645
    names = []
4646
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
4647
                                               for i in range(disk_count)]):
4648
      names.append(lv_prefix + "_data")
4649
      names.append(lv_prefix + "_meta")
4650
    for idx, disk in enumerate(disk_info):
4651
      disk_index = idx + base_index
4652
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
4653
                                      disk["size"], names[idx*2:idx*2+2],
4654
                                      "disk/%d" % disk_index,
4655
                                      minors[idx*2], minors[idx*2+1])
4656
      disk_dev.mode = disk["mode"]
4657
      disks.append(disk_dev)
4658
  elif template_name == constants.DT_FILE:
4659
    if len(secondary_nodes) != 0:
4660
      raise errors.ProgrammerError("Wrong template configuration")
4661

    
4662
    for idx, disk in enumerate(disk_info):
4663
      disk_index = idx + base_index
4664
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
4665
                              iv_name="disk/%d" % disk_index,
4666
                              logical_id=(file_driver,
4667
                                          "%s/disk%d" % (file_storage_dir,
4668
                                                         disk_index)),
4669
                              mode=disk["mode"])
4670
      disks.append(disk_dev)
4671
  else:
4672
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
4673
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance):
4684
  """Create all disks for an instance.
4685

4686
  This abstracts away some work from AddInstance.
4687

4688
  @type lu: L{LogicalUnit}
4689
  @param lu: the logical unit on whose behalf we execute
4690
  @type instance: L{objects.Instance}
4691
  @param instance: the instance whose disks we should create
4692
  @rtype: boolean
4693
  @return: the success of the creation
4694

4695
  """
4696
  info = _GetInstanceInfoText(instance)
4697
  pnode = instance.primary_node
4698

    
4699
  if instance.disk_template == constants.DT_FILE:
4700
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4701
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
4702

    
4703
    result.Raise("Failed to create directory '%s' on"
4704
                 " node %s: %s" % (file_storage_dir, pnode))
4705

    
4706
  # Note: this needs to be kept in sync with adding of disks in
4707
  # LUSetInstanceParams
4708
  for device in instance.disks:
4709
    logging.info("Creating volume %s for instance %s",
4710
                 device.iv_name, instance.name)
4711
    #HARDCODE
4712
    for node in instance.all_nodes:
4713
      f_create = node == pnode
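      # the full device tree is created (and opened) on the primary node; on
      # the other nodes only devices which report CreateOnSecondary() are
      # created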
4714
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
4715

    
4716

    
4717
def _RemoveDisks(lu, instance):
4718
  """Remove all disks for an instance.
4719

4720
  This abstracts away some work from `AddInstance()` and
4721
  `RemoveInstance()`. Note that in case some of the devices couldn't
4722
  be removed, the removal will continue with the other ones (compare
4723
  with `_CreateDisks()`).
4724

4725
  @type lu: L{LogicalUnit}
4726
  @param lu: the logical unit on whose behalf we execute
4727
  @type instance: L{objects.Instance}
4728
  @param instance: the instance whose disks we should remove
4729
  @rtype: boolean
4730
  @return: the success of the removal
4731

4732
  """
4733
  logging.info("Removing block devices for instance %s", instance.name)
4734

    
4735
  all_result = True
4736
  for device in instance.disks:
4737
    for node, disk in device.ComputeNodeTree(instance.primary_node):
4738
      lu.cfg.SetDiskID(disk, node)
4739
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
4740
      if msg:
4741
        lu.LogWarning("Could not remove block device %s on node %s,"
4742
                      " continuing anyway: %s", device.iv_name, node, msg)
4743
        all_result = False
4744

    
4745
  if instance.disk_template == constants.DT_FILE:
4746
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4747
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
4748
                                                 file_storage_dir)
4749
    msg = result.fail_msg
4750
    if msg:
4751
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
4752
                    file_storage_dir, instance.primary_node, msg)
4753
      all_result = False
4754

    
4755
  return all_result
4756

    
4757

    
4758
def _ComputeDiskSize(disk_template, disks):
4759
  """Compute disk size requirements in the volume group
4760

4761
  """
4762
  # Required free disk space as a function of disk and swap space
4763
  req_size_dict = {
4764
    constants.DT_DISKLESS: None,
4765
    constants.DT_PLAIN: sum(d["size"] for d in disks),
4766
    # 128 MB are added for drbd metadata for each disk
4767
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
4768
    constants.DT_FILE: None,
4769
  }
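  # for example, two disks of 1024 MB and 512 MB under DT_DRBD8 need
  # (1024 + 128) + (512 + 128) = 1792 MB of free space in the volume group,
  # while DT_FILE and DT_DISKLESS need no space there at all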
4770

    
4771
  if disk_template not in req_size_dict:
4772
    raise errors.ProgrammerError("Disk template '%s' size requirement"
4773
                                 " is unknown" %  disk_template)
4774

    
4775
  return req_size_dict[disk_template]
4776

    
4777

    
4778
def _CheckHVParams(lu, nodenames, hvname, hvparams):
4779
  """Hypervisor parameter validation.
4780

4781
  This function abstract the hypervisor parameter validation to be
4782
  used in both instance create and instance modify.
4783

4784
  @type lu: L{LogicalUnit}
4785
  @param lu: the logical unit for which we check
4786
  @type nodenames: list
4787
  @param nodenames: the list of nodes on which we should check
4788
  @type hvname: string
4789
  @param hvname: the name of the hypervisor we should use
4790
  @type hvparams: dict
4791
  @param hvparams: the parameters which we need to check
4792
  @raise errors.OpPrereqError: if the parameters are not valid
4793

4794
  """
4795
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
4796
                                                  hvname,
4797
                                                  hvparams)
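  # offline nodes cannot be contacted and are skipped; a validation failure
  # on any other node aborts the operation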
4798
  for node in nodenames:
4799
    info = hvinfo[node]
4800
    if info.offline:
4801
      continue
4802
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
4803

    
4804

    
4805
class LUCreateInstance(LogicalUnit):
4806
  """Create an instance.
4807

4808
  """
4809
  HPATH = "instance-add"
4810
  HTYPE = constants.HTYPE_INSTANCE
4811
  _OP_REQP = ["instance_name", "disks", "disk_template",
4812
              "mode", "start",
4813
              "wait_for_sync", "ip_check", "nics",
4814
              "hvparams", "beparams"]
4815
  REQ_BGL = False
4816

    
4817
  def _ExpandNode(self, node):
4818
    """Expands and checks one node name.
4819

4820
    """
4821
    node_full = self.cfg.ExpandNodeName(node)
4822
    if node_full is None:
4823
      raise errors.OpPrereqError("Unknown node %s" % node)
4824
    return node_full
4825

    
4826
  def ExpandNames(self):
4827
    """ExpandNames for CreateInstance.
4828

4829
    Figure out the right locks for instance creation.
4830

4831
    """
4832
    self.needed_locks = {}
4833

    
4834
    # set optional parameters to none if they don't exist
4835
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4836
      if not hasattr(self.op, attr):
4837
        setattr(self.op, attr, None)
4838

    
4839
    # cheap checks, mostly valid constants given
4840

    
4841
    # verify creation mode
4842
    if self.op.mode not in (constants.INSTANCE_CREATE,
4843
                            constants.INSTANCE_IMPORT):
4844
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4845
                                 self.op.mode)
4846

    
4847
    # disk template and mirror node verification
4848
    if self.op.disk_template not in constants.DISK_TEMPLATES:
4849
      raise errors.OpPrereqError("Invalid disk template name")
4850

    
4851
    if self.op.hypervisor is None:
4852
      self.op.hypervisor = self.cfg.GetHypervisorType()
4853

    
4854
    cluster = self.cfg.GetClusterInfo()
4855
    enabled_hvs = cluster.enabled_hypervisors
4856
    if self.op.hypervisor not in enabled_hvs:
4857
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4858
                                 " cluster (%s)" % (self.op.hypervisor,
4859
                                  ",".join(enabled_hvs)))
4860

    
4861
    # check hypervisor parameter syntax (locally)
4862
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4863
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
4864
                                  self.op.hvparams)
4865
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4866
    hv_type.CheckParameterSyntax(filled_hvp)
4867
    self.hv_full = filled_hvp
4868

    
4869
    # fill and remember the beparams dict
4870
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4871
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
4872
                                    self.op.beparams)
4873

    
4874
    #### instance parameters check
4875

    
4876
    # instance name verification
4877
    hostname1 = utils.HostInfo(self.op.instance_name)
4878
    self.op.instance_name = instance_name = hostname1.name
4879

    
4880
    # this is just a preventive check, but someone might still add this
4881
    # instance in the meantime, and creation will fail at lock-add time
4882
    if instance_name in self.cfg.GetInstanceList():
4883
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4884
                                 instance_name)
4885

    
4886
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4887

    
4888
    # NIC buildup
4889
    self.nics = []
4890
    for idx, nic in enumerate(self.op.nics):
4891
      nic_mode_req = nic.get("mode", None)
4892
      nic_mode = nic_mode_req
4893
      if nic_mode is None:
4894
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
4895

    
4896
      # in routed mode, for the first nic, the default ip is 'auto'
4897
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
4898
        default_ip_mode = constants.VALUE_AUTO
4899
      else:
4900
        default_ip_mode = constants.VALUE_NONE
4901

    
4902
      # ip validity checks
4903
      ip = nic.get("ip", default_ip_mode)
4904
      if ip is None or ip.lower() == constants.VALUE_NONE:
4905
        nic_ip = None
4906
      elif ip.lower() == constants.VALUE_AUTO:
4907
        nic_ip = hostname1.ip
4908
      else:
4909
        if not utils.IsValidIP(ip):
4910
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4911
                                     " like a valid IP" % ip)
4912
        nic_ip = ip
4913

    
4914
      # TODO: check the ip for uniqueness !!
4915
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
4916
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
4917

    
4918
      # MAC address verification
4919
      mac = nic.get("mac", constants.VALUE_AUTO)
4920
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4921
        if not utils.IsValidMac(mac.lower()):
4922
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4923
                                     mac)
4924
      # bridge verification
4925
      bridge = nic.get("bridge", None)
4926
      link = nic.get("link", None)
4927
      if bridge and link:
4928
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
4929
                                   " at the same time")
4930
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
4931
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
4932
      elif bridge:
4933
        link = bridge
4934

    
4935
      nicparams = {}
4936
      if nic_mode_req:
4937
        nicparams[constants.NIC_MODE] = nic_mode_req
4938
      if link:
4939
        nicparams[constants.NIC_LINK] = link
4940

    
4941
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4942
                                      nicparams)
4943
      objects.NIC.CheckParameterSyntax(check_params)
4944
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
4945

    
4946
    # disk checks/pre-build
4947
    self.disks = []
4948
    for disk in self.op.disks:
4949
      mode = disk.get("mode", constants.DISK_RDWR)
4950
      if mode not in constants.DISK_ACCESS_SET:
4951
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4952
                                   mode)
4953
      size = disk.get("size", None)
4954
      if size is None:
4955
        raise errors.OpPrereqError("Missing disk size")
4956
      try:
4957
        size = int(size)
4958
      except ValueError:
4959
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4960
      self.disks.append({"size": size, "mode": mode})
4961

    
4962
    # used in CheckPrereq for ip ping check
4963
    self.check_ip = hostname1.ip
4964

    
4965
    # file storage checks
4966
    if (self.op.file_driver and
4967
        not self.op.file_driver in constants.FILE_DRIVER):
4968
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4969
                                 self.op.file_driver)
4970

    
4971
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4972
      raise errors.OpPrereqError("File storage directory path not absolute")
4973

    
4974
    ### Node/iallocator related checks
4975
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4976
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4977
                                 " node must be given")
4978

    
4979
    if self.op.iallocator:
4980
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
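      # the iallocator may pick any node, so we need to lock them all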
4981
    else:
4982
      self.op.pnode = self._ExpandNode(self.op.pnode)
4983
      nodelist = [self.op.pnode]
4984
      if self.op.snode is not None:
4985
        self.op.snode = self._ExpandNode(self.op.snode)
4986
        nodelist.append(self.op.snode)
4987
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4988

    
4989
    # in case of import lock the source node too
4990
    if self.op.mode == constants.INSTANCE_IMPORT:
4991
      src_node = getattr(self.op, "src_node", None)
4992
      src_path = getattr(self.op, "src_path", None)
4993

    
4994
      if src_path is None:
4995
        self.op.src_path = src_path = self.op.instance_name
4996

    
4997
      if src_node is None:
4998
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4999
        self.op.src_node = None
5000
        if os.path.isabs(src_path):
5001
          raise errors.OpPrereqError("Importing an instance from an absolute"
5002
                                     " path requires a source node option.")
5003
      else:
5004
        self.op.src_node = src_node = self._ExpandNode(src_node)
5005
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5006
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
5007
        if not os.path.isabs(src_path):
5008
          self.op.src_path = src_path = \
5009
            os.path.join(constants.EXPORT_DIR, src_path)
5010

    
5011
    else: # INSTANCE_CREATE
5012
      if getattr(self.op, "os_type", None) is None:
5013
        raise errors.OpPrereqError("No guest OS specified")
5014

    
5015
  def _RunAllocator(self):
5016
    """Run the allocator based on input opcode.
5017

5018
    """
5019
    nics = [n.ToDict() for n in self.nics]
5020
    ial = IAllocator(self.cfg, self.rpc,
5021
                     mode=constants.IALLOCATOR_MODE_ALLOC,
5022
                     name=self.op.instance_name,
5023
                     disk_template=self.op.disk_template,
5024
                     tags=[],
5025
                     os=self.op.os_type,
5026
                     vcpus=self.be_full[constants.BE_VCPUS],
5027
                     mem_size=self.be_full[constants.BE_MEMORY],
5028
                     disks=self.disks,
5029
                     nics=nics,
5030
                     hypervisor=self.op.hypervisor,
5031
                     )
5032

    
5033
    ial.Run(self.op.iallocator)
5034

    
5035
    if not ial.success:
5036
      raise errors.OpPrereqError("Can't compute nodes using"
5037
                                 " iallocator '%s': %s" % (self.op.iallocator,
5038
                                                           ial.info))
5039
    if len(ial.nodes) != ial.required_nodes:
5040
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5041
                                 " of nodes (%s), required %s" %
5042
                                 (self.op.iallocator, len(ial.nodes),
5043
                                  ial.required_nodes))
5044
    self.op.pnode = ial.nodes[0]
5045
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5046
                 self.op.instance_name, self.op.iallocator,
5047
                 ", ".join(ial.nodes))
5048
    if ial.required_nodes == 2:
5049
      self.op.snode = ial.nodes[1]
5050

    
5051
  def BuildHooksEnv(self):
5052
    """Build hooks env.
5053

5054
    This runs on master, primary and secondary nodes of the instance.
5055

5056
    """
5057
    env = {
5058
      "ADD_MODE": self.op.mode,
5059
      }
5060
    if self.op.mode == constants.INSTANCE_IMPORT:
5061
      env["SRC_NODE"] = self.op.src_node
5062
      env["SRC_PATH"] = self.op.src_path
5063
      env["SRC_IMAGES"] = self.src_images
5064

    
5065
    env.update(_BuildInstanceHookEnv(
5066
      name=self.op.instance_name,
5067
      primary_node=self.op.pnode,
5068
      secondary_nodes=self.secondaries,
5069
      status=self.op.start,
5070
      os_type=self.op.os_type,
5071
      memory=self.be_full[constants.BE_MEMORY],
5072
      vcpus=self.be_full[constants.BE_VCPUS],
5073
      nics=_NICListToTuple(self, self.nics),
5074
      disk_template=self.op.disk_template,
5075
      disks=[(d["size"], d["mode"]) for d in self.disks],
5076
      bep=self.be_full,
5077
      hvp=self.hv_full,
5078
      hypervisor_name=self.op.hypervisor,
5079
    ))
5080

    
5081
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5082
          self.secondaries)
5083
    return env, nl, nl
5084

    
5085

    
5086
  def CheckPrereq(self):
5087
    """Check prerequisites.
5088

5089
    """
5090
    if (not self.cfg.GetVGName() and
5091
        self.op.disk_template not in constants.DTS_NOT_LVM):
5092
      raise errors.OpPrereqError("Cluster does not support lvm-based"
5093
                                 " instances")
5094

    
5095
    if self.op.mode == constants.INSTANCE_IMPORT:
5096
      src_node = self.op.src_node
5097
      src_path = self.op.src_path
5098

    
5099
      if src_node is None:
5100
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5101
        exp_list = self.rpc.call_export_list(locked_nodes)
5102
        found = False
5103
        for node in exp_list:
5104
          if exp_list[node].fail_msg:
5105
            continue
5106
          if src_path in exp_list[node].payload:
5107
            found = True
5108
            self.op.src_node = src_node = node
5109
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5110
                                                       src_path)
5111
            break
5112
        if not found:
5113
          raise errors.OpPrereqError("No export found for relative path %s" %
5114
                                      src_path)
5115

    
5116
      _CheckNodeOnline(self, src_node)
5117
      result = self.rpc.call_export_info(src_node, src_path)
5118
      result.Raise("No export or invalid export found in dir %s" % src_path)
5119

    
5120
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5121
      if not export_info.has_section(constants.INISECT_EXP):
5122
        raise errors.ProgrammerError("Corrupted export config")
5123

    
5124
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
5125
      if (int(ei_version) != constants.EXPORT_VERSION):
5126
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5127
                                   (ei_version, constants.EXPORT_VERSION))
5128

    
5129
      # Check that the new instance doesn't have less disks than the export
5130
      instance_disks = len(self.disks)
5131
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
5132
      if instance_disks < export_disks:
5133
        raise errors.OpPrereqError("Not enough disks to import."
5134
                                   " (instance: %d, export: %d)" %
5135
                                   (instance_disks, export_disks))
5136

    
5137
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
5138
      disk_images = []
5139
      for idx in range(export_disks):
5140
        option = 'disk%d_dump' % idx
5141
        if export_info.has_option(constants.INISECT_INS, option):
5142
          # FIXME: are the old os-es, disk sizes, etc. useful?
5143
          export_name = export_info.get(constants.INISECT_INS, option)
5144
          image = os.path.join(src_path, export_name)
5145
          disk_images.append(image)
5146
        else:
5147
          disk_images.append(False)
5148

    
5149
      self.src_images = disk_images
5150

    
5151
      old_name = export_info.get(constants.INISECT_INS, 'name')
5152
      # FIXME: int() here could throw a ValueError on broken exports
5153
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
5154
      if self.op.instance_name == old_name:
5155
        for idx, nic in enumerate(self.nics):
5156
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
5157
            nic_mac_ini = 'nic%d_mac' % idx
5158
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5159

    
5160
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5161
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
5162
    if self.op.start and not self.op.ip_check:
5163
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5164
                                 " adding an instance in start mode")
5165

    
5166
    if self.op.ip_check:
5167
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5168
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5169
                                   (self.check_ip, self.op.instance_name))
5170

    
5171
    #### mac address generation
5172
    # By generating here the mac address both the allocator and the hooks get
5173
    # the real final mac address rather than the 'auto' or 'generate' value.
5174
    # There is a race condition between the generation and the instance object
5175
    # creation, which means that we know the mac is valid now, but we're not
5176
    # sure it will be when we actually add the instance. If things go bad
5177
    # adding the instance will abort because of a duplicate mac, and the
5178
    # creation job will fail.
5179
    for nic in self.nics:
5180
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5181
        nic.mac = self.cfg.GenerateMAC()
5182

    
5183
    #### allocator run
5184

    
5185
    if self.op.iallocator is not None:
5186
      self._RunAllocator()
5187

    
5188
    #### node related checks
5189

    
5190
    # check primary node
5191
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5192
    assert self.pnode is not None, \
5193
      "Cannot retrieve locked node %s" % self.op.pnode
5194
    if pnode.offline:
5195
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
5196
                                 pnode.name)
5197
    if pnode.drained:
5198
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
5199
                                 pnode.name)
5200

    
5201
    self.secondaries = []
5202

    
5203
    # mirror node verification
5204
    if self.op.disk_template in constants.DTS_NET_MIRROR:
5205
      if self.op.snode is None:
5206
        raise errors.OpPrereqError("The networked disk templates need"
5207
                                   " a mirror node")
5208
      if self.op.snode == pnode.name:
5209
        raise errors.OpPrereqError("The secondary node cannot be"
5210
                                   " the primary node.")
5211
      _CheckNodeOnline(self, self.op.snode)
5212
      _CheckNodeNotDrained(self, self.op.snode)
5213
      self.secondaries.append(self.op.snode)
5214

    
5215
    nodenames = [pnode.name] + self.secondaries
5216

    
5217
    req_size = _ComputeDiskSize(self.op.disk_template,
5218
                                self.disks)
5219

    
5220
    # Check lv size requirements
5221
    if req_size is not None:
5222
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5223
                                         self.op.hypervisor)
5224
      for node in nodenames:
5225
        info = nodeinfo[node]
5226
        info.Raise("Cannot get current information from node %s" % node)
5227
        info = info.payload
5228
        vg_free = info.get('vg_free', None)
5229
        if not isinstance(vg_free, int):
5230
          raise errors.OpPrereqError("Can't compute free disk space on"
5231
                                     " node %s" % node)
5232
        if req_size > vg_free:
5233
          raise errors.OpPrereqError("Not enough disk space on target node %s."
5234
                                     " %d MB available, %d MB required" %
5235
                                     (node, vg_free, req_size))
5236

    
5237
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5238

    
5239
    # os verification
5240
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5241
    result.Raise("OS '%s' not in supported os list for primary node %s" %
5242
                 (self.op.os_type, pnode.name), prereq=True)
5243

    
5244
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5245

    
5246
    # memory check on primary node
5247
    if self.op.start:
5248
      _CheckNodeFreeMemory(self, self.pnode.name,
5249
                           "creating instance %s" % self.op.instance_name,
5250
                           self.be_full[constants.BE_MEMORY],
5251
                           self.op.hypervisor)
5252

    
5253
    self.dry_run_result = list(nodenames)
5254

    
5255
  def Exec(self, feedback_fn):
5256
    """Create and add the instance to the cluster.
5257

5258
    """
5259
    instance = self.op.instance_name
5260
    pnode_name = self.pnode.name
5261

    
5262
    ht_kind = self.op.hypervisor
5263
    if ht_kind in constants.HTS_REQ_PORT:
5264
      network_port = self.cfg.AllocatePort()
5265
    else:
5266
      network_port = None
5267

    
5268
    ##if self.op.vnc_bind_address is None:
5269
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
5270

    
5271
    # this is needed because os.path.join does not accept None arguments
5272
    if self.op.file_storage_dir is None:
5273
      string_file_storage_dir = ""
5274
    else:
5275
      string_file_storage_dir = self.op.file_storage_dir
5276

    
5277
    # build the full file storage dir path
5278
    file_storage_dir = os.path.normpath(os.path.join(
5279
                                        self.cfg.GetFileStorageDir(),
5280
                                        string_file_storage_dir, instance))
5281

    
5282

    
5283
    disks = _GenerateDiskTemplate(self,
5284
                                  self.op.disk_template,
5285
                                  instance, pnode_name,
5286
                                  self.secondaries,
5287
                                  self.disks,
5288
                                  file_storage_dir,
5289
                                  self.op.file_driver,
5290
                                  0)
5291

    
5292
    iobj = objects.Instance(name=instance, os=self.op.os_type,
5293
                            primary_node=pnode_name,
5294
                            nics=self.nics, disks=disks,
5295
                            disk_template=self.op.disk_template,
5296
                            admin_up=False,
5297
                            network_port=network_port,
5298
                            beparams=self.op.beparams,
5299
                            hvparams=self.op.hvparams,
5300
                            hypervisor=self.op.hypervisor,
5301
                            )
5302

    
5303
    feedback_fn("* creating instance disks...")
5304
    try:
5305
      _CreateDisks(self, iobj)
5306
    except errors.OpExecError:
5307
      self.LogWarning("Device creation failed, reverting...")
5308
      try:
5309
        _RemoveDisks(self, iobj)
5310
      finally:
5311
        self.cfg.ReleaseDRBDMinors(instance)
5312
        raise
5313

    
5314
    feedback_fn("adding instance %s to cluster config" % instance)
5315

    
5316
    self.cfg.AddInstance(iobj)
5317
    # Declare that we don't want to remove the instance lock anymore, as we've
5318
    # added the instance to the config
5319
    del self.remove_locks[locking.LEVEL_INSTANCE]
5320
    # Unlock all the nodes
5321
    if self.op.mode == constants.INSTANCE_IMPORT:
5322
      nodes_keep = [self.op.src_node]
5323
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5324
                       if node != self.op.src_node]
5325
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5326
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5327
    else:
5328
      self.context.glm.release(locking.LEVEL_NODE)
5329
      del self.acquired_locks[locking.LEVEL_NODE]
5330

    
5331
    if self.op.wait_for_sync:
5332
      disk_abort = not _WaitForSync(self, iobj)
5333
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
5334
      # make sure the disks are not degraded (still sync-ing is ok)
5335
      time.sleep(15)
5336
      feedback_fn("* checking mirrors status")
5337
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5338
    else:
5339
      disk_abort = False
5340

    
5341
    if disk_abort:
5342
      _RemoveDisks(self, iobj)
5343
      self.cfg.RemoveInstance(iobj.name)
5344
      # Make sure the instance lock gets removed
5345
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5346
      raise errors.OpExecError("There are some degraded disks for"
5347
                               " this instance")
5348

    
5349
    feedback_fn("creating os for instance %s on node %s" %
5350
                (instance, pnode_name))
5351

    
5352
    if iobj.disk_template != constants.DT_DISKLESS:
5353
      if self.op.mode == constants.INSTANCE_CREATE:
5354
        feedback_fn("* running the instance OS create scripts...")
5355
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5356
        result.Raise("Could not add os for instance %s"
5357
                     " on node %s" % (instance, pnode_name))
5358

    
5359
      elif self.op.mode == constants.INSTANCE_IMPORT:
5360
        feedback_fn("* running the instance OS import scripts...")
5361
        src_node = self.op.src_node
5362
        src_images = self.src_images
5363
        cluster_name = self.cfg.GetClusterName()
5364
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5365
                                                         src_node, src_images,
5366
                                                         cluster_name)
5367
        msg = import_result.fail_msg
5368
        if msg:
5369
          self.LogWarning("Error while importing the disk images for instance"
5370
                          " %s on node %s: %s" % (instance, pnode_name, msg))
5371
      else:
5372
        # also checked in the prereq part
5373
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5374
                                     % self.op.mode)
5375

    
5376
    if self.op.start:
5377
      iobj.admin_up = True
5378
      self.cfg.Update(iobj)
5379
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5380
      feedback_fn("* starting instance...")
5381
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5382
      result.Raise("Could not start instance")
5383

    
5384
    return list(iobj.all_nodes)
5385

    
5386

    
5387
class LUConnectConsole(NoHooksLU):
5388
  """Connect to an instance's console.
5389

5390
  This is somewhat special in that it returns the command line that
5391
  you need to run on the master node in order to connect to the
5392
  console.
5393

5394
  """
5395
  _OP_REQP = ["instance_name"]
5396
  REQ_BGL = False
5397

    
5398
  def ExpandNames(self):
5399
    self._ExpandAndLockInstance()
5400

    
5401
  def CheckPrereq(self):
5402
    """Check prerequisites.
5403

5404
    This checks that the instance is in the cluster.
5405

5406
    """
5407
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5408
    assert self.instance is not None, \
5409
      "Cannot retrieve locked instance %s" % self.op.instance_name
5410
    _CheckNodeOnline(self, self.instance.primary_node)
5411

    
5412
  def Exec(self, feedback_fn):
5413
    """Connect to the console of an instance
5414

5415
    """
5416
    instance = self.instance
5417
    node = instance.primary_node
5418

    
5419
    node_insts = self.rpc.call_instance_list([node],
5420
                                             [instance.hypervisor])[node]
5421
    node_insts.Raise("Can't get node information from %s" % node)
5422

    
5423
    if instance.name not in node_insts.payload:
5424
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5425

    
5426
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5427

    
5428
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5429
    cluster = self.cfg.GetClusterInfo()
5430
    # beparams and hvparams are passed separately, to avoid editing the
5431
    # instance and then saving the defaults in the instance itself.
5432
    hvparams = cluster.FillHV(instance)
5433
    beparams = cluster.FillBE(instance)
5434
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5435

    
5436
    # build ssh cmdline
5437
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
5438

    
5439

    
5440
class LUReplaceDisks(LogicalUnit):
5441
  """Replace the disks of an instance.
5442

5443
  """
5444
  HPATH = "mirrors-replace"
5445
  HTYPE = constants.HTYPE_INSTANCE
5446
  _OP_REQP = ["instance_name", "mode", "disks"]
5447
  REQ_BGL = False
5448

    
5449
  def CheckArguments(self):
5450
    if not hasattr(self.op, "remote_node"):
5451
      self.op.remote_node = None
5452
    if not hasattr(self.op, "iallocator"):
5453
      self.op.iallocator = None
5454

    
5455
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
5456
                                  self.op.iallocator)
5457

    
5458
  def ExpandNames(self):
5459
    self._ExpandAndLockInstance()
5460

    
5461
    if self.op.iallocator is not None:
5462
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5463

    
5464
    elif self.op.remote_node is not None:
5465
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5466
      if remote_node is None:
5467
        raise errors.OpPrereqError("Node '%s' not known" %
5468
                                   self.op.remote_node)
5469

    
5470
      self.op.remote_node = remote_node
5471

    
5472
      # Warning: do not remove the locking of the new secondary here
5473
      # unless DRBD8.AddChildren is changed to work in parallel;
5474
      # currently it doesn't since parallel invocations of
5475
      # FindUnusedMinor will conflict
5476
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5477
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5478

    
5479
    else:
5480
      self.needed_locks[locking.LEVEL_NODE] = []
5481
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5482

    
5483
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
5484
                                   self.op.iallocator, self.op.remote_node,
5485
                                   self.op.disks)
5486

    
5487
    self.tasklets = [self.replacer]
5488

    
5489
  def DeclareLocks(self, level):
5490
    # If we're not already locking all nodes in the set we have to declare the
5491
    # instance's primary/secondary nodes.
5492
    if (level == locking.LEVEL_NODE and
5493
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5494
      self._LockInstancesNodes()
5495

    
5496
  def BuildHooksEnv(self):
5497
    """Build hooks env.
5498

5499
    This runs on the master, the primary and all the secondaries.
5500

5501
    """
5502
    instance = self.replacer.instance
5503
    env = {
5504
      "MODE": self.op.mode,
5505
      "NEW_SECONDARY": self.op.remote_node,
5506
      "OLD_SECONDARY": instance.secondary_nodes[0],
5507
      }
5508
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5509
    nl = [
5510
      self.cfg.GetMasterNode(),
5511
      instance.primary_node,
5512
      ]
5513
    if self.op.remote_node is not None:
5514
      nl.append(self.op.remote_node)
5515
    return env, nl, nl
5516

    
5517

    
5518
class LUEvacuateNode(LogicalUnit):
5519
  """Relocate the secondary instances from a node.
5520

5521
  """
5522
  HPATH = "node-evacuate"
5523
  HTYPE = constants.HTYPE_NODE
5524
  _OP_REQP = ["node_name"]
5525
  REQ_BGL = False
5526

    
5527
  def CheckArguments(self):
5528
    if not hasattr(self.op, "remote_node"):
5529
      self.op.remote_node = None
5530
    if not hasattr(self.op, "iallocator"):
5531
      self.op.iallocator = None
5532

    
5533
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
5534
                                  self.op.remote_node,
5535
                                  self.op.iallocator)
5536

    
5537
  def ExpandNames(self):
5538
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
5539
    if self.op.node_name is None:
5540
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
5541

    
5542
    self.needed_locks = {}
5543

    
5544
    # Declare node locks
5545
    if self.op.iallocator is not None:
5546
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5547

    
5548
    elif self.op.remote_node is not None:
5549
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5550
      if remote_node is None:
5551
        raise errors.OpPrereqError("Node '%s' not known" %
5552
                                   self.op.remote_node)
5553

    
5554
      self.op.remote_node = remote_node
5555

    
5556
      # Warning: do not remove the locking of the new secondary here
5557
      # unless DRBD8.AddChildren is changed to work in parallel;
5558
      # currently it doesn't since parallel invocations of
5559
      # FindUnusedMinor will conflict
5560
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5561
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5562

    
5563
    else:
5564
      raise errors.OpPrereqError("Invalid parameters")
5565

    
5566
    # Create tasklets for replacing disks for all secondary instances on this
5567
    # node
5568
    names = []
5569
    tasklets = []
5570

    
5571
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
5572
      logging.debug("Replacing disks for instance %s", inst.name)
5573
      names.append(inst.name)
5574

    
5575
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
5576
                                self.op.iallocator, self.op.remote_node, [])
5577
      tasklets.append(replacer)
5578

    
5579
    self.tasklets = tasklets
5580
    self.instance_names = names
5581

    
5582
    # Declare instance locks
5583
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
5584

    
5585
  def DeclareLocks(self, level):
5586
    # If we're not already locking all nodes in the set we have to declare the
5587
    # instance's primary/secondary nodes.
5588
    if (level == locking.LEVEL_NODE and
5589
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5590
      self._LockInstancesNodes()
5591

    
5592
  def BuildHooksEnv(self):
5593
    """Build hooks env.
5594

5595
    This runs on the master, the primary and all the secondaries.
5596

5597
    """
5598
    env = {
5599
      "NODE_NAME": self.op.node_name,
5600
      }
5601

    
5602
    nl = [self.cfg.GetMasterNode()]
5603

    
5604
    if self.op.remote_node is not None:
5605
      env["NEW_SECONDARY"] = self.op.remote_node
5606
      nl.append(self.op.remote_node)
5607

    
5608
    return (env, nl, nl)
5609

    
5610

    
5611
class TLReplaceDisks(Tasklet):
5612
  """Replaces disks for an instance.
5613

5614
  Note: Locking is not within the scope of this class.
5615

5616
  """
5617
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
5618
               disks):
5619
    """Initializes this class.
5620

5621
    """
5622
    Tasklet.__init__(self, lu)
5623

    
5624
    # Parameters
5625
    self.instance_name = instance_name
5626
    self.mode = mode
5627
    self.iallocator_name = iallocator_name
5628
    self.remote_node = remote_node
5629
    self.disks = disks
5630

    
5631
    # Runtime data
5632
    self.instance = None
5633
    self.new_node = None
5634
    self.target_node = None
5635
    self.other_node = None
5636
    self.remote_node_info = None
5637
    self.node_secondary_ip = None
5638

    
5639
  @staticmethod
5640
  def CheckArguments(mode, remote_node, iallocator):
5641
    """Helper function for users of this class.
5642

5643
    """
5644
    # check for valid parameter combination
5645
    cnt = [remote_node, iallocator].count(None)
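    # when changing the secondary, exactly one of the new node or an
    # iallocator must be given; in all other modes neither may be used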
5646
    if mode == constants.REPLACE_DISK_CHG:
5647
      if cnt == 2:
5648
        raise errors.OpPrereqError("When changing the secondary either an"
5649
                                   " iallocator script must be used or the"
5650
                                   " new node given")
5651
      elif cnt == 0:
5652
        raise errors.OpPrereqError("Give either the iallocator or the new"
5653
                                   " secondary, not both")
5654
    else: # not replacing the secondary
5655
      if cnt != 2:
5656
        raise errors.OpPrereqError("The iallocator and new node options can"
5657
                                   " be used only when changing the"
5658
                                   " secondary node")
5659

    
5660
  @staticmethod
5661
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
5662
    """Compute a new secondary node using an IAllocator.
5663

5664
    """
5665
    ial = IAllocator(lu.cfg, lu.rpc,
5666
                     mode=constants.IALLOCATOR_MODE_RELOC,
5667
                     name=instance_name,
5668
                     relocate_from=relocate_from)
5669

    
5670
    ial.Run(iallocator_name)
5671

    
5672
    if not ial.success:
5673
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
5674
                                 " %s" % (iallocator_name, ial.info))
5675

    
5676
    if len(ial.nodes) != ial.required_nodes:
5677
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5678
                                 " of nodes (%s), required %s" %
5679
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))
5680

    
5681
    remote_node_name = ial.nodes[0]
5682

    
5683
    lu.LogInfo("Selected new secondary for instance '%s': %s",
5684
               instance_name, remote_node_name)
5685

    
5686
    return remote_node_name
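    # Note: in relocate mode the allocator is expected to propose a single
    # replacement node (enforced by the required_nodes check above), hence
    # ial.nodes[0].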
5687

    
5688
  def CheckPrereq(self):
5689
    """Check prerequisites.
5690

5691
    This checks that the instance is in the cluster.
5692

5693
    """
5694
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
5695
    assert self.instance is not None, \
5696
      "Cannot retrieve locked instance %s" % self.instance_name
5697

    
5698
    if self.instance.disk_template != constants.DT_DRBD8:
5699
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5700
                                 " instances")
5701

    
5702
    if len(self.instance.secondary_nodes) != 1:
5703
      raise errors.OpPrereqError("The instance has a strange layout,"
5704
                                 " expected one secondary but found %d" %
5705
                                 len(self.instance.secondary_nodes))
5706

    
5707
    secondary_node = self.instance.secondary_nodes[0]
5708

    
5709
    if self.iallocator_name is None:
5710
      remote_node = self.remote_node
5711
    else:
5712
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
5713
                                       self.instance.name, secondary_node)
5714

    
5715
    if remote_node is not None:
5716
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5717
      assert self.remote_node_info is not None, \
5718
        "Cannot retrieve locked node %s" % remote_node
5719
    else:
5720
      self.remote_node_info = None
5721

    
5722
    if remote_node == self.instance.primary_node:
5723
      raise errors.OpPrereqError("The specified node is the primary node of"
5724
                                 " the instance.")
5725

    
5726
    if remote_node == secondary_node:
5727
      raise errors.OpPrereqError("The specified node is already the"
5728
                                 " secondary node of the instance.")
5729

    
5730
    if self.mode == constants.REPLACE_DISK_PRI:
5731
      self.target_node = self.instance.primary_node
5732
      self.other_node = secondary_node
5733
      check_nodes = [self.target_node, self.other_node]
5734

    
5735
    elif self.mode == constants.REPLACE_DISK_SEC:
5736
      self.target_node = secondary_node
5737
      self.other_node = self.instance.primary_node
5738
      check_nodes = [self.target_node, self.other_node]
5739

    
5740
    elif self.mode == constants.REPLACE_DISK_CHG:
5741
      self.new_node = remote_node
5742
      self.other_node = self.instance.primary_node
5743
      self.target_node = secondary_node
5744
      check_nodes = [self.new_node, self.other_node]
5745

    
5746
      _CheckNodeNotDrained(self.lu, remote_node)
5747

    
5748
    else:
5749
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
5750
                                   self.mode)
5751

    
5752
    for node in check_nodes:
5753
      _CheckNodeOnline(self.lu, node)
5754

    
5755
    # If not specified all disks should be replaced
5756
    if not self.disks:
5757
      self.disks = range(len(self.instance.disks))
5758

    
5759
    # Check whether disks are valid
5760
    for disk_idx in self.disks:
5761
      self.instance.FindDisk(disk_idx)
5762

    
5763
    # Get secondary node IP addresses
5764
    node_2nd_ip = {}
5765

    
5766
    for node_name in [self.target_node, self.other_node, self.new_node]:
5767
      if node_name is not None:
5768
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
5769

    
5770
    self.node_secondary_ip = node_2nd_ip
5771

    
5772
  def Exec(self, feedback_fn):
5773
    """Execute disk replacement.
5774

5775
    This dispatches the disk replacement to the appropriate handler.
5776

5777
    """
5778
    feedback_fn("Replacing disks for %s" % self.instance.name)
5779

    
5780
    activate_disks = (not self.instance.admin_up)
5781

    
5782
    # Activate the instance disks if we're replacing them on a down instance
5783
    if activate_disks:
5784
      _StartInstanceDisks(self.lu, self.instance, True)
5785

    
5786
    try:
5787
      if self.mode == constants.REPLACE_DISK_CHG:
5788
        return self._ExecDrbd8Secondary()
5789
      else:
5790
        return self._ExecDrbd8DiskOnly()
5791

    
5792
    finally:
5793
      # Deactivate the instance disks if we're replacing them on a down
      # instance
5794
      if activate_disks:
5795
        _SafeShutdownInstanceDisks(self.lu, self.instance)
5796

    
5797
  def _CheckVolumeGroup(self, nodes):
5798
    self.lu.LogInfo("Checking volume groups")
5799

    
5800
    vgname = self.cfg.GetVGName()
5801

    
5802
    # Make sure volume group exists on all involved nodes
5803
    results = self.rpc.call_vg_list(nodes)
5804
    if not results:
5805
      raise errors.OpExecError("Can't list volume groups on the nodes")
5806

    
5807
    for node in nodes:
5808
      res = results[node]
5809
      res.Raise("Error checking node %s" % node)
5810
      if vgname not in res.payload:
5811
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
5812
                                 (vgname, node))
5813

    
5814
  def _CheckDisksExistence(self, nodes):
5815
    # Check disk existence
5816
    for idx, dev in enumerate(self.instance.disks):
5817
      if idx not in self.disks:
5818
        continue
5819

    
5820
      for node in nodes:
5821
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
5822
        self.cfg.SetDiskID(dev, node)
5823

    
5824
        result = self.rpc.call_blockdev_find(node, dev)
5825

    
5826
        msg = result.fail_msg
5827
        if msg or not result.payload:
5828
          if not msg:
5829
            msg = "disk not found"
5830
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5831
                                   (idx, node, msg))
5832

    
5833
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
5834
    for idx, dev in enumerate(self.instance.disks):
5835
      if idx not in self.disks:
5836
        continue
5837

    
5838
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
5839
                      (idx, node_name))
5840

    
5841
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
5842
                                   ldisk=ldisk):
5843
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
5844
                                 " replace disks for instance %s" %
5845
                                 (node_name, self.instance.name))
5846

    
5847
  def _CreateNewStorage(self, node_name):
5848
    vgname = self.cfg.GetVGName()
5849
    iv_names = {}
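    # iv_names maps each replaced disk's iv_name (e.g. "disk/0") to a
    # (drbd_dev, old_lvs, new_lvs) tuple; the caller uses it for the later
    # detach/rename/attach steps and for removing the old storage.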
5850

    
5851
    for idx, dev in enumerate(self.instance.disks):
5852
      if idx not in self.disks:
5853
        continue
5854

    
5855
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
5856

    
5857
      self.cfg.SetDiskID(dev, node_name)
5858

    
5859
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
5860
      names = _GenerateUniqueNames(self.lu, lv_names)
5861

    
5862
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
5863
                             logical_id=(vgname, names[0]))
5864
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5865
                             logical_id=(vgname, names[1]))
5866

    
5867
      new_lvs = [lv_data, lv_meta]
5868
      old_lvs = dev.children
5869
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5870

    
5871
      # we pass force_create=True to force the LVM creation
5872
      for new_lv in new_lvs:
5873
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
5874
                        _GetInstanceInfoText(self.instance), False)
5875

    
5876
    return iv_names
5877

    
5878
  def _CheckDevices(self, node_name, iv_names):
5879
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5880
      self.cfg.SetDiskID(dev, node_name)
5881

    
5882
      result = self.rpc.call_blockdev_find(node_name, dev)
5883

    
5884
      msg = result.fail_msg
5885
      if msg or not result.payload:
5886
        if not msg:
5887
          msg = "disk not found"
5888
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5889
                                 (name, msg))
5890

    
5891
      if result.payload.is_degraded:
5892
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5893

    
5894
  def _RemoveOldStorage(self, node_name, iv_names):
5895
    for name, (dev, old_lvs, _) in iv_names.iteritems():
5896
      self.lu.LogInfo("Remove logical volumes for %s" % name)
5897

    
5898
      for lv in old_lvs:
5899
        self.cfg.SetDiskID(lv, node_name)
5900

    
5901
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
5902
        if msg:
5903
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
5904
                             hint="remove unused LVs manually")
5905

    
5906
  def _ExecDrbd8DiskOnly(self):
5907
    """Replace a disk on the primary or secondary for DRBD 8.
5908

5909
    The algorithm for replace is quite complicated:
5910

5911
      1. for each disk to be replaced:
5912

5913
        1. create new LVs on the target node with unique names
5914
        1. detach old LVs from the drbd device
5915
        1. rename old LVs to name_replaced.<time_t>
5916
        1. rename new LVs to old LVs
5917
        1. attach the new LVs (with the old names now) to the drbd device
5918

5919
      1. wait for sync across all devices
5920

5921
      1. for each modified disk:
5922

5923
        1. remove old LVs (which have the name name_replaced.<time_t>)
5924

5925
    Failures are not very well handled.
5926

5927
    """
5928
    steps_total = 6
5929

    
5930
    # Step: check device activation
5931
    self.lu.LogStep(1, steps_total, "Check device existence")
5932
    self._CheckDisksExistence([self.other_node, self.target_node])
5933
    self._CheckVolumeGroup([self.target_node, self.other_node])
5934

    
5935
    # Step: check other node consistency
5936
    self.lu.LogStep(2, steps_total, "Check peer consistency")
5937
    self._CheckDisksConsistency(self.other_node,
5938
                                self.other_node == self.instance.primary_node,
5939
                                False)
5940

    
5941
    # Step: create new storage
5942
    self.lu.LogStep(3, steps_total, "Allocate new storage")
5943
    iv_names = self._CreateNewStorage(self.target_node)
5944

    
5945
    # Step: for each lv, detach+rename*2+attach
5946
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
5947
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5948
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
5949

    
5950
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
5951
      result.Raise("Can't detach drbd from local storage on node"
5952
                   " %s for device %s" % (self.target_node, dev.iv_name))
5953
      #dev.children = []
5954
      #cfg.Update(instance)
5955

    
5956
      # ok, we created the new LVs, so now we know we have the needed
5957
      # storage; as such, we proceed on the target node to rename
5958
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5959
      # using the assumption that logical_id == physical_id (which in
5960
      # turn is the unique_id on that node)
5961

    
5962
      # FIXME(iustin): use a better name for the replaced LVs
5963
      temp_suffix = int(time.time())
5964
      ren_fn = lambda d, suff: (d.physical_id[0],
5965
                                d.physical_id[1] + "_replaced-%s" % suff)
5966

    
5967
      # Build the rename list based on what LVs exist on the node
5968
      rename_old_to_new = []
5969
      for to_ren in old_lvs:
5970
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
5971
        if not result.fail_msg and result.payload:
5972
          # device exists
5973
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
5974

    
5975
      self.lu.LogInfo("Renaming the old LVs on the target node")
5976
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
5977
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
5978

    
5979
      # Now we rename the new LVs to the old LVs
5980
      self.lu.LogInfo("Renaming the new LVs on the target node")
5981
      rename_new_to_old = [(new, old.physical_id)
5982
                           for old, new in zip(old_lvs, new_lvs)]
5983
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
5984
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
5985

    
5986
      for old, new in zip(old_lvs, new_lvs):
5987
        new.logical_id = old.logical_id
5988
        self.cfg.SetDiskID(new, self.target_node)
5989

    
5990
      for disk in old_lvs:
5991
        disk.logical_id = ren_fn(disk, temp_suffix)
5992
        self.cfg.SetDiskID(disk, self.target_node)
5993

    
5994
      # Now that the new lvs have the old name, we can add them to the device
5995
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
5996
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
5997
      msg = result.fail_msg
5998
      if msg:
5999
        for new_lv in new_lvs:
6000
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
6001
          if msg2:
6002
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6003
                               hint=("cleanup manually the unused logical"
6004
                                     "volumes"))
6005
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6006

    
6007
      dev.children = new_lvs
6008

    
6009
      self.cfg.Update(self.instance)
6010

    
6011
    # Wait for sync
6012
    # This can fail as the old devices are degraded and _WaitForSync
6013
    # does a combined result over all disks, so we don't check its return value
6014
    self.lu.LogStep(5, steps_total, "Sync devices")
6015
    _WaitForSync(self.lu, self.instance, unlock=True)
6016

    
6017
    # Check all devices manually
6018
    self._CheckDevices(self.instance.primary_node, iv_names)
6019

    
6020
    # Step: remove old storage
6021
    self.lu.LogStep(6, steps_total, "Removing old storage")
6022
    self._RemoveOldStorage(self.target_node, iv_names)
6023

    
6024
  def _ExecDrbd8Secondary(self):
6025
    """Replace the secondary node for DRBD 8.
6026

6027
    The algorithm for replace is quite complicated:
6028
      - for all disks of the instance:
6029
        - create new LVs on the new node with same names
6030
        - shutdown the drbd device on the old secondary
6031
        - disconnect the drbd network on the primary
6032
        - create the drbd device on the new secondary
6033
        - network attach the drbd on the primary, using an artifice:
6034
          the drbd code for Attach() will connect to the network if it
6035
          finds a device which is connected to the good local disks but
6036
          not network enabled
6037
      - wait for sync across all devices
6038
      - remove all disks from the old secondary
6039

6040
    Failures are not very well handled.
6041

6042
    """
6043
    steps_total = 6
6044

    
6045
    # Step: check device activation
6046
    self.lu.LogStep(1, steps_total, "Check device existence")
6047
    self._CheckDisksExistence([self.instance.primary_node])
6048
    self._CheckVolumeGroup([self.instance.primary_node])
6049

    
6050
    # Step: check other node consistency
6051
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6052
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
6053

    
6054
    # Step: create new storage
6055
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6056
    for idx, dev in enumerate(self.instance.disks):
6057
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6058
                      (self.new_node, idx))
6059
      # we pass force_create=True to force LVM creation
6060
      for new_lv in dev.children:
6061
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6062
                        _GetInstanceInfoText(self.instance), False)
6063

    
6064
    # Step 4: drbd minors and drbd setup changes
6065
    # after this, we must manually remove the drbd minors on both the
6066
    # error and the success paths
6067
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6068
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
6069
                                        self.instance.name)
6070
    logging.debug("Allocated minors %r" % (minors,))
6071

    
6072
    iv_names = {}
6073
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6074
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
6075
      # create new devices on new_node; note that we create two IDs:
6076
      # one without port, so the drbd will be activated without
6077
      # networking information on the new node at this stage, and one
6078
      # with network, for the latter activation in step 4
6079
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6080
      if self.instance.primary_node == o_node1:
6081
        p_minor = o_minor1
6082
      else:
6083
        p_minor = o_minor2
6084

    
6085
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
6086
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
6087

    
6088
      iv_names[idx] = (dev, dev.children, new_net_id)
6089
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6090
                    new_net_id)
6091
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6092
                              logical_id=new_alone_id,
6093
                              children=dev.children,
6094
                              size=dev.size)
6095
      try:
6096
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6097
                              _GetInstanceInfoText(self.instance), False)
6098
      except errors.GenericError:
6099
        self.cfg.ReleaseDRBDMinors(self.instance.name)
6100
        raise
6101

    
6102
    # We have new devices, shutdown the drbd on the old secondary
6103
    for idx, dev in enumerate(self.instance.disks):
6104
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6105
      self.cfg.SetDiskID(dev, self.target_node)
6106
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6107
      if msg:
6108
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
6109
                           "node: %s" % (idx, msg),
6110
                           hint=("Please cleanup this device manually as"
6111
                                 " soon as possible"))
6112

    
6113
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6114
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
6115
                                               self.instance.disks)
    result = result[self.instance.primary_node]
6116

    
6117
    msg = result.fail_msg
6118
    if msg:
6119
      # detaches didn't succeed (unlikely)
6120
      self.cfg.ReleaseDRBDMinors(self.instance.name)
6121
      raise errors.OpExecError("Can't detach the disks from the network on"
6122
                               " old node: %s" % (msg,))
6123

    
6124
    # if we managed to detach at least one, we update all the disks of
6125
    # the instance to point to the new secondary
6126
    self.lu.LogInfo("Updating instance configuration")
6127
    for dev, _, new_logical_id in iv_names.itervalues():
6128
      dev.logical_id = new_logical_id
6129
      self.cfg.SetDiskID(dev, self.instance.primary_node)
6130

    
6131
    self.cfg.Update(self.instance)
6132

    
6133
    # and now perform the drbd attach
6134
    self.lu.LogInfo("Attaching primary drbds to new secondary"
6135
                    " (standalone => connected)")
6136
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
6137
                                           self.instance.disks,
                                           self.instance.name,
6138
                                           False)
6139
    for to_node, to_result in result.items():
6140
      msg = to_result.fail_msg
6141
      if msg:
6142
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
6143
                           hint=("please do a gnt-instance info to see the"
6144
                                 " status of disks"))
6145

    
6146
    # Wait for sync
6147
    # This can fail as the old devices are degraded and _WaitForSync
6148
    # does a combined result over all disks, so we don't check its return value
6149
    self.lu.LogStep(5, steps_total, "Sync devices")
6150
    _WaitForSync(self.lu, self.instance, unlock=True)
6151

    
6152
    # Check all devices manually
6153
    self._CheckDevices(self.instance.primary_node, iv_names)
6154

    
6155
    # Step: remove old storage
6156
    self.lu.LogStep(6, steps_total, "Removing old storage")
6157
    self._RemoveOldStorage(self.target_node, iv_names)
6158

    
6159

    
6160
class LUGrowDisk(LogicalUnit):
6161
  """Grow a disk of an instance.
6162

6163
  """
6164
  HPATH = "disk-grow"
6165
  HTYPE = constants.HTYPE_INSTANCE
6166
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6167
  REQ_BGL = False
6168

    
6169
  def ExpandNames(self):
6170
    self._ExpandAndLockInstance()
6171
    self.needed_locks[locking.LEVEL_NODE] = []
6172
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6173

    
6174
  def DeclareLocks(self, level):
6175
    if level == locking.LEVEL_NODE:
6176
      self._LockInstancesNodes()
6177

    
6178
  def BuildHooksEnv(self):
6179
    """Build hooks env.
6180

6181
    This runs on the master, the primary and all the secondaries.
6182

6183
    """
6184
    env = {
6185
      "DISK": self.op.disk,
6186
      "AMOUNT": self.op.amount,
6187
      }
6188
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6189
    nl = [
6190
      self.cfg.GetMasterNode(),
6191
      self.instance.primary_node,
6192
      ]
6193
    return env, nl, nl
6194

    
6195
  def CheckPrereq(self):
6196
    """Check prerequisites.
6197

6198
    This checks that the instance is in the cluster.
6199

6200
    """
6201
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6202
    assert instance is not None, \
6203
      "Cannot retrieve locked instance %s" % self.op.instance_name
6204
    nodenames = list(instance.all_nodes)
6205
    for node in nodenames:
6206
      _CheckNodeOnline(self, node)
6207

    
6208

    
6209
    self.instance = instance
6210

    
6211
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6212
      raise errors.OpPrereqError("Instance's disk layout does not support"
6213
                                 " growing.")
6214

    
6215
    self.disk = instance.FindDisk(self.op.disk)
6216

    
6217
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6218
                                       instance.hypervisor)
6219
    for node in nodenames:
6220
      info = nodeinfo[node]
6221
      info.Raise("Cannot get current information from node %s" % node)
6222
      vg_free = info.payload.get('vg_free', None)
6223
      if not isinstance(vg_free, int):
6224
        raise errors.OpPrereqError("Can't compute free disk space on"
6225
                                   " node %s" % node)
6226
      if self.op.amount > vg_free:
6227
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
6228
                                   " %d MiB available, %d MiB required" %
6229
                                   (node, vg_free, self.op.amount))
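    # Note: the free-space check above runs on all nodes of the instance
    # (primary and, for DRBD, the secondary); vg_free and self.op.amount are
    # both in MiB, as reflected in the error message.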
6230

    
6231
  def Exec(self, feedback_fn):
6232
    """Execute disk grow.
6233

6234
    """
6235
    instance = self.instance
6236
    disk = self.disk
6237
    for node in instance.all_nodes:
6238
      self.cfg.SetDiskID(disk, node)
6239
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
6240
      result.Raise("Grow request failed to node %s" % node)
6241
    disk.RecordGrow(self.op.amount)
6242
    self.cfg.Update(instance)
6243
    if self.op.wait_for_sync:
6244
      disk_abort = not _WaitForSync(self, instance)
6245
      if disk_abort:
6246
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
6247
                             " status.\nPlease check the instance.")
6248

    
6249

    
6250
class LUQueryInstanceData(NoHooksLU):
6251
  """Query runtime instance data.
6252

6253
  """
6254
  _OP_REQP = ["instances", "static"]
6255
  REQ_BGL = False
6256

    
6257
  def ExpandNames(self):
6258
    self.needed_locks = {}
6259
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6260

    
6261
    if not isinstance(self.op.instances, list):
6262
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6263

    
6264
    if self.op.instances:
6265
      self.wanted_names = []
6266
      for name in self.op.instances:
6267
        full_name = self.cfg.ExpandInstanceName(name)
6268
        if full_name is None:
6269
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6270
        self.wanted_names.append(full_name)
6271
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6272
    else:
6273
      self.wanted_names = None
6274
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6275

    
6276
    self.needed_locks[locking.LEVEL_NODE] = []
6277
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6278

    
6279
  def DeclareLocks(self, level):
6280
    if level == locking.LEVEL_NODE:
6281
      self._LockInstancesNodes()
6282

    
6283
  def CheckPrereq(self):
6284
    """Check prerequisites.
6285

6286
    This only checks the optional instance list against the existing names.
6287

6288
    """
6289
    if self.wanted_names is None:
6290
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6291

    
6292
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6293
                             in self.wanted_names]
6294
    return
6295

    
6296
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
6297
    """Returns the status of a block device
6298

6299
    """
6300
    if self.op.static:
6301
      return None
6302

    
6303
    self.cfg.SetDiskID(dev, node)
6304

    
6305
    result = self.rpc.call_blockdev_find(node, dev)
6306
    if result.offline:
6307
      return None
6308

    
6309
    result.Raise("Can't compute disk status for %s" % instance_name)
6310

    
6311
    status = result.payload
6312
    if status is None:
6313
      return None
6314

    
6315
    return (status.dev_path, status.major, status.minor,
6316
            status.sync_percent, status.estimated_time,
6317
            status.is_degraded, status.ldisk_status)
6318

    
6319
  def _ComputeDiskStatus(self, instance, snode, dev):
6320
    """Compute block device status.
6321

6322
    """
6323
    if dev.dev_type in constants.LDS_DRBD:
6324
      # we change the snode then (otherwise we use the one passed in)
6325
      if dev.logical_id[0] == instance.primary_node:
6326
        snode = dev.logical_id[1]
6327
      else:
6328
        snode = dev.logical_id[0]
6329

    
6330
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
6331
                                              instance.name, dev)
6332
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
6333

    
6334
    if dev.children:
6335
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6336
                      for child in dev.children]
6337
    else:
6338
      dev_children = []
6339

    
6340
    data = {
6341
      "iv_name": dev.iv_name,
6342
      "dev_type": dev.dev_type,
6343
      "logical_id": dev.logical_id,
6344
      "physical_id": dev.physical_id,
6345
      "pstatus": dev_pstatus,
6346
      "sstatus": dev_sstatus,
6347
      "children": dev_children,
6348
      "mode": dev.mode,
6349
      "size": dev.size,
6350
      }
6351

    
6352
    return data
6353

    
6354
  def Exec(self, feedback_fn):
6355
    """Gather and return data"""
6356
    result = {}
6357

    
6358
    cluster = self.cfg.GetClusterInfo()
6359

    
6360
    for instance in self.wanted_instances:
6361
      if not self.op.static:
6362
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6363
                                                  instance.name,
6364
                                                  instance.hypervisor)
6365
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6366
        remote_info = remote_info.payload
6367
        if remote_info and "state" in remote_info:
6368
          remote_state = "up"
6369
        else:
6370
          remote_state = "down"
6371
      else:
6372
        remote_state = None
6373
      if instance.admin_up:
6374
        config_state = "up"
6375
      else:
6376
        config_state = "down"
6377

    
6378
      disks = [self._ComputeDiskStatus(instance, None, device)
6379
               for device in instance.disks]
6380

    
6381
      idict = {
6382
        "name": instance.name,
6383
        "config_state": config_state,
6384
        "run_state": remote_state,
6385
        "pnode": instance.primary_node,
6386
        "snodes": instance.secondary_nodes,
6387
        "os": instance.os,
6388
        # this happens to be the same format used for hooks
6389
        "nics": _NICListToTuple(self, instance.nics),
6390
        "disks": disks,
6391
        "hypervisor": instance.hypervisor,
6392
        "network_port": instance.network_port,
6393
        "hv_instance": instance.hvparams,
6394
        "hv_actual": cluster.FillHV(instance),
6395
        "be_instance": instance.beparams,
6396
        "be_actual": cluster.FillBE(instance),
6397
        }
6398

    
6399
      result[instance.name] = idict
6400

    
6401
    return result
6402

    
6403

    
6404
class LUSetInstanceParams(LogicalUnit):
6405
  """Modifies an instances's parameters.
6406

6407
  """
6408
  HPATH = "instance-modify"
6409
  HTYPE = constants.HTYPE_INSTANCE
6410
  _OP_REQP = ["instance_name"]
6411
  REQ_BGL = False
6412

    
6413
  def CheckArguments(self):
6414
    if not hasattr(self.op, 'nics'):
6415
      self.op.nics = []
6416
    if not hasattr(self.op, 'disks'):
6417
      self.op.disks = []
6418
    if not hasattr(self.op, 'beparams'):
6419
      self.op.beparams = {}
6420
    if not hasattr(self.op, 'hvparams'):
6421
      self.op.hvparams = {}
6422
    self.op.force = getattr(self.op, "force", False)
6423
    if not (self.op.nics or self.op.disks or
6424
            self.op.hvparams or self.op.beparams):
6425
      raise errors.OpPrereqError("No changes submitted")
6426

    
6427
    # Disk validation
6428
    disk_addremove = 0
6429
    for disk_op, disk_dict in self.op.disks:
6430
      if disk_op == constants.DDM_REMOVE:
6431
        disk_addremove += 1
6432
        continue
6433
      elif disk_op == constants.DDM_ADD:
6434
        disk_addremove += 1
6435
      else:
6436
        if not isinstance(disk_op, int):
6437
          raise errors.OpPrereqError("Invalid disk index")
6438
        if not isinstance(disk_dict, dict):
6439
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
6440
          raise errors.OpPrereqError(msg)
6441

    
6442
      if disk_op == constants.DDM_ADD:
6443
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
6444
        if mode not in constants.DISK_ACCESS_SET:
6445
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
6446
        size = disk_dict.get('size', None)
6447
        if size is None:
6448
          raise errors.OpPrereqError("Required disk parameter size missing")
6449
        try:
6450
          size = int(size)
6451
        except ValueError, err:
6452
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
6453
                                     str(err))
6454
        disk_dict['size'] = size
6455
      else:
6456
        # modification of disk
6457
        if 'size' in disk_dict:
6458
          raise errors.OpPrereqError("Disk size change not possible, use"
6459
                                     " grow-disk")
6460

    
6461
    if disk_addremove > 1:
6462
      raise errors.OpPrereqError("Only one disk add or remove operation"
6463
                                 " supported at a time")
6464

    
6465
    # NIC validation
6466
    nic_addremove = 0
6467
    for nic_op, nic_dict in self.op.nics:
6468
      if nic_op == constants.DDM_REMOVE:
6469
        nic_addremove += 1
6470
        continue
6471
      elif nic_op == constants.DDM_ADD:
6472
        nic_addremove += 1
6473
      else:
6474
        if not isinstance(nic_op, int):
6475
          raise errors.OpPrereqError("Invalid nic index")
6476
        if not isinstance(nic_dict, dict):
6477
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
6478
          raise errors.OpPrereqError(msg)
6479

    
6480
      # nic_dict should be a dict
6481
      nic_ip = nic_dict.get('ip', None)
6482
      if nic_ip is not None:
6483
        if nic_ip.lower() == constants.VALUE_NONE:
6484
          nic_dict['ip'] = None
6485
        else:
6486
          if not utils.IsValidIP(nic_ip):
6487
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
6488

    
6489
      nic_bridge = nic_dict.get('bridge', None)
6490
      nic_link = nic_dict.get('link', None)
6491
      if nic_bridge and nic_link:
6492
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6493
                                   " at the same time")
6494
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
6495
        nic_dict['bridge'] = None
6496
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
6497
        nic_dict['link'] = None
6498

    
6499
      if nic_op == constants.DDM_ADD:
6500
        nic_mac = nic_dict.get('mac', None)
6501
        if nic_mac is None:
6502
          nic_dict['mac'] = constants.VALUE_AUTO
6503

    
6504
      if 'mac' in nic_dict:
6505
        nic_mac = nic_dict['mac']
6506
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6507
          if not utils.IsValidMac(nic_mac):
6508
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6509
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6510
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6511
                                     " modifying an existing nic")
6512

    
6513
    if nic_addremove > 1:
6514
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6515
                                 " supported at a time")
6516

    
6517
  def ExpandNames(self):
6518
    self._ExpandAndLockInstance()
6519
    self.needed_locks[locking.LEVEL_NODE] = []
6520
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6521

    
6522
  def DeclareLocks(self, level):
6523
    if level == locking.LEVEL_NODE:
6524
      self._LockInstancesNodes()
6525

    
6526
  def BuildHooksEnv(self):
6527
    """Build hooks env.
6528

6529
    This runs on the master, primary and secondaries.
6530

6531
    """
6532
    args = dict()
6533
    if constants.BE_MEMORY in self.be_new:
6534
      args['memory'] = self.be_new[constants.BE_MEMORY]
6535
    if constants.BE_VCPUS in self.be_new:
6536
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6537
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6538
    # information at all.
6539
    if self.op.nics:
6540
      args['nics'] = []
6541
      nic_override = dict(self.op.nics)
6542
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6543
      for idx, nic in enumerate(self.instance.nics):
6544
        if idx in nic_override:
6545
          this_nic_override = nic_override[idx]
6546
        else:
6547
          this_nic_override = {}
6548
        if 'ip' in this_nic_override:
6549
          ip = this_nic_override['ip']
6550
        else:
6551
          ip = nic.ip
6552
        if 'mac' in this_nic_override:
6553
          mac = this_nic_override['mac']
6554
        else:
6555
          mac = nic.mac
6556
        if idx in self.nic_pnew:
6557
          nicparams = self.nic_pnew[idx]
6558
        else:
6559
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6560
        mode = nicparams[constants.NIC_MODE]
6561
        link = nicparams[constants.NIC_LINK]
6562
        args['nics'].append((ip, mac, mode, link))
6563
      if constants.DDM_ADD in nic_override:
6564
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6565
        mac = nic_override[constants.DDM_ADD]['mac']
6566
        nicparams = self.nic_pnew[constants.DDM_ADD]
6567
        mode = nicparams[constants.NIC_MODE]
6568
        link = nicparams[constants.NIC_LINK]
6569
        args['nics'].append((ip, mac, mode, link))
6570
      elif constants.DDM_REMOVE in nic_override:
6571
        del args['nics'][-1]
6572

    
6573
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6574
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6575
    return env, nl, nl
6576

    
6577
  def _GetUpdatedParams(self, old_params, update_dict,
6578
                        default_values, parameter_types):
6579
    """Return the new params dict for the given params.
6580

6581
    @type old_params: dict
6582
    @param old_params: old parameters
6583
    @type update_dict: dict
6584
    @param update_dict: dict containing new parameter values,
6585
                        or constants.VALUE_DEFAULT to reset the
6586
                        parameter to its default value
6587
    @type default_values: dict
6588
    @param default_values: default values for the filled parameters
6589
    @type parameter_types: dict
6590
    @param parameter_types: dict mapping target dict keys to types
6591
                            in constants.ENFORCEABLE_TYPES
6592
    @rtype: (dict, dict)
6593
    @return: (new_parameters, filled_parameters)
6594

6595
    """
6596
    params_copy = copy.deepcopy(old_params)
6597
    for key, val in update_dict.iteritems():
6598
      if val == constants.VALUE_DEFAULT:
6599
        try:
6600
          del params_copy[key]
6601
        except KeyError:
6602
          pass
6603
      else:
6604
        params_copy[key] = val
6605
    utils.ForceDictType(params_copy, parameter_types)
6606
    params_filled = objects.FillDict(default_values, params_copy)
6607
    return (params_copy, params_filled)
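    # Illustrative example (hypothetical values):
    #   old_params = {"memory": 512}
    #   update_dict = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
    #   default_values = {"memory": 128, "vcpus": 1}
    #   -> returns ({"vcpus": 4}, {"memory": 128, "vcpus": 4})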
6608

    
6609
  def CheckPrereq(self):
6610
    """Check prerequisites.
6611

6612
    This only checks the instance list against the existing names.
6613

6614
    """
6615
    self.force = self.op.force
6616

    
6617
    # checking the new params on the primary/secondary nodes
6618

    
6619
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6620
    cluster = self.cluster = self.cfg.GetClusterInfo()
6621
    assert self.instance is not None, \
6622
      "Cannot retrieve locked instance %s" % self.op.instance_name
6623
    pnode = instance.primary_node
6624
    nodelist = list(instance.all_nodes)
6625

    
6626
    # hvparams processing
6627
    if self.op.hvparams:
6628
      i_hvdict, hv_new = self._GetUpdatedParams(
6629
                             instance.hvparams, self.op.hvparams,
6630
                             cluster.hvparams[instance.hypervisor],
6631
                             constants.HVS_PARAMETER_TYPES)
6632
      # local check
6633
      hypervisor.GetHypervisor(
6634
        instance.hypervisor).CheckParameterSyntax(hv_new)
6635
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6636
      self.hv_new = hv_new # the new actual values
6637
      self.hv_inst = i_hvdict # the new dict (without defaults)
6638
    else:
6639
      self.hv_new = self.hv_inst = {}
6640

    
6641
    # beparams processing
6642
    if self.op.beparams:
6643
      i_bedict, be_new = self._GetUpdatedParams(
6644
                             instance.beparams, self.op.beparams,
6645
                             cluster.beparams[constants.PP_DEFAULT],
6646
                             constants.BES_PARAMETER_TYPES)
6647
      self.be_new = be_new # the new actual values
6648
      self.be_inst = i_bedict # the new dict (without defaults)
6649
    else:
6650
      self.be_new = self.be_inst = {}
6651

    
6652
    self.warn = []
6653

    
6654
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6655
      mem_check_list = [pnode]
6656
      if be_new[constants.BE_AUTO_BALANCE]:
6657
        # either we changed auto_balance to yes or it was from before
6658
        mem_check_list.extend(instance.secondary_nodes)
6659
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6660
                                                  instance.hypervisor)
6661
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6662
                                         instance.hypervisor)
6663
      pninfo = nodeinfo[pnode]
6664
      msg = pninfo.fail_msg
6665
      if msg:
6666
        # Assume the primary node is unreachable and go ahead
6667
        self.warn.append("Can't get info from primary node %s: %s" %
6668
                         (pnode,  msg))
6669
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
6670
        self.warn.append("Node data from primary node %s doesn't contain"
6671
                         " free memory information" % pnode)
6672
      elif instance_info.fail_msg:
6673
        self.warn.append("Can't get instance runtime information: %s" %
6674
                        instance_info.fail_msg)
6675
      else:
6676
        if instance_info.payload:
6677
          current_mem = int(instance_info.payload['memory'])
6678
        else:
6679
          # Assume instance not running
6680
          # (there is a slight race condition here, but it's not very probable,
6681
          # and we have no other way to check)
6682
          current_mem = 0
6683
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6684
                    pninfo.payload['memory_free'])
6685
        if miss_mem > 0:
6686
          raise errors.OpPrereqError("This change will prevent the instance"
6687
                                     " from starting, due to %d MB of memory"
6688
                                     " missing on its primary node" % miss_mem)
6689

    
6690
      if be_new[constants.BE_AUTO_BALANCE]:
6691
        for node, nres in nodeinfo.items():
6692
          if node not in instance.secondary_nodes:
6693
            continue
6694
          msg = nres.fail_msg
6695
          if msg:
6696
            self.warn.append("Can't get info from secondary node %s: %s" %
6697
                             (node, msg))
6698
          elif not isinstance(nres.payload.get('memory_free', None), int):
6699
            self.warn.append("Secondary node %s didn't return free"
6700
                             " memory information" % node)
6701
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
6702
            self.warn.append("Not enough memory to failover instance to"
6703
                             " secondary node %s" % node)
6704

    
6705
    # NIC processing
6706
    self.nic_pnew = {}
6707
    self.nic_pinst = {}
6708
    for nic_op, nic_dict in self.op.nics:
6709
      if nic_op == constants.DDM_REMOVE:
6710
        if not instance.nics:
6711
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6712
        continue
6713
      if nic_op != constants.DDM_ADD:
6714
        # an existing nic
6715
        if nic_op < 0 or nic_op >= len(instance.nics):
6716
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6717
                                     " are 0 to %d" %
6718
                                     (nic_op, len(instance.nics)))
6719
        old_nic_params = instance.nics[nic_op].nicparams
6720
        old_nic_ip = instance.nics[nic_op].ip
6721
      else:
6722
        old_nic_params = {}
6723
        old_nic_ip = None
6724

    
6725
      update_params_dict = dict([(key, nic_dict[key])
6726
                                 for key in constants.NICS_PARAMETERS
6727
                                 if key in nic_dict])
6728

    
6729
      if 'bridge' in nic_dict:
6730
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6731

    
6732
      new_nic_params, new_filled_nic_params = \
6733
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6734
                                 cluster.nicparams[constants.PP_DEFAULT],
6735
                                 constants.NICS_PARAMETER_TYPES)
6736
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6737
      self.nic_pinst[nic_op] = new_nic_params
6738
      self.nic_pnew[nic_op] = new_filled_nic_params
6739
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6740

    
6741
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6742
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6743
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
6744
        if msg:
6745
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6746
          if self.force:
6747
            self.warn.append(msg)
6748
          else:
6749
            raise errors.OpPrereqError(msg)
6750
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6751
        if 'ip' in nic_dict:
6752
          nic_ip = nic_dict['ip']
6753
        else:
6754
          nic_ip = old_nic_ip
6755
        if nic_ip is None:
6756
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6757
                                     ' on a routed nic')
6758
      if 'mac' in nic_dict:
6759
        nic_mac = nic_dict['mac']
6760
        if nic_mac is None:
6761
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6762
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6763
          # otherwise generate the mac
6764
          nic_dict['mac'] = self.cfg.GenerateMAC()
6765
        else:
6766
          # or validate/reserve the current one
6767
          if self.cfg.IsMacInUse(nic_mac):
6768
            raise errors.OpPrereqError("MAC address %s already in use"
6769
                                       " in cluster" % nic_mac)
6770

    
6771
    # DISK processing
6772
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6773
      raise errors.OpPrereqError("Disk operations not supported for"
6774
                                 " diskless instances")
6775
    for disk_op, disk_dict in self.op.disks:
6776
      if disk_op == constants.DDM_REMOVE:
6777
        if len(instance.disks) == 1:
6778
          raise errors.OpPrereqError("Cannot remove the last disk of"
6779
                                     " an instance")
6780
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6781
        ins_l = ins_l[pnode]
6782
        msg = ins_l.fail_msg
6783
        if msg:
6784
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6785
                                     (pnode, msg))
6786
        if instance.name in ins_l.payload:
6787
          raise errors.OpPrereqError("Instance is running, can't remove"
6788
                                     " disks.")
6789

    
6790
      if (disk_op == constants.DDM_ADD and
6791
          len(instance.disks) >= constants.MAX_DISKS):
6792
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6793
                                   " add more" % constants.MAX_DISKS)
6794
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6795
        # an existing disk
6796
        if disk_op < 0 or disk_op >= len(instance.disks):
6797
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6798
                                     " are 0 to %d" %
6799
                                     (disk_op, len(instance.disks)))
6800

    
6801
    return
6802

    
6803
  def Exec(self, feedback_fn):
6804
    """Modifies an instance.
6805

6806
    All parameters take effect only at the next restart of the instance.
6807

6808
    """
6809
    # Process here the warnings from CheckPrereq, as we don't have a
6810
    # feedback_fn there.
6811
    for warn in self.warn:
6812
      feedback_fn("WARNING: %s" % warn)
6813

    
6814
    result = []
6815
    instance = self.instance
6816
    cluster = self.cluster
6817
    # disk changes
6818
    for disk_op, disk_dict in self.op.disks:
6819
      if disk_op == constants.DDM_REMOVE:
6820
        # remove the last disk
6821
        device = instance.disks.pop()
6822
        device_idx = len(instance.disks)
6823
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6824
          self.cfg.SetDiskID(disk, node)
6825
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
6826
          if msg:
6827
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6828
                            " continuing anyway", device_idx, node, msg)
6829
        result.append(("disk/%d" % device_idx, "remove"))
6830
      elif disk_op == constants.DDM_ADD:
6831
        # add a new disk
6832
        if instance.disk_template == constants.DT_FILE:
6833
          file_driver, file_path = instance.disks[0].logical_id
6834
          file_path = os.path.dirname(file_path)
6835
        else:
6836
          file_driver = file_path = None
6837
        disk_idx_base = len(instance.disks)
6838
        new_disk = _GenerateDiskTemplate(self,
6839
                                         instance.disk_template,
6840
                                         instance.name, instance.primary_node,
6841
                                         instance.secondary_nodes,
6842
                                         [disk_dict],
6843
                                         file_path,
6844
                                         file_driver,
6845
                                         disk_idx_base)[0]
6846
        instance.disks.append(new_disk)
6847
        info = _GetInstanceInfoText(instance)
6848

    
6849
        logging.info("Creating volume %s for instance %s",
6850
                     new_disk.iv_name, instance.name)
6851
        # Note: this needs to be kept in sync with _CreateDisks
6852
        #HARDCODE
6853
        for node in instance.all_nodes:
6854
          f_create = node == instance.primary_node
6855
          try:
6856
            _CreateBlockDev(self, node, instance, new_disk,
6857
                            f_create, info, f_create)
6858
          except errors.OpExecError, err:
6859
            self.LogWarning("Failed to create volume %s (%s) on"
6860
                            " node %s: %s",
6861
                            new_disk.iv_name, new_disk, node, err)
6862
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6863
                       (new_disk.size, new_disk.mode)))
6864
      else:
6865
        # change a given disk
6866
        instance.disks[disk_op].mode = disk_dict['mode']
6867
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6868
    # NIC changes
6869
    for nic_op, nic_dict in self.op.nics:
6870
      if nic_op == constants.DDM_REMOVE:
6871
        # remove the last nic
6872
        del instance.nics[-1]
6873
        result.append(("nic.%d" % len(instance.nics), "remove"))
6874
      elif nic_op == constants.DDM_ADD:
6875
        # mac and bridge should be set by now
6876
        mac = nic_dict['mac']
6877
        ip = nic_dict.get('ip', None)
6878
        nicparams = self.nic_pinst[constants.DDM_ADD]
6879
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6880
        instance.nics.append(new_nic)
6881
        result.append(("nic.%d" % (len(instance.nics) - 1),
6882
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6883
                       (new_nic.mac, new_nic.ip,
6884
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6885
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6886
                       )))
6887
      else:
6888
        for key in 'mac', 'ip':
6889
          if key in nic_dict:
6890
            setattr(instance.nics[nic_op], key, nic_dict[key])
6891
        if nic_op in self.nic_pnew:
6892
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6893
        for key, val in nic_dict.iteritems():
6894
          result.append(("nic.%s/%d" % (key, nic_op), val))
6895

    
6896
    # hvparams changes
6897
    if self.op.hvparams:
6898
      instance.hvparams = self.hv_inst
6899
      for key, val in self.op.hvparams.iteritems():
6900
        result.append(("hv/%s" % key, val))
6901

    
6902
    # beparams changes
6903
    if self.op.beparams:
6904
      instance.beparams = self.be_inst
6905
      for key, val in self.op.beparams.iteritems():
6906
        result.append(("be/%s" % key, val))
6907

    
6908
    self.cfg.Update(instance)
6909

    
6910
    return result
6911

    
6912

    
6913
class LUQueryExports(NoHooksLU):
6914
  """Query the exports list
6915

6916
  """
6917
  _OP_REQP = ['nodes']
6918
  REQ_BGL = False
6919

    
6920
  def ExpandNames(self):
6921
    self.needed_locks = {}
6922
    self.share_locks[locking.LEVEL_NODE] = 1
6923
    if not self.op.nodes:
6924
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6925
    else:
6926
      self.needed_locks[locking.LEVEL_NODE] = \
6927
        _GetWantedNodes(self, self.op.nodes)
6928

    
6929
  def CheckPrereq(self):
6930
    """Check prerequisites.
6931

6932
    """
6933
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6934

    
6935
  def Exec(self, feedback_fn):
6936
    """Compute the list of all the exported system images.
6937

6938
    @rtype: dict
6939
    @return: a dictionary with the structure node->(export-list)
6940
        where export-list is a list of the instances exported on
6941
        that node.
6942

6943
    """
6944
    rpcresult = self.rpc.call_export_list(self.nodes)
6945
    result = {}
6946
    for node in rpcresult:
6947
      if rpcresult[node].fail_msg:
6948
        result[node] = False
6949
      else:
6950
        result[node] = rpcresult[node].payload
6951

    
6952
    return result
6953

    
6954

    
6955
class LUExportInstance(LogicalUnit):
6956
  """Export an instance to an image in the cluster.
6957

6958
  """
6959
  HPATH = "instance-export"
6960
  HTYPE = constants.HTYPE_INSTANCE
6961
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6962
  REQ_BGL = False
6963

    
6964
  def ExpandNames(self):
6965
    self._ExpandAndLockInstance()
6966
    # FIXME: lock only instance primary and destination node
6967
    #
6968
    # Sad but true, for now we have to lock all nodes, as we don't know where
6969
    # the previous export might be, and in this LU we search for it and
6970
    # remove it from its current node. In the future we could fix this by:
6971
    #  - making a tasklet to search (share-lock all), then create the new one,
6972
    #    then one to remove, after
6973
    #  - removing the removal operation altogether
6974
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6975

    
6976
  def DeclareLocks(self, level):
6977
    """Last minute lock declaration."""
6978
    # All nodes are locked anyway, so nothing to do here.
6979

    
6980
  def BuildHooksEnv(self):
6981
    """Build hooks env.
6982

6983
    This will run on the master, primary node and target node.
6984

6985
    """
6986
    env = {
6987
      "EXPORT_NODE": self.op.target_node,
6988
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6989
      }
6990
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6991
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6992
          self.op.target_node]
6993
    return env, nl, nl
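    # For illustration only: assuming _BuildInstanceHookEnvByObject contributes
    # the usual INSTANCE_* keys, the resulting environment could look roughly
    # like:
    #   {"EXPORT_NODE": "node3.example.com",
    #    "EXPORT_DO_SHUTDOWN": True,
    #    "INSTANCE_NAME": "instance1.example.com",
    #    ...}
    # Only the EXPORT_* keys are set by this method; the rest is an assumption
    # about the shared helper.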
6994

    
6995
  def CheckPrereq(self):
6996
    """Check prerequisites.
6997

6998
    This checks that the instance and node names are valid.
6999

7000
    """
7001
    instance_name = self.op.instance_name
7002
    self.instance = self.cfg.GetInstanceInfo(instance_name)
7003
    assert self.instance is not None, \
7004
          "Cannot retrieve locked instance %s" % self.op.instance_name
7005
    _CheckNodeOnline(self, self.instance.primary_node)
7006

    
7007
    self.dst_node = self.cfg.GetNodeInfo(
7008
      self.cfg.ExpandNodeName(self.op.target_node))
7009

    
7010
    if self.dst_node is None:
7011
      # This is a wrong node name, not a non-locked node
7012
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
7013
    _CheckNodeOnline(self, self.dst_node.name)
7014
    _CheckNodeNotDrained(self, self.dst_node.name)
7015

    
7016
    # instance disk type verification
7017
    for disk in self.instance.disks:
7018
      if disk.dev_type == constants.LD_FILE:
7019
        raise errors.OpPrereqError("Export not supported for instances with"
7020
                                   " file-based disks")
7021

    
7022
  def Exec(self, feedback_fn):
7023
    """Export an instance to an image in the cluster.
7024

7025
    """
7026
    instance = self.instance
7027
    dst_node = self.dst_node
7028
    src_node = instance.primary_node
7029
    if self.op.shutdown:
7030
      # shutdown the instance, but not the disks
7031
      result = self.rpc.call_instance_shutdown(src_node, instance)
7032
      result.Raise("Could not shutdown instance %s on"
7033
                   " node %s" % (instance.name, src_node))
7034

    
7035
    vgname = self.cfg.GetVGName()
7036

    
7037
    snap_disks = []
7038

    
7039
    # set the disks ID correctly since call_instance_start needs the
7040
    # correct drbd minor to create the symlinks
7041
    for disk in instance.disks:
7042
      self.cfg.SetDiskID(disk, src_node)
7043

    
7044
    try:
7045
      for idx, disk in enumerate(instance.disks):
7046
        # result.payload will be a snapshot of an LVM leaf of the disk passed
7047
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
7048
        msg = result.fail_msg
7049
        if msg:
7050
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
7051
                          idx, src_node, msg)
7052
          snap_disks.append(False)
7053
        else:
7054
          disk_id = (vgname, result.payload)
7055
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
7056
                                 logical_id=disk_id, physical_id=disk_id,
7057
                                 iv_name=disk.iv_name)
7058
          snap_disks.append(new_dev)
7059

    
7060
    finally:
7061
      if self.op.shutdown and instance.admin_up:
7062
        result = self.rpc.call_instance_start(src_node, instance, None, None)
7063
        msg = result.fail_msg
7064
        if msg:
7065
          _ShutdownInstanceDisks(self, instance)
7066
          raise errors.OpExecError("Could not start instance: %s" % msg)
7067

    
7068
    # TODO: check for size
7069

    
7070
    cluster_name = self.cfg.GetClusterName()
7071
    for idx, dev in enumerate(snap_disks):
7072
      if dev:
7073
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
7074
                                               instance, cluster_name, idx)
7075
        msg = result.fail_msg
7076
        if msg:
7077
          self.LogWarning("Could not export disk/%s from node %s to"
7078
                          " node %s: %s", idx, src_node, dst_node.name, msg)
7079
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
7080
        if msg:
7081
          self.LogWarning("Could not remove snapshot for disk/%d from node"
7082
                          " %s: %s", idx, src_node, msg)
7083

    
7084
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
7085
    msg = result.fail_msg
7086
    if msg:
7087
      self.LogWarning("Could not finalize export for instance %s"
7088
                      " on node %s: %s", instance.name, dst_node.name, msg)
7089

    
7090
    nodelist = self.cfg.GetNodeList()
7091
    nodelist.remove(dst_node.name)
7092

    
7093
    # on one-node clusters nodelist will be empty after the removal
7094
    # if we proceeded, the backup would be removed because OpQueryExports
7095
    # substitutes an empty list with the full cluster node list.
7096
    iname = instance.name
7097
    if nodelist:
7098
      exportlist = self.rpc.call_export_list(nodelist)
7099
      for node in exportlist:
7100
        if exportlist[node].fail_msg:
7101
          continue
7102
        if iname in exportlist[node].payload:
7103
          msg = self.rpc.call_export_remove(node, iname).fail_msg
7104
          if msg:
7105
            self.LogWarning("Could not remove older export for instance %s"
7106
                            " on node %s: %s", iname, node, msg)
7107

    
7108

    
7109
class LURemoveExport(NoHooksLU):
7110
  """Remove exports related to the named instance.
7111

7112
  """
7113
  _OP_REQP = ["instance_name"]
7114
  REQ_BGL = False
7115

    
7116
  def ExpandNames(self):
7117
    self.needed_locks = {}
7118
    # We need all nodes to be locked in order for RemoveExport to work, but we
7119
    # don't need to lock the instance itself, as nothing will happen to it (and
7120
    # we can also remove exports for a removed instance)
7121
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7122

    
7123
  def CheckPrereq(self):
7124
    """Check prerequisites.
7125
    """
7126
    pass
7127

    
7128
  def Exec(self, feedback_fn):
7129
    """Remove any export.
7130

7131
    """
7132
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
7133
    # If the instance was not found we'll try with the name that was passed in.
7134
    # This will only work if it was an FQDN, though.
7135
    fqdn_warn = False
7136
    if not instance_name:
7137
      fqdn_warn = True
7138
      instance_name = self.op.instance_name
7139

    
7140
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7141
    exportlist = self.rpc.call_export_list(locked_nodes)
7142
    found = False
7143
    for node in exportlist:
7144
      msg = exportlist[node].fail_msg
7145
      if msg:
7146
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
7147
        continue
7148
      if instance_name in exportlist[node].payload:
7149
        found = True
7150
        result = self.rpc.call_export_remove(node, instance_name)
7151
        msg = result.fail_msg
7152
        if msg:
7153
          logging.error("Could not remove export for instance %s"
7154
                        " on node %s: %s", instance_name, node, msg)
7155

    
7156
    if fqdn_warn and not found:
7157
      feedback_fn("Export not found. If trying to remove an export belonging"
7158
                  " to a deleted instance please use its Fully Qualified"
7159
                  " Domain Name.")
7160

    
7161

    
7162
class TagsLU(NoHooksLU):
7163
  """Generic tags LU.
7164

7165
  This is an abstract class which is the parent of all the other tags LUs.
7166

7167
  """
7168

    
7169
  def ExpandNames(self):
7170
    self.needed_locks = {}
7171
    if self.op.kind == constants.TAG_NODE:
7172
      name = self.cfg.ExpandNodeName(self.op.name)
7173
      if name is None:
7174
        raise errors.OpPrereqError("Invalid node name (%s)" %
7175
                                   (self.op.name,))
7176
      self.op.name = name
7177
      self.needed_locks[locking.LEVEL_NODE] = name
7178
    elif self.op.kind == constants.TAG_INSTANCE:
7179
      name = self.cfg.ExpandInstanceName(self.op.name)
7180
      if name is None:
7181
        raise errors.OpPrereqError("Invalid instance name (%s)" %
7182
                                   (self.op.name,))
7183
      self.op.name = name
7184
      self.needed_locks[locking.LEVEL_INSTANCE] = name
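    # As a rough sketch, the lock declaration built above ends up as one of:
    #   {}                                                  (cluster tags)
    #   {locking.LEVEL_NODE: "node1.example.com"}           (node tags)
    #   {locking.LEVEL_INSTANCE: "instance1.example.com"}   (instance tags)
    # with the names above being placeholders.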
7185

    
7186
  def CheckPrereq(self):
7187
    """Check prerequisites.
7188

7189
    """
7190
    if self.op.kind == constants.TAG_CLUSTER:
7191
      self.target = self.cfg.GetClusterInfo()
7192
    elif self.op.kind == constants.TAG_NODE:
7193
      self.target = self.cfg.GetNodeInfo(self.op.name)
7194
    elif self.op.kind == constants.TAG_INSTANCE:
7195
      self.target = self.cfg.GetInstanceInfo(self.op.name)
7196
    else:
7197
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
7198
                                 str(self.op.kind))
7199

    
7200

    
7201
class LUGetTags(TagsLU):
7202
  """Returns the tags of a given object.
7203

7204
  """
7205
  _OP_REQP = ["kind", "name"]
7206
  REQ_BGL = False
7207

    
7208
  def Exec(self, feedback_fn):
7209
    """Returns the tag list.
7210

7211
    """
7212
    return list(self.target.GetTags())
7213

    
7214

    
7215
class LUSearchTags(NoHooksLU):
7216
  """Searches the tags for a given pattern.
7217

7218
  """
7219
  _OP_REQP = ["pattern"]
7220
  REQ_BGL = False
7221

    
7222
  def ExpandNames(self):
7223
    self.needed_locks = {}
7224

    
7225
  def CheckPrereq(self):
7226
    """Check prerequisites.
7227

7228
    This checks the pattern passed for validity by compiling it.
7229

7230
    """
7231
    try:
7232
      self.re = re.compile(self.op.pattern)
7233
    except re.error, err:
7234
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
7235
                                 (self.op.pattern, err))
7236

    
7237
  def Exec(self, feedback_fn):
7238
    """Returns the tag list.
7239

7240
    """
7241
    cfg = self.cfg
7242
    tgts = [("/cluster", cfg.GetClusterInfo())]
7243
    ilist = cfg.GetAllInstancesInfo().values()
7244
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
7245
    nlist = cfg.GetAllNodesInfo().values()
7246
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
7247
    results = []
7248
    for path, target in tgts:
7249
      for tag in target.GetTags():
7250
        if self.re.search(tag):
7251
          results.append((path, tag))
7252
    return results
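    # The result is a list of (path, tag) pairs; a search for "^web" might,
    # purely as an example, return:
    #   [("/cluster", "webfarm"),
    #    ("/instances/instance1.example.com", "webserver")]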
7253

    
7254

    
7255
class LUAddTags(TagsLU):
7256
  """Sets a tag on a given object.
7257

7258
  """
7259
  _OP_REQP = ["kind", "name", "tags"]
7260
  REQ_BGL = False
7261

    
7262
  def CheckPrereq(self):
7263
    """Check prerequisites.
7264

7265
    This checks the type and length of the tag name and value.
7266

7267
    """
7268
    TagsLU.CheckPrereq(self)
7269
    for tag in self.op.tags:
7270
      objects.TaggableObject.ValidateTag(tag)
7271

    
7272
  def Exec(self, feedback_fn):
7273
    """Sets the tag.
7274

7275
    """
7276
    try:
7277
      for tag in self.op.tags:
7278
        self.target.AddTag(tag)
7279
    except errors.TagError, err:
7280
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
7281
    try:
7282
      self.cfg.Update(self.target)
7283
    except errors.ConfigurationError:
7284
      raise errors.OpRetryError("There has been a modification to the"
7285
                                " config file and the operation has been"
7286
                                " aborted. Please retry.")
7287

    
7288

    
7289
class LUDelTags(TagsLU):
7290
  """Delete a list of tags from a given object.
7291

7292
  """
7293
  _OP_REQP = ["kind", "name", "tags"]
7294
  REQ_BGL = False
7295

    
7296
  def CheckPrereq(self):
7297
    """Check prerequisites.
7298

7299
    This checks that we have the given tag.
7300

7301
    """
7302
    TagsLU.CheckPrereq(self)
7303
    for tag in self.op.tags:
7304
      objects.TaggableObject.ValidateTag(tag)
7305
    del_tags = frozenset(self.op.tags)
7306
    cur_tags = self.target.GetTags()
7307
    if not del_tags <= cur_tags:
7308
      diff_tags = del_tags - cur_tags
7309
      diff_names = ["'%s'" % tag for tag in diff_tags]
7310
      diff_names.sort()
7311
      raise errors.OpPrereqError("Tag(s) %s not found" %
7312
                                 (",".join(diff_names)))
7313

    
7314
  def Exec(self, feedback_fn):
7315
    """Remove the tag from the object.
7316

7317
    """
7318
    for tag in self.op.tags:
7319
      self.target.RemoveTag(tag)
7320
    try:
7321
      self.cfg.Update(self.target)
7322
    except errors.ConfigurationError:
7323
      raise errors.OpRetryError("There has been a modification to the"
7324
                                " config file and the operation has been"
7325
                                " aborted. Please retry.")
7326

    
7327

    
7328
class LUTestDelay(NoHooksLU):
7329
  """Sleep for a specified amount of time.
7330

7331
  This LU sleeps on the master and/or nodes for a specified amount of
7332
  time.
7333

7334
  """
7335
  _OP_REQP = ["duration", "on_master", "on_nodes"]
7336
  REQ_BGL = False
7337

    
7338
  def ExpandNames(self):
7339
    """Expand names and set required locks.
7340

7341
    This expands the node list, if any.
7342

7343
    """
7344
    self.needed_locks = {}
7345
    if self.op.on_nodes:
7346
      # _GetWantedNodes can be used here, but is not always appropriate to use
7347
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
7348
      # more information.
7349
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
7350
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
7351

    
7352
  def CheckPrereq(self):
7353
    """Check prerequisites.
7354

7355
    """
7356

    
7357
  def Exec(self, feedback_fn):
7358
    """Do the actual sleep.
7359

7360
    """
7361
    if self.op.on_master:
7362
      if not utils.TestDelay(self.op.duration):
7363
        raise errors.OpExecError("Error during master delay test")
7364
    if self.op.on_nodes:
7365
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
7366
      for node, node_result in result.items():
7367
        node_result.Raise("Failure during rpc call to node %s" % node)
7368

    
7369

    
7370
class IAllocator(object):
7371
  """IAllocator framework.
7372

7373
  An IAllocator instance has four sets of attributes:
7374
    - cfg that is needed to query the cluster
7375
    - input data (all members of the _KEYS class attribute are required)
7376
    - four buffer attributes (in_data, in_text, out_data, out_text), which
7377
      represent the input to the external script (in both text and data
7378
      structure format) and its output, again in both formats
7379
    - the result variables from the script (success, info, nodes) for
7380
      easy usage
7381

7382
  """
7383
  _ALLO_KEYS = [
7384
    "mem_size", "disks", "disk_template",
7385
    "os", "tags", "nics", "vcpus", "hypervisor",
7386
    ]
7387
  _RELO_KEYS = [
7388
    "relocate_from",
7389
    ]
7390

    
7391
  def __init__(self, cfg, rpc, mode, name, **kwargs):
7392
    self.cfg = cfg
7393
    self.rpc = rpc
7394
    # init buffer variables
7395
    self.in_text = self.out_text = self.in_data = self.out_data = None
7396
    # init all input fields so that pylint is happy
7397
    self.mode = mode
7398
    self.name = name
7399
    self.mem_size = self.disks = self.disk_template = None
7400
    self.os = self.tags = self.nics = self.vcpus = None
7401
    self.hypervisor = None
7402
    self.relocate_from = None
7403
    # computed fields
7404
    self.required_nodes = None
7405
    # init result fields
7406
    self.success = self.info = self.nodes = None
7407
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
7408
      keyset = self._ALLO_KEYS
7409
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
7410
      keyset = self._RELO_KEYS
7411
    else:
7412
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
7413
                                   " IAllocator" % self.mode)
7414
    for key in kwargs:
7415
      if key not in keyset:
7416
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
7417
                                     " IAllocator" % key)
7418
      setattr(self, key, kwargs[key])
7419
    for key in keyset:
7420
      if key not in kwargs:
7421
        raise errors.ProgrammerError("Missing input parameter '%s' to"
7422
                                     " IAllocator" % key)
7423
    self._BuildInputData()
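  # A minimal construction sketch (all values below are made up), showing how
  # the per-mode key set is passed as keyword arguments in ALLOC mode:
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    "instance1.example.com", mem_size=1024,
  #                    disks=[{"size": 10240, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8, os="debian-etch",
  #                    tags=[], vcpus=1, hypervisor=constants.HT_XEN_PVM,
  #                    nics=[{"mac": "auto", "ip": None, "bridge": None}])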
7424

    
7425
  def _ComputeClusterData(self):
7426
    """Compute the generic allocator input data.
7427

7428
    This is the data that is independent of the actual operation.
7429

7430
    """
7431
    cfg = self.cfg
7432
    cluster_info = cfg.GetClusterInfo()
7433
    # cluster data
7434
    data = {
7435
      "version": constants.IALLOCATOR_VERSION,
7436
      "cluster_name": cfg.GetClusterName(),
7437
      "cluster_tags": list(cluster_info.GetTags()),
7438
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
7439
      # we don't have job IDs
7440
      }
7441
    iinfo = cfg.GetAllInstancesInfo().values()
7442
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
7443

    
7444
    # node data
7445
    node_results = {}
7446
    node_list = cfg.GetNodeList()
7447

    
7448
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
7449
      hypervisor_name = self.hypervisor
7450
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
7451
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
7452

    
7453
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
7454
                                        hypervisor_name)
7455
    node_iinfo = \
7456
      self.rpc.call_all_instances_info(node_list,
7457
                                       cluster_info.enabled_hypervisors)
7458
    for nname, nresult in node_data.items():
7459
      # first fill in static (config-based) values
7460
      ninfo = cfg.GetNodeInfo(nname)
7461
      pnr = {
7462
        "tags": list(ninfo.GetTags()),
7463
        "primary_ip": ninfo.primary_ip,
7464
        "secondary_ip": ninfo.secondary_ip,
7465
        "offline": ninfo.offline,
7466
        "drained": ninfo.drained,
7467
        "master_candidate": ninfo.master_candidate,
7468
        }
7469

    
7470
      if not ninfo.offline:
7471
        nresult.Raise("Can't get data for node %s" % nname)
7472
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
7473
                                nname)
7474
        remote_info = nresult.payload
7475
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
7476
                     'vg_size', 'vg_free', 'cpu_total']:
7477
          if attr not in remote_info:
7478
            raise errors.OpExecError("Node '%s' didn't return attribute"
7479
                                     " '%s'" % (nname, attr))
7480
          if not isinstance(remote_info[attr], int):
7481
            raise errors.OpExecError("Node '%s' returned invalid value"
7482
                                     " for '%s': %s" %
7483
                                     (nname, attr, remote_info[attr]))
7484
        # compute memory used by primary instances
7485
        i_p_mem = i_p_up_mem = 0
7486
        for iinfo, beinfo in i_list:
7487
          if iinfo.primary_node == nname:
7488
            i_p_mem += beinfo[constants.BE_MEMORY]
7489
            if iinfo.name not in node_iinfo[nname].payload:
7490
              i_used_mem = 0
7491
            else:
7492
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
7493
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
7494
            remote_info['memory_free'] -= max(0, i_mem_diff)
7495

    
7496
            if iinfo.admin_up:
7497
              i_p_up_mem += beinfo[constants.BE_MEMORY]
7498

    
7499
        # node resource data: totals plus memory used by primary instances
7500
        pnr_dyn = {
7501
          "total_memory": remote_info['memory_total'],
7502
          "reserved_memory": remote_info['memory_dom0'],
7503
          "free_memory": remote_info['memory_free'],
7504
          "total_disk": remote_info['vg_size'],
7505
          "free_disk": remote_info['vg_free'],
7506
          "total_cpus": remote_info['cpu_total'],
7507
          "i_pri_memory": i_p_mem,
7508
          "i_pri_up_memory": i_p_up_mem,
7509
          }
7510
        pnr.update(pnr_dyn)
7511

    
7512
      node_results[nname] = pnr
7513
    data["nodes"] = node_results
7514

    
7515
    # instance data
7516
    instance_data = {}
7517
    for iinfo, beinfo in i_list:
7518
      nic_data = []
7519
      for nic in iinfo.nics:
7520
        filled_params = objects.FillDict(
7521
            cluster_info.nicparams[constants.PP_DEFAULT],
7522
            nic.nicparams)
7523
        nic_dict = {"mac": nic.mac,
7524
                    "ip": nic.ip,
7525
                    "mode": filled_params[constants.NIC_MODE],
7526
                    "link": filled_params[constants.NIC_LINK],
7527
                   }
7528
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
7529
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
7530
        nic_data.append(nic_dict)
7531
      pir = {
7532
        "tags": list(iinfo.GetTags()),
7533
        "admin_up": iinfo.admin_up,
7534
        "vcpus": beinfo[constants.BE_VCPUS],
7535
        "memory": beinfo[constants.BE_MEMORY],
7536
        "os": iinfo.os,
7537
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
7538
        "nics": nic_data,
7539
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
7540
        "disk_template": iinfo.disk_template,
7541
        "hypervisor": iinfo.hypervisor,
7542
        }
7543
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
7544
                                                 pir["disks"])
7545
      instance_data[iinfo.name] = pir
7546

    
7547
    data["instances"] = instance_data
7548

    
7549
    self.in_data = data
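    # Abbreviated sketch of the structure built above (names and figures are
    # purely illustrative):
    #   {"version": constants.IALLOCATOR_VERSION,
    #    "cluster_name": "cluster.example.com",
    #    "cluster_tags": [],
    #    "enabled_hypervisors": ["xen-pvm"],
    #    "nodes": {"node1.example.com": {"total_memory": 4096,
    #                                    "free_memory": 2048,
    #                                    "total_cpus": 4, ...}},
    #    "instances": {"instance1.example.com": {"memory": 512, "vcpus": 1,
    #                                            "disk_template": "drbd",
    #                                            ...}}}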
7550

    
7551
  def _AddNewInstance(self):
7552
    """Add new instance data to allocator structure.
7553

7554
    This, in combination with _ComputeClusterData, will create the
7555
    correct structure needed as input for the allocator.
7556

7557
    The checks for the completeness of the opcode must have already been
7558
    done.
7559

7560
    """
7561
    data = self.in_data
7562

    
7563
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
7564

    
7565
    if self.disk_template in constants.DTS_NET_MIRROR:
7566
      self.required_nodes = 2
7567
    else:
7568
      self.required_nodes = 1
7569
    request = {
7570
      "type": "allocate",
7571
      "name": self.name,
7572
      "disk_template": self.disk_template,
7573
      "tags": self.tags,
7574
      "os": self.os,
7575
      "vcpus": self.vcpus,
7576
      "memory": self.mem_size,
7577
      "disks": self.disks,
7578
      "disk_space_total": disk_space,
7579
      "nics": self.nics,
7580
      "required_nodes": self.required_nodes,
7581
      }
7582
    data["request"] = request
7583

    
7584
  def _AddRelocateInstance(self):
7585
    """Add relocate instance data to allocator structure.
7586

7587
    This, in combination with _ComputeClusterData, will create the
7588
    correct structure needed as input for the allocator.
7589

7590
    The checks for the completeness of the opcode must have already been
7591
    done.
7592

7593
    """
7594
    instance = self.cfg.GetInstanceInfo(self.name)
7595
    if instance is None:
7596
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
7597
                                   " IAllocator" % self.name)
7598

    
7599
    if instance.disk_template not in constants.DTS_NET_MIRROR:
7600
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")
7601

    
7602
    if len(instance.secondary_nodes) != 1:
7603
      raise errors.OpPrereqError("Instance has not exactly one secondary node")
7604

    
7605
    self.required_nodes = 1
7606
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
7607
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
7608

    
7609
    request = {
7610
      "type": "relocate",
7611
      "name": self.name,
7612
      "disk_space_total": disk_space,
7613
      "required_nodes": self.required_nodes,
7614
      "relocate_from": self.relocate_from,
7615
      }
7616
    self.in_data["request"] = request
7617

    
7618
  def _BuildInputData(self):
7619
    """Build input data structures.
7620

7621
    """
7622
    self._ComputeClusterData()
7623

    
7624
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
7625
      self._AddNewInstance()
7626
    else:
7627
      self._AddRelocateInstance()
7628

    
7629
    self.in_text = serializer.Dump(self.in_data)
7630

    
7631
  def Run(self, name, validate=True, call_fn=None):
7632
    """Run an instance allocator and return the results.
7633

7634
    """
7635
    if call_fn is None:
7636
      call_fn = self.rpc.call_iallocator_runner
7637

    
7638
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
7639
    result.Raise("Failure while running the iallocator script")
7640

    
7641
    self.out_text = result.payload
7642
    if validate:
7643
      self._ValidateResult()
7644

    
7645
  def _ValidateResult(self):
7646
    """Process the allocator results.
7647

7648
    This will process and if successful save the result in
7649
    self.out_data and the other parameters.
7650

7651
    """
7652
    try:
7653
      rdict = serializer.Load(self.out_text)
7654
    except Exception, err:
7655
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
7656

    
7657
    if not isinstance(rdict, dict):
7658
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
7659

    
7660
    for key in "success", "info", "nodes":
7661
      if key not in rdict:
7662
        raise errors.OpExecError("Can't parse iallocator results:"
7663
                                 " missing key '%s'" % key)
7664
      setattr(self, key, rdict[key])
7665

    
7666
    if not isinstance(rdict["nodes"], list):
7667
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
7668
                               " is not a list")
7669
    self.out_data = rdict
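    # The allocator script is therefore expected to emit JSON along these
    # lines (node names invented for illustration):
    #   {"success": true, "info": "allocation successful",
    #    "nodes": ["node1.example.com", "node2.example.com"]}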
7670

    
7671

    
7672
class LUTestAllocator(NoHooksLU):
7673
  """Run allocator tests.
7674

7675
  This LU runs the allocator tests
7676

7677
  """
7678
  _OP_REQP = ["direction", "mode", "name"]
7679

    
7680
  def CheckPrereq(self):
7681
    """Check prerequisites.
7682

7683
    This checks the opcode parameters depending on the direction and mode.
7684

7685
    """
7686
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
7687
      for attr in ["name", "mem_size", "disks", "disk_template",
7688
                   "os", "tags", "nics", "vcpus"]:
7689
        if not hasattr(self.op, attr):
7690
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
7691
                                     attr)
7692
      iname = self.cfg.ExpandInstanceName(self.op.name)
7693
      if iname is not None:
7694
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
7695
                                   iname)
7696
      if not isinstance(self.op.nics, list):
7697
        raise errors.OpPrereqError("Invalid parameter 'nics'")
7698
      for row in self.op.nics:
7699
        if (not isinstance(row, dict) or
7700
            "mac" not in row or
7701
            "ip" not in row or
7702
            "bridge" not in row):
7703
          raise errors.OpPrereqError("Invalid contents of the"
7704
                                     " 'nics' parameter")
7705
      if not isinstance(self.op.disks, list):
7706
        raise errors.OpPrereqError("Invalid parameter 'disks'")
7707
      for row in self.op.disks:
7708
        if (not isinstance(row, dict) or
7709
            "size" not in row or
7710
            not isinstance(row["size"], int) or
7711
            "mode" not in row or
7712
            row["mode"] not in ['r', 'w']):
7713
          raise errors.OpPrereqError("Invalid contents of the"
7714
                                     " 'disks' parameter")
7715
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
7716
        self.op.hypervisor = self.cfg.GetHypervisorType()
7717
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
7718
      if not hasattr(self.op, "name"):
7719
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
7720
      fname = self.cfg.ExpandInstanceName(self.op.name)
7721
      if fname is None:
7722
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
7723
                                   self.op.name)
7724
      self.op.name = fname
7725
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
7726
    else:
7727
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
7728
                                 self.op.mode)
7729

    
7730
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
7731
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
7732
        raise errors.OpPrereqError("Missing allocator name")
7733
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
7734
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
7735
                                 self.op.direction)
7736

    
7737
  def Exec(self, feedback_fn):
7738
    """Run the allocator test.
7739

7740
    """
7741
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
7742
      ial = IAllocator(self.cfg, self.rpc,
7743
                       mode=self.op.mode,
7744
                       name=self.op.name,
7745
                       mem_size=self.op.mem_size,
7746
                       disks=self.op.disks,
7747
                       disk_template=self.op.disk_template,
7748
                       os=self.op.os,
7749
                       tags=self.op.tags,
7750
                       nics=self.op.nics,
7751
                       vcpus=self.op.vcpus,
7752
                       hypervisor=self.op.hypervisor,
7753
                       )
7754
    else:
7755
      ial = IAllocator(self.cfg, self.rpc,
7756
                       mode=self.op.mode,
7757
                       name=self.op.name,
7758
                       relocate_from=list(self.relocate_from),
7759
                       )
7760

    
7761
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
7762
      result = ial.in_text
7763
    else:
7764
      ial.Run(self.op.allocator, validate=False)
7765
      result = ial.out_text
7766
    return result
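    # Rough usage sketch, with made-up values: submitting the test opcode with
    #   direction=constants.IALLOCATOR_DIR_IN,
    #   mode=constants.IALLOCATOR_MODE_ALLOC, name="instance1.example.com",
    #   mem_size=1024, disks=[{"size": 10240, "mode": "w"}],
    #   disk_template="drbd", os="debian-etch", tags=[], vcpus=1,
    #   nics=[{"mac": "auto", "ip": None, "bridge": None}]
    # returns the serialized allocator input, while IALLOCATOR_DIR_OUT (plus an
    # "allocator" name) actually runs the script and returns its raw output.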