4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0613,W0201
34 from ganeti import rpc
35 from ganeti import ssh
36 from ganeti import logger
37 from ganeti import utils
38 from ganeti import errors
39 from ganeti import hypervisor
40 from ganeti import locking
41 from ganeti import config
42 from ganeti import constants
43 from ganeti import objects
44 from ganeti import opcodes
45 from ganeti import ssconf
46 from ganeti import serializer
49 class LogicalUnit(object):
50 """Logical Unit base class.
52 Subclasses must follow these rules:
53 - implement CheckPrereq which also fills in the opcode instance
54 with all the fields (even if as None)
56 - implement BuildHooksEnv
57 - redefine HPATH and HTYPE
58 - optionally redefine their run requirements:
59 REQ_MASTER: the LU needs to run on the master node
60 REQ_WSSTORE: the LU needs a writable SimpleStore
61 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
63 Note that all commands require root permissions.
73 def __init__(self, processor, op, context, sstore):
74 """Constructor for LogicalUnit.
76 This needs to be overridden in derived classes in order to check op
82 self.cfg = context.cfg
84 self.context = context
87 for attr_name in self._OP_REQP:
88 attr_val = getattr(op, attr_name, None)
90 raise errors.OpPrereqError("Required parameter '%s' missing" %
93 if not self.cfg.IsCluster():
94 raise errors.OpPrereqError("Cluster not initialized yet,"
95 " use 'gnt-cluster init' first.")
97 master = sstore.GetMasterNode()
98 if master != utils.HostInfo().name:
99 raise errors.OpPrereqError("Commands must be run on the master"
103 """Returns the SshRunner object
107 self.__ssh = ssh.SshRunner(self.sstore)
110 ssh = property(fget=__GetSSH)
112 def CheckPrereq(self):
113 """Check prerequisites for this LU.
115 This method should check that the prerequisites for the execution
116 of this LU are fulfilled. It can do internode communication, but
117 it should be idempotent - no cluster or system changes are
120 The method should raise errors.OpPrereqError in case something is
121 not fulfilled. Its return value is ignored.
123 This method should also update all the parameters of the opcode to
124 their canonical form; e.g. a short node name must be fully
125 expanded after this method has successfully completed (so that
126 hooks, logging, etc. work correctly).
129 raise NotImplementedError
131 def Exec(self, feedback_fn):
134 This method should implement the actual work. It should raise
135 errors.OpExecError for failures that are somewhat dealt with in
139 raise NotImplementedError
141 def BuildHooksEnv(self):
142 """Build hooks environment for this LU.
144 This method should return a three-element tuple consisting of: a dict
145 containing the environment that will be used for running the
146 specific hook for this LU, a list of node names on which the hook
147 should run before the execution, and a list of node names on which
148 the hook should run after the execution.
150 The keys of the dict must not have 'GANETI_' prefixed as this will
151 be handled in the hooks runner. Also note additional keys will be
152 added by the hooks runner. If the LU doesn't define any
153 environment, an empty dict (and not None) should be returned.
155 No nodes should be returned as an empty list (and not None).
157 Note that if the HPATH for a LU class is None, this function will
161 raise NotImplementedError
163 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
164 """Notify the LU about the results of its hooks.
166 This method is called every time a hooks phase is executed, and notifies
167 the Logical Unit about the hooks' result. The LU can then use it to alter
168 its result based on the hooks. By default the method does nothing and the
169 previous result is passed back unchanged, but any LU can override it if it
170 wants to use the local cluster hook-scripts somehow.
173 phase: the hooks phase that has just been run
174 hook_results: the results of the multi-node hooks rpc call
175 feedback_fn: function to send feedback back to the caller
176 lu_result: the previous result this LU had, or None in the PRE phase.
182 class NoHooksLU(LogicalUnit):
183 """Simple LU which runs no hooks.
185 This LU is intended as a parent for other LogicalUnits which will
186 run no hooks, in order to reduce duplicate code.
193 def _GetWantedNodes(lu, nodes):
194 """Returns list of checked and expanded node names.
197 nodes: List of nodes (strings) or None for all
200 if not isinstance(nodes, list):
201 raise errors.OpPrereqError("Invalid argument type 'nodes'")
207 node = lu.cfg.ExpandNodeName(name)
209 raise errors.OpPrereqError("No such node name '%s'" % name)
213 wanted = lu.cfg.GetNodeList()
214 return utils.NiceSort(wanted)
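# Usage sketch (illustrative, not part of the original code): LUs call this
# from CheckPrereq, e.g. self.nodes = _GetWantedNodes(self, self.op.nodes)
# as LUQueryNodeVolumes does below; an empty list falls through to
# cfg.GetNodeList(), i.e. every configured node.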
217 def _GetWantedInstances(lu, instances):
218 """Returns list of checked and expanded instance names.
221 instances: List of instances (strings) or None for all
224 if not isinstance(instances, list):
225 raise errors.OpPrereqError("Invalid argument type 'instances'")
230 for name in instances:
231 instance = lu.cfg.ExpandInstanceName(name)
233 raise errors.OpPrereqError("No such instance name '%s'" % name)
234 wanted.append(instance)
237 wanted = lu.cfg.GetInstanceList()
238 return utils.NiceSort(wanted)
241 def _CheckOutputFields(static, dynamic, selected):
242 """Checks whether all selected fields are valid.
245 static: Static fields
246 dynamic: Dynamic fields
249 static_fields = frozenset(static)
250 dynamic_fields = frozenset(dynamic)
252 all_fields = static_fields | dynamic_fields
254 if not all_fields.issuperset(selected):
255 raise errors.OpPrereqError("Unknown output fields selected: %s"
256 % ",".join(frozenset(selected).
257 difference(all_fields)))
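# Illustrative sketch with hypothetical field sets: given static=["name"] and
# dynamic=["mfree"], selecting ["name", "mfree"] passes silently, while
# ["name", "foo"] raises OpPrereqError naming the unknown field "foo".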
260 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
261 memory, vcpus, nics):
262 """Builds instance related env variables for hooks from single variables.
265 secondary_nodes: List of secondary nodes as strings
269 "INSTANCE_NAME": name,
270 "INSTANCE_PRIMARY": primary_node,
271 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
272 "INSTANCE_OS_TYPE": os_type,
273 "INSTANCE_STATUS": status,
274 "INSTANCE_MEMORY": memory,
275 "INSTANCE_VCPUS": vcpus,
279 nic_count = len(nics)
280 for idx, (ip, bridge, mac) in enumerate(nics):
283 env["INSTANCE_NIC%d_IP" % idx] = ip
284 env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
285 env["INSTANCE_NIC%d_HWADDR" % idx] = mac
289 env["INSTANCE_NIC_COUNT"] = nic_count
294 def _BuildInstanceHookEnvByObject(instance, override=None):
295 """Builds instance related env variables for hooks from an object.
298 instance: objects.Instance object of instance
299 override: dict of values to override
302 'name': instance.name,
303 'primary_node': instance.primary_node,
304 'secondary_nodes': instance.secondary_nodes,
305 'os_type': instance.os,
306 'status': instance.status,
307 'memory': instance.memory,
308 'vcpus': instance.vcpus,
309 'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
312 args.update(override)
313 return _BuildInstanceHookEnv(**args)
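# Hypothetical usage sketch: a caller wanting to advertise a different status
# could pass an override dict, e.g.
#   env = _BuildInstanceHookEnvByObject(instance, override={'status': 'up'})
# override keys simply replace the corresponding entries built above before
# they are expanded into _BuildInstanceHookEnv.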
316 def _CheckInstanceBridgesExist(instance):
317 """Check that the brigdes needed by an instance exist.
320 # check bridges existance
321 brlist = [nic.bridge for nic in instance.nics]
322 if not rpc.call_bridges_exist(instance.primary_node, brlist):
323 raise errors.OpPrereqError("one or more target bridges %s does not"
324 " exist on destination node '%s'" %
325 (brlist, instance.primary_node))
328 class LUDestroyCluster(NoHooksLU):
329 """Logical unit for destroying the cluster.
334 def CheckPrereq(self):
335 """Check prerequisites.
337 This checks whether the cluster is empty.
339 Any errors are signalled by raising errors.OpPrereqError.
342 master = self.sstore.GetMasterNode()
344 nodelist = self.cfg.GetNodeList()
345 if len(nodelist) != 1 or nodelist[0] != master:
346 raise errors.OpPrereqError("There are still %d node(s) in"
347 " this cluster." % (len(nodelist) - 1))
348 instancelist = self.cfg.GetInstanceList()
350 raise errors.OpPrereqError("There are still %d instance(s) in"
351 " this cluster." % len(instancelist))
353 def Exec(self, feedback_fn):
354 """Destroys the cluster.
357 master = self.sstore.GetMasterNode()
358 if not rpc.call_node_stop_master(master):
359 raise errors.OpExecError("Could not disable the master role")
360 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
361 utils.CreateBackup(priv_key)
362 utils.CreateBackup(pub_key)
363 rpc.call_node_leave_cluster(master)
366 class LUVerifyCluster(LogicalUnit):
367 """Verifies the cluster status.
370 HPATH = "cluster-verify"
371 HTYPE = constants.HTYPE_CLUSTER
372 _OP_REQP = ["skip_checks"]
374 def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
375 remote_version, feedback_fn):
376 """Run multiple tests against a node.
379 - compares ganeti version
380 - checks vg existence and size > 20G
381 - checks config file checksum
382 - checks ssh to other nodes
385 node: name of the node to check
386 file_list: required list of files
387 local_cksum: dictionary of local files and their checksums
390 # compares ganeti version
391 local_version = constants.PROTOCOL_VERSION
392 if not remote_version:
393 feedback_fn(" - ERROR: connection to %s failed" % (node))
396 if local_version != remote_version:
397 feedback_fn(" - ERROR: sw version mismatch: master %s, node(%s) %s" %
398 (local_version, node, remote_version))
401 # checks vg existence and size > 20G
405 feedback_fn(" - ERROR: unable to check volume groups on node %s." %
409 vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
410 constants.MIN_VG_SIZE)
412 feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
415 # checks config file checksum
418 if 'filelist' not in node_result:
420 feedback_fn(" - ERROR: node hasn't returned file checksum data")
422 remote_cksum = node_result['filelist']
423 for file_name in file_list:
424 if file_name not in remote_cksum:
426 feedback_fn(" - ERROR: file '%s' missing" % file_name)
427 elif remote_cksum[file_name] != local_cksum[file_name]:
429 feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)
431 if 'nodelist' not in node_result:
433 feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
435 if node_result['nodelist']:
437 for node in node_result['nodelist']:
438 feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
439 (node, node_result['nodelist'][node]))
440 if 'node-net-test' not in node_result:
442 feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
444 if node_result['node-net-test']:
446 nlist = utils.NiceSort(node_result['node-net-test'].keys())
448 feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
449 (node, node_result['node-net-test'][node]))
451 hyp_result = node_result.get('hypervisor', None)
452 if hyp_result is not None:
453 feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
456 def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
457 node_instance, feedback_fn):
458 """Verify an instance.
460 This function checks to see if the required block devices are
461 available on the instance's node.
466 node_current = instanceconfig.primary_node
469 instanceconfig.MapLVsByNode(node_vol_should)
471 for node in node_vol_should:
472 for volume in node_vol_should[node]:
473 if node not in node_vol_is or volume not in node_vol_is[node]:
474 feedback_fn(" - ERROR: volume %s missing on node %s" %
478 if not instanceconfig.status == 'down':
479 if (node_current not in node_instance or
480 not instance in node_instance[node_current]):
481 feedback_fn(" - ERROR: instance %s not running on node %s" %
482 (instance, node_current))
485 for node in node_instance:
486 if (not node == node_current):
487 if instance in node_instance[node]:
488 feedback_fn(" - ERROR: instance %s should not run on node %s" %
494 def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
495 """Verify if there are any unknown volumes in the cluster.
497 The .os, .swap and backup volumes are ignored. All other volumes are
503 for node in node_vol_is:
504 for volume in node_vol_is[node]:
505 if node not in node_vol_should or volume not in node_vol_should[node]:
506 feedback_fn(" - ERROR: volume %s on node %s should not exist" %
511 def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
512 """Verify the list of running instances.
514 This checks what instances are running but unknown to the cluster.
518 for node in node_instance:
519 for runninginstance in node_instance[node]:
520 if runninginstance not in instancelist:
521 feedback_fn(" - ERROR: instance %s on node %s should not exist" %
522 (runninginstance, node))
526 def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
527 """Verify N+1 Memory Resilience.
529 Check that if one single node dies we can still start all the instances it
535 for node, nodeinfo in node_info.iteritems():
536 # This code checks that every node which is now listed as secondary has
537 # enough memory to host all instances it is supposed to should a single
538 # other node in the cluster fail.
539 # FIXME: not ready for failover to an arbitrary node
540 # FIXME: does not support file-backed instances
541 # WARNING: we currently take into account down instances as well as up
542 # ones, considering that even if they're down someone might want to start
543 # them even in the event of a node failure.
544 for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
546 for instance in instances:
547 needed_mem += instance_cfg[instance].memory
548 if nodeinfo['mfree'] < needed_mem:
549 feedback_fn(" - ERROR: not enough memory on node %s to accomodate"
550 " failovers should node %s fail" % (node, prinode))
554 def CheckPrereq(self):
555 """Check prerequisites.
557 Transform the list of checks we're going to skip into a set and check that
558 all its members are valid.
561 self.skip_set = frozenset(self.op.skip_checks)
562 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
563 raise errors.OpPrereqError("Invalid checks to be skipped specified")
565 def BuildHooksEnv(self):
568 Cluster-Verify hooks are run only in the post phase; if they fail, their
569 output is logged in the verify output and the verification fails.
572 all_nodes = self.cfg.GetNodeList()
573 # TODO: populate the environment with useful information for verify hooks
575 return env, [], all_nodes
577 def Exec(self, feedback_fn):
578 """Verify integrity of cluster, performing various test on nodes.
582 feedback_fn("* Verifying global settings")
583 for msg in self.cfg.VerifyConfig():
584 feedback_fn(" - ERROR: %s" % msg)
586 vg_name = self.cfg.GetVGName()
587 nodelist = utils.NiceSort(self.cfg.GetNodeList())
588 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
589 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
590 i_non_redundant = [] # Non redundant instances
596 # FIXME: verify OS list
598 file_names = list(self.sstore.GetFileList())
599 file_names.append(constants.SSL_CERT_FILE)
600 file_names.append(constants.CLUSTER_CONF_FILE)
601 local_checksums = utils.FingerprintFiles(file_names)
603 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
604 all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
605 all_instanceinfo = rpc.call_instance_list(nodelist)
606 all_vglist = rpc.call_vg_list(nodelist)
607 node_verify_param = {
608 'filelist': file_names,
609 'nodelist': nodelist,
611 'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
612 for node in nodeinfo]
614 all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
615 all_rversion = rpc.call_version(nodelist)
616 all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
618 for node in nodelist:
619 feedback_fn("* Verifying node %s" % node)
620 result = self._VerifyNode(node, file_names, local_checksums,
621 all_vglist[node], all_nvinfo[node],
622 all_rversion[node], feedback_fn)
626 volumeinfo = all_volumeinfo[node]
628 if isinstance(volumeinfo, basestring):
629 feedback_fn(" - ERROR: LVM problem on node %s: %s" %
630 (node, volumeinfo[-400:].encode('string_escape')))
632 node_volume[node] = {}
633 elif not isinstance(volumeinfo, dict):
634 feedback_fn(" - ERROR: connection to %s failed" % (node,))
638 node_volume[node] = volumeinfo
641 nodeinstance = all_instanceinfo[node]
642 if not isinstance(nodeinstance, list):
643 feedback_fn(" - ERROR: connection to %s failed" % (node,))
647 node_instance[node] = nodeinstance
650 nodeinfo = all_ninfo[node]
651 if not isinstance(nodeinfo, dict):
652 feedback_fn(" - ERROR: connection to %s failed" % (node,))
658 "mfree": int(nodeinfo['memory_free']),
659 "dfree": int(nodeinfo['vg_free']),
662 # dictionary holding all instances this node is secondary for,
663 # grouped by their primary node. Each key is a cluster node, and each
664 # value is a list of instances which have the key as primary and the
665 # current node as secondary. this is handy to calculate N+1 memory
666 # availability if you can only failover from a primary to its
668 "sinst-by-pnode": {},
671 feedback_fn(" - ERROR: invalid value returned from node %s" % (node,))
677 for instance in instancelist:
678 feedback_fn("* Verifying instance %s" % instance)
679 inst_config = self.cfg.GetInstanceInfo(instance)
680 result = self._VerifyInstance(instance, inst_config, node_volume,
681 node_instance, feedback_fn)
684 inst_config.MapLVsByNode(node_vol_should)
686 instance_cfg[instance] = inst_config
688 pnode = inst_config.primary_node
689 if pnode in node_info:
690 node_info[pnode]['pinst'].append(instance)
692 feedback_fn(" - ERROR: instance %s, connection to primary node"
693 " %s failed" % (instance, pnode))
696 # If the instance is non-redundant we cannot survive losing its primary
697 # node, so we are not N+1 compliant. On the other hand we have no disk
698 # templates with more than one secondary so that situation is not well
700 # FIXME: does not support file-backed instances
701 if len(inst_config.secondary_nodes) == 0:
702 i_non_redundant.append(instance)
703 elif len(inst_config.secondary_nodes) > 1:
704 feedback_fn(" - WARNING: multiple secondaries for instance %s"
707 for snode in inst_config.secondary_nodes:
708 if snode in node_info:
709 node_info[snode]['sinst'].append(instance)
710 if pnode not in node_info[snode]['sinst-by-pnode']:
711 node_info[snode]['sinst-by-pnode'][pnode] = []
712 node_info[snode]['sinst-by-pnode'][pnode].append(instance)
714 feedback_fn(" - ERROR: instance %s, connection to secondary node"
715 " %s failed" % (instance, snode))
717 feedback_fn("* Verifying orphan volumes")
718 result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
722 feedback_fn("* Verifying remaining instances")
723 result = self._VerifyOrphanInstances(instancelist, node_instance,
727 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
728 feedback_fn("* Verifying N+1 Memory redundancy")
729 result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
732 feedback_fn("* Other Notes")
734 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
735 % len(i_non_redundant))
739 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
740 """Analize the post-hooks' result, handle it, and send some
741 nicely-formatted feedback back to the user.
744 phase: the hooks phase that has just been run
745 hooks_results: the results of the multi-node hooks rpc call
746 feedback_fn: function to send feedback back to the caller
747 lu_result: previous Exec result
750 # We only really run POST phase hooks, and are only interested in their results
751 if phase == constants.HOOKS_PHASE_POST:
752 # Used to change hooks' output to proper indentation
753 indent_re = re.compile('^', re.M)
754 feedback_fn("* Hooks Results")
755 if not hooks_results:
756 feedback_fn(" - ERROR: general communication failure")
759 for node_name in hooks_results:
760 show_node_header = True
761 res = hooks_results[node_name]
762 if res is False or not isinstance(res, list):
763 feedback_fn(" Communication failure")
766 for script, hkr, output in res:
767 if hkr == constants.HKR_FAIL:
768 # The node header is only shown once, if there are
769 # failing hooks on that node
771 feedback_fn(" Node %s:" % node_name)
772 show_node_header = False
773 feedback_fn(" ERROR: Script %s failed, output:" % script)
774 output = indent_re.sub(' ', output)
775 feedback_fn("%s" % output)
781 class LUVerifyDisks(NoHooksLU):
782 """Verifies the cluster disks status.
787 def CheckPrereq(self):
788 """Check prerequisites.
790 This has no prerequisites.
795 def Exec(self, feedback_fn):
796 """Verify integrity of cluster disks.
799 result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}
801 vg_name = self.cfg.GetVGName()
802 nodes = utils.NiceSort(self.cfg.GetNodeList())
803 instances = [self.cfg.GetInstanceInfo(name)
804 for name in self.cfg.GetInstanceList()]
807 for inst in instances:
809 if (inst.status != "up" or
810 inst.disk_template not in constants.DTS_NET_MIRROR):
812 inst.MapLVsByNode(inst_lvs)
813 # transform { iname: {node: [vol,],},} to {(node, vol): inst}
814 for node, vol_list in inst_lvs.iteritems():
816 nv_dict[(node, vol)] = inst
821 node_lvs = rpc.call_volume_list(nodes, vg_name)
828 if isinstance(lvs, basestring):
829 logger.Info("error enumerating LVs on node %s: %s" % (node, lvs))
831 elif not isinstance(lvs, dict):
832 logger.Info("connection to node %s failed or invalid data returned" %
834 res_nodes.append(node)
837 for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
838 inst = nv_dict.pop((node, lv_name), None)
839 if (not lv_online and inst is not None
840 and inst.name not in res_instances):
841 res_instances.append(inst.name)
843 # any leftover items in nv_dict are missing LVs, let's arrange the
845 for key, inst in nv_dict.iteritems():
846 if inst.name not in res_missing:
847 res_missing[inst.name] = []
848 res_missing[inst.name].append(key)
853 class LURenameCluster(LogicalUnit):
854 """Rename the cluster.
857 HPATH = "cluster-rename"
858 HTYPE = constants.HTYPE_CLUSTER
862 def BuildHooksEnv(self):
867 "OP_TARGET": self.sstore.GetClusterName(),
868 "NEW_NAME": self.op.name,
870 mn = self.sstore.GetMasterNode()
871 return env, [mn], [mn]
873 def CheckPrereq(self):
874 """Verify that the passed name is a valid one.
877 hostname = utils.HostInfo(self.op.name)
879 new_name = hostname.name
880 self.ip = new_ip = hostname.ip
881 old_name = self.sstore.GetClusterName()
882 old_ip = self.sstore.GetMasterIP()
883 if new_name == old_name and new_ip == old_ip:
884 raise errors.OpPrereqError("Neither the name nor the IP address of the"
885 " cluster has changed")
887 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
888 raise errors.OpPrereqError("The given cluster IP address (%s) is"
889 " reachable on the network. Aborting." %
892 self.op.name = new_name
894 def Exec(self, feedback_fn):
895 """Rename the cluster.
898 clustername = self.op.name
902 # shutdown the master IP
903 master = ss.GetMasterNode()
904 if not rpc.call_node_stop_master(master):
905 raise errors.OpExecError("Could not disable the master role")
909 ss.SetKey(ss.SS_MASTER_IP, ip)
910 ss.SetKey(ss.SS_CLUSTER_NAME, clustername)
912 # Distribute updated ss config to all nodes
913 myself = self.cfg.GetNodeInfo(master)
914 dist_nodes = self.cfg.GetNodeList()
915 if myself.name in dist_nodes:
916 dist_nodes.remove(myself.name)
918 logger.Debug("Copying updated ssconf data to all nodes")
919 for keyname in [ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP]:
920 fname = ss.KeyToFilename(keyname)
921 result = rpc.call_upload_file(dist_nodes, fname)
922 for to_node in dist_nodes:
923 if not result[to_node]:
924 logger.Error("copy of file %s to node %s failed" %
927 if not rpc.call_node_start_master(master):
928 logger.Error("Could not re-enable the master role on the master,"
929 " please restart manually.")
932 def _RecursiveCheckIfLVMBased(disk):
933 """Check if the given disk or its children are lvm-based.
936 disk: ganeti.objects.Disk object
939 boolean indicating whether a LD_LV dev_type was found or not
943 for chdisk in disk.children:
944 if _RecursiveCheckIfLVMBased(chdisk):
946 return disk.dev_type == constants.LD_LV
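# Illustration with a hypothetical disk tree: a DRBD disk whose children are
# two LD_LV volumes returns True as soon as an LV child is reached, while a
# tree containing no LD_LV device anywhere returns False.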
949 class LUSetClusterParams(LogicalUnit):
950 """Change the parameters of the cluster.
953 HPATH = "cluster-modify"
954 HTYPE = constants.HTYPE_CLUSTER
957 def BuildHooksEnv(self):
962 "OP_TARGET": self.sstore.GetClusterName(),
963 "NEW_VG_NAME": self.op.vg_name,
965 mn = self.sstore.GetMasterNode()
966 return env, [mn], [mn]
968 def CheckPrereq(self):
969 """Check prerequisites.
971 This checks whether the given params don't conflict and
972 if the given volume group is valid.
975 if not self.op.vg_name:
976 instances = [self.cfg.GetInstanceInfo(name)
977 for name in self.cfg.GetInstanceList()]
978 for inst in instances:
979 for disk in inst.disks:
980 if _RecursiveCheckIfLVMBased(disk):
981 raise errors.OpPrereqError("Cannot disable lvm storage while"
982 " lvm-based instances exist")
984 # if vg_name is not None, check the given volume group on all nodes
986 node_list = self.cfg.GetNodeList()
987 vglist = rpc.call_vg_list(node_list)
988 for node in node_list:
989 vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
990 constants.MIN_VG_SIZE)
992 raise errors.OpPrereqError("Error on node '%s': %s" %
995 def Exec(self, feedback_fn):
996 """Change the parameters of the cluster.
999 if self.op.vg_name != self.cfg.GetVGName():
1000 self.cfg.SetVGName(self.op.vg_name)
1002 feedback_fn("Cluster LVM configuration already in desired"
1003 " state, not changing")
1006 def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False):
1007 """Sleep and poll for an instance's disk to sync.
1010 if not instance.disks:
1014 proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1016 node = instance.primary_node
1018 for dev in instance.disks:
1019 cfgw.SetDiskID(dev, node)
1025 cumul_degraded = False
1026 rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1028 proc.LogWarning("Can't get any data from node %s" % node)
1031 raise errors.RemoteError("Can't contact node %s for mirror data,"
1032 " aborting." % node)
1036 for i in range(len(rstats)):
1039 proc.LogWarning("Can't compute data for node %s/%s" %
1040 (node, instance.disks[i].iv_name))
1042 # we ignore the ldisk parameter
1043 perc_done, est_time, is_degraded, _ = mstat
1044 cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1045 if perc_done is not None:
1047 if est_time is not None:
1048 rem_time = "%d estimated seconds remaining" % est_time
1051 rem_time = "no time estimate"
1052 proc.LogInfo("- device %s: %5.2f%% done, %s" %
1053 (instance.disks[i].iv_name, perc_done, rem_time))
1058 #utils.Unlock('cmd')
1061 time.sleep(min(60, max_time))
1068 proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1069 return not cumul_degraded
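# Note on the mirror status entries unpacked above: each one is a 4-tuple
# (perc_done, est_time, is_degraded, ldisk); the ldisk flag is ignored here
# but is what _CheckDiskConsistency below can test instead of is_degraded.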
1072 def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False):
1073 """Check that mirrors are not degraded.
1075 The ldisk parameter, if True, will change the test from the
1076 is_degraded attribute (which represents overall non-ok status for
1077 the device(s)) to the ldisk (representing the local storage status).
1080 cfgw.SetDiskID(dev, node)
1087 if on_primary or dev.AssembleOnSecondary():
1088 rstats = rpc.call_blockdev_find(node, dev)
1090 logger.ToStderr("Node %s: Disk degraded, not found or node down" % node)
1093 result = result and (not rstats[idx])
1095 for child in dev.children:
1096 result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)
1101 class LUDiagnoseOS(NoHooksLU):
1102 """Logical unit for OS diagnose/query.
1105 _OP_REQP = ["output_fields", "names"]
1107 def CheckPrereq(self):
1108 """Check prerequisites.
1110 This always succeeds, since this is a pure query LU.
1114 raise errors.OpPrereqError("Selective OS query not supported")
1116 self.dynamic_fields = frozenset(["name", "valid", "node_status"])
1117 _CheckOutputFields(static=[],
1118 dynamic=self.dynamic_fields,
1119 selected=self.op.output_fields)
1122 def _DiagnoseByOS(node_list, rlist):
1123 """Remaps a per-node return list into an a per-os per-node dictionary
1126 node_list: a list with the names of all nodes
1127 rlist: a map with node names as keys and OS objects as values
1130 map: a map with osnames as keys and as value another map, with
1132 keys and list of OS objects as values
1133 e.g. {"debian-etch": {"node1": [<object>,...],
1134 "node2": [<object>,]}
1139 for node_name, nr in rlist.iteritems():
1143 if os_obj.name not in all_os:
1144 # build a list of nodes for this os containing empty lists
1145 # for each node in node_list
1146 all_os[os_obj.name] = {}
1147 for nname in node_list:
1148 all_os[os_obj.name][nname] = []
1149 all_os[os_obj.name][node_name].append(os_obj)
1152 def Exec(self, feedback_fn):
1153 """Compute the list of OSes.
1156 node_list = self.cfg.GetNodeList()
1157 node_data = rpc.call_os_diagnose(node_list)
1158 if node_data == False:
1159 raise errors.OpExecError("Can't gather the list of OSes")
1160 pol = self._DiagnoseByOS(node_list, node_data)
1162 for os_name, os_data in pol.iteritems():
1164 for field in self.op.output_fields:
1167 elif field == "valid":
1168 val = utils.all([osl and osl[0] for osl in os_data.values()])
1169 elif field == "node_status":
1171 for node_name, nos_list in os_data.iteritems():
1172 val[node_name] = [(v.status, v.path) for v in nos_list]
1174 raise errors.ParameterError(field)
1181 class LURemoveNode(LogicalUnit):
1182 """Logical unit for removing a node.
1185 HPATH = "node-remove"
1186 HTYPE = constants.HTYPE_NODE
1187 _OP_REQP = ["node_name"]
1189 def BuildHooksEnv(self):
1192 This doesn't run on the target node in the pre phase as a failed
1193 node would then be impossible to remove.
1197 "OP_TARGET": self.op.node_name,
1198 "NODE_NAME": self.op.node_name,
1200 all_nodes = self.cfg.GetNodeList()
1201 all_nodes.remove(self.op.node_name)
1202 return env, all_nodes, all_nodes
1204 def CheckPrereq(self):
1205 """Check prerequisites.
1208 - the node exists in the configuration
1209 - it does not have primary or secondary instances
1210 - it's not the master
1212 Any errors are signalled by raising errors.OpPrereqError.
1215 node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1217 raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1219 instance_list = self.cfg.GetInstanceList()
1221 masternode = self.sstore.GetMasterNode()
1222 if node.name == masternode:
1223 raise errors.OpPrereqError("Node is the master node,"
1224 " you need to failover first.")
1226 for instance_name in instance_list:
1227 instance = self.cfg.GetInstanceInfo(instance_name)
1228 if node.name == instance.primary_node:
1229 raise errors.OpPrereqError("Instance %s still running on the node,"
1230 " please remove first." % instance_name)
1231 if node.name in instance.secondary_nodes:
1232 raise errors.OpPrereqError("Instance %s has node as a secondary,"
1233 " please remove first." % instance_name)
1234 self.op.node_name = node.name
1237 def Exec(self, feedback_fn):
1238 """Removes the node from the cluster.
1242 logger.Info("stopping the node daemon and removing configs from node %s" %
1245 rpc.call_node_leave_cluster(node.name)
1247 logger.Info("Removing node %s from config" % node.name)
1249 self.cfg.RemoveNode(node.name)
1250 # Remove the node from the Ganeti Lock Manager
1251 self.context.glm.remove(locking.LEVEL_NODE, node.name)
1253 utils.RemoveHostFromEtcHosts(node.name)
1256 class LUQueryNodes(NoHooksLU):
1257 """Logical unit for querying nodes.
1260 _OP_REQP = ["output_fields", "names"]
1262 def CheckPrereq(self):
1263 """Check prerequisites.
1265 This checks that the fields required are valid output fields.
1268 self.dynamic_fields = frozenset([
1270 "mtotal", "mnode", "mfree",
1275 _CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
1276 "pinst_list", "sinst_list",
1277 "pip", "sip", "tags"],
1278 dynamic=self.dynamic_fields,
1279 selected=self.op.output_fields)
1281 self.wanted = _GetWantedNodes(self, self.op.names)
1283 def Exec(self, feedback_fn):
1284 """Computes the list of nodes and their attributes.
1287 nodenames = self.wanted
1288 nodelist = [self.cfg.GetNodeInfo(name) for name in nodenames]
1290 # begin data gathering
1292 if self.dynamic_fields.intersection(self.op.output_fields):
1294 node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
1295 for name in nodenames:
1296 nodeinfo = node_data.get(name, None)
1299 "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
1300 "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
1301 "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
1302 "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
1303 "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
1304 "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
1305 "bootid": nodeinfo['bootid'],
1308 live_data[name] = {}
1310 live_data = dict.fromkeys(nodenames, {})
1312 node_to_primary = dict([(name, set()) for name in nodenames])
1313 node_to_secondary = dict([(name, set()) for name in nodenames])
1315 inst_fields = frozenset(("pinst_cnt", "pinst_list",
1316 "sinst_cnt", "sinst_list"))
1317 if inst_fields & frozenset(self.op.output_fields):
1318 instancelist = self.cfg.GetInstanceList()
1320 for instance_name in instancelist:
1321 inst = self.cfg.GetInstanceInfo(instance_name)
1322 if inst.primary_node in node_to_primary:
1323 node_to_primary[inst.primary_node].add(inst.name)
1324 for secnode in inst.secondary_nodes:
1325 if secnode in node_to_secondary:
1326 node_to_secondary[secnode].add(inst.name)
1328 # end data gathering
1331 for node in nodelist:
1333 for field in self.op.output_fields:
1336 elif field == "pinst_list":
1337 val = list(node_to_primary[node.name])
1338 elif field == "sinst_list":
1339 val = list(node_to_secondary[node.name])
1340 elif field == "pinst_cnt":
1341 val = len(node_to_primary[node.name])
1342 elif field == "sinst_cnt":
1343 val = len(node_to_secondary[node.name])
1344 elif field == "pip":
1345 val = node.primary_ip
1346 elif field == "sip":
1347 val = node.secondary_ip
1348 elif field == "tags":
1349 val = list(node.GetTags())
1350 elif field in self.dynamic_fields:
1351 val = live_data[node.name].get(field, None)
1353 raise errors.ParameterError(field)
1354 node_output.append(val)
1355 output.append(node_output)
1360 class LUQueryNodeVolumes(NoHooksLU):
1361 """Logical unit for getting volumes on node(s).
1364 _OP_REQP = ["nodes", "output_fields"]
1366 def CheckPrereq(self):
1367 """Check prerequisites.
1369 This checks that the fields required are valid output fields.
1372 self.nodes = _GetWantedNodes(self, self.op.nodes)
1374 _CheckOutputFields(static=["node"],
1375 dynamic=["phys", "vg", "name", "size", "instance"],
1376 selected=self.op.output_fields)
1379 def Exec(self, feedback_fn):
1380 """Computes the list of nodes and their attributes.
1383 nodenames = self.nodes
1384 volumes = rpc.call_node_volumes(nodenames)
1386 ilist = [self.cfg.GetInstanceInfo(iname) for iname
1387 in self.cfg.GetInstanceList()]
1389 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
1392 for node in nodenames:
1393 if node not in volumes or not volumes[node]:
1396 node_vols = volumes[node][:]
1397 node_vols.sort(key=lambda vol: vol['dev'])
1399 for vol in node_vols:
1401 for field in self.op.output_fields:
1404 elif field == "phys":
1408 elif field == "name":
1410 elif field == "size":
1411 val = int(float(vol['size']))
1412 elif field == "instance":
1414 if node not in lv_by_node[inst]:
1416 if vol['name'] in lv_by_node[inst][node]:
1422 raise errors.ParameterError(field)
1423 node_output.append(str(val))
1425 output.append(node_output)
1430 class LUAddNode(LogicalUnit):
1431 """Logical unit for adding node to the cluster.
1435 HTYPE = constants.HTYPE_NODE
1436 _OP_REQP = ["node_name"]
1438 def BuildHooksEnv(self):
1441 This will run on all nodes before, and on all nodes + the new node after.
1445 "OP_TARGET": self.op.node_name,
1446 "NODE_NAME": self.op.node_name,
1447 "NODE_PIP": self.op.primary_ip,
1448 "NODE_SIP": self.op.secondary_ip,
1450 nodes_0 = self.cfg.GetNodeList()
1451 nodes_1 = nodes_0 + [self.op.node_name, ]
1452 return env, nodes_0, nodes_1
1454 def CheckPrereq(self):
1455 """Check prerequisites.
1458 - the new node is not already in the config
1460 - its parameters (single/dual homed) match the cluster
1462 Any errors are signalled by raising errors.OpPrereqError.
1465 node_name = self.op.node_name
1468 dns_data = utils.HostInfo(node_name)
1470 node = dns_data.name
1471 primary_ip = self.op.primary_ip = dns_data.ip
1472 secondary_ip = getattr(self.op, "secondary_ip", None)
1473 if secondary_ip is None:
1474 secondary_ip = primary_ip
1475 if not utils.IsValidIP(secondary_ip):
1476 raise errors.OpPrereqError("Invalid secondary IP given")
1477 self.op.secondary_ip = secondary_ip
1479 node_list = cfg.GetNodeList()
1480 if not self.op.readd and node in node_list:
1481 raise errors.OpPrereqError("Node %s is already in the configuration" %
1483 elif self.op.readd and node not in node_list:
1484 raise errors.OpPrereqError("Node %s is not in the configuration" % node)
1486 for existing_node_name in node_list:
1487 existing_node = cfg.GetNodeInfo(existing_node_name)
1489 if self.op.readd and node == existing_node_name:
1490 if (existing_node.primary_ip != primary_ip or
1491 existing_node.secondary_ip != secondary_ip):
1492 raise errors.OpPrereqError("Readded node doesn't have the same IP"
1493 " address configuration as before")
1496 if (existing_node.primary_ip == primary_ip or
1497 existing_node.secondary_ip == primary_ip or
1498 existing_node.primary_ip == secondary_ip or
1499 existing_node.secondary_ip == secondary_ip):
1500 raise errors.OpPrereqError("New node ip address(es) conflict with"
1501 " existing node %s" % existing_node.name)
1503 # check that the type of the node (single versus dual homed) is the
1504 # same as for the master
1505 myself = cfg.GetNodeInfo(self.sstore.GetMasterNode())
1506 master_singlehomed = myself.secondary_ip == myself.primary_ip
1507 newbie_singlehomed = secondary_ip == primary_ip
1508 if master_singlehomed != newbie_singlehomed:
1509 if master_singlehomed:
1510 raise errors.OpPrereqError("The master has no private ip but the"
1511 " new node has one")
1513 raise errors.OpPrereqError("The master has a private ip but the"
1514 " new node doesn't have one")
1516 # checks reachability
1517 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
1518 raise errors.OpPrereqError("Node not reachable by ping")
1520 if not newbie_singlehomed:
1521 # check reachability from my secondary ip to newbie's secondary ip
1522 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
1523 source=myself.secondary_ip):
1524 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
1525 " based ping to noded port")
1527 self.new_node = objects.Node(name=node,
1528 primary_ip=primary_ip,
1529 secondary_ip=secondary_ip)
1531 def Exec(self, feedback_fn):
1532 """Adds the new node to the cluster.
1535 new_node = self.new_node
1536 node = new_node.name
1538 # check connectivity
1539 result = rpc.call_version([node])[node]
1541 if constants.PROTOCOL_VERSION == result:
1542 logger.Info("communication to node %s fine, sw version %s match" %
1545 raise errors.OpExecError("Version mismatch master version %s,"
1546 " node version %s" %
1547 (constants.PROTOCOL_VERSION, result))
1549 raise errors.OpExecError("Cannot get version from the new node")
1552 logger.Info("copy ssh key to node %s" % node)
1553 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1555 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
1556 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
1562 keyarray.append(f.read())
1566 result = rpc.call_node_add(node, keyarray[0], keyarray[1], keyarray[2],
1567 keyarray[3], keyarray[4], keyarray[5])
1570 raise errors.OpExecError("Cannot transfer ssh keys to the new node")
1572 # Add node to our /etc/hosts, and add key to known_hosts
1573 utils.AddHostToEtcHosts(new_node.name)
1575 if new_node.secondary_ip != new_node.primary_ip:
1576 if not rpc.call_node_tcp_ping(new_node.name,
1577 constants.LOCALHOST_IP_ADDRESS,
1578 new_node.secondary_ip,
1579 constants.DEFAULT_NODED_PORT,
1581 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
1582 " you gave (%s). Please fix and re-run this"
1583 " command." % new_node.secondary_ip)
1585 node_verify_list = [self.sstore.GetMasterNode()]
1586 node_verify_param = {
1588 # TODO: do a node-net-test as well?
1591 result = rpc.call_node_verify(node_verify_list, node_verify_param)
1592 for verifier in node_verify_list:
1593 if not result[verifier]:
1594 raise errors.OpExecError("Cannot communicate with %s's node daemon"
1595 " for remote verification" % verifier)
1596 if result[verifier]['nodelist']:
1597 for failed in result[verifier]['nodelist']:
1598 feedback_fn("ssh/hostname verification failed %s -> %s" %
1599 (verifier, result[verifier]['nodelist'][failed]))
1600 raise errors.OpExecError("ssh/hostname verification failed.")
1602 # Distribute updated /etc/hosts and known_hosts to all nodes,
1603 # including the node just added
1604 myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
1605 dist_nodes = self.cfg.GetNodeList()
1606 if not self.op.readd:
1607 dist_nodes.append(node)
1608 if myself.name in dist_nodes:
1609 dist_nodes.remove(myself.name)
1611 logger.Debug("Copying hosts and known_hosts to all nodes")
1612 for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
1613 result = rpc.call_upload_file(dist_nodes, fname)
1614 for to_node in dist_nodes:
1615 if not result[to_node]:
1616 logger.Error("copy of file %s to node %s failed" %
1619 to_copy = self.sstore.GetFileList()
1620 if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
1621 to_copy.append(constants.VNC_PASSWORD_FILE)
1622 for fname in to_copy:
1623 result = rpc.call_upload_file([node], fname)
1624 if not result[node]:
1625 logger.Error("could not copy file %s to node %s" % (fname, node))
1627 if not self.op.readd:
1628 logger.Info("adding node %s to cluster.conf" % node)
1629 self.cfg.AddNode(new_node)
1630 # Add the new node to the Ganeti Lock Manager
1631 self.context.glm.add(locking.LEVEL_NODE, node)
1634 class LUMasterFailover(LogicalUnit):
1635 """Failover the master node to the current node.
1637 This is a special LU in that it must run on a non-master node.
1640 HPATH = "master-failover"
1641 HTYPE = constants.HTYPE_CLUSTER
1646 def BuildHooksEnv(self):
1649 This will run on the new master only in the pre phase, and on all
1650 the nodes in the post phase.
1654 "OP_TARGET": self.new_master,
1655 "NEW_MASTER": self.new_master,
1656 "OLD_MASTER": self.old_master,
1658 return env, [self.new_master], self.cfg.GetNodeList()
1660 def CheckPrereq(self):
1661 """Check prerequisites.
1663 This checks that we are not already the master.
1666 self.new_master = utils.HostInfo().name
1667 self.old_master = self.sstore.GetMasterNode()
1669 if self.old_master == self.new_master:
1670 raise errors.OpPrereqError("This commands must be run on the node"
1671 " where you want the new master to be."
1672 " %s is already the master" %
1675 def Exec(self, feedback_fn):
1676 """Failover the master node.
1678 This command, when run on a non-master node, will cause the current
1679 master to cease being master, and the non-master to become new
1683 #TODO: do not rely on gethostname returning the FQDN
1684 logger.Info("setting master to %s, old master: %s" %
1685 (self.new_master, self.old_master))
1687 if not rpc.call_node_stop_master(self.old_master):
1688 logger.Error("could disable the master role on the old master"
1689 " %s, please disable manually" % self.old_master)
1692 ss.SetKey(ss.SS_MASTER_NODE, self.new_master)
1693 if not rpc.call_upload_file(self.cfg.GetNodeList(),
1694 ss.KeyToFilename(ss.SS_MASTER_NODE)):
1695 logger.Error("could not distribute the new simple store master file"
1696 " to the other nodes, please check.")
1698 if not rpc.call_node_start_master(self.new_master):
1699 logger.Error("could not start the master role on the new master"
1700 " %s, please check" % self.new_master)
1701 feedback_fn("Error in activating the master IP on the new master,"
1702 " please fix manually.")
1706 class LUQueryClusterInfo(NoHooksLU):
1707 """Query cluster configuration.
1713 def CheckPrereq(self):
1714 """No prerequsites needed for this LU.
1719 def Exec(self, feedback_fn):
1720 """Return cluster config.
1724 "name": self.sstore.GetClusterName(),
1725 "software_version": constants.RELEASE_VERSION,
1726 "protocol_version": constants.PROTOCOL_VERSION,
1727 "config_version": constants.CONFIG_VERSION,
1728 "os_api_version": constants.OS_API_VERSION,
1729 "export_version": constants.EXPORT_VERSION,
1730 "master": self.sstore.GetMasterNode(),
1731 "architecture": (platform.architecture()[0], platform.machine()),
1732 "hypervisor_type": self.sstore.GetHypervisorType(),
1738 class LUDumpClusterConfig(NoHooksLU):
1739 """Return a text-representation of the cluster-config.
1744 def CheckPrereq(self):
1745 """No prerequisites.
1750 def Exec(self, feedback_fn):
1751 """Dump a representation of the cluster config to the standard output.
1754 return self.cfg.DumpConfig()
1757 class LUActivateInstanceDisks(NoHooksLU):
1758 """Bring up an instance's disks.
1761 _OP_REQP = ["instance_name"]
1763 def CheckPrereq(self):
1764 """Check prerequisites.
1766 This checks that the instance is in the cluster.
1769 instance = self.cfg.GetInstanceInfo(
1770 self.cfg.ExpandInstanceName(self.op.instance_name))
1771 if instance is None:
1772 raise errors.OpPrereqError("Instance '%s' not known" %
1773 self.op.instance_name)
1774 self.instance = instance
1777 def Exec(self, feedback_fn):
1778 """Activate the disks.
1781 disks_ok, disks_info = _AssembleInstanceDisks(self.instance, self.cfg)
1783 raise errors.OpExecError("Cannot activate block devices")
1788 def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
1789 """Prepare the block devices for an instance.
1791 This sets up the block devices on all nodes.
1794 instance: a ganeti.objects.Instance object
1795 ignore_secondaries: if true, errors on secondary nodes won't result
1796 in an error return from the function
1799 false if the operation failed
1800 list of (host, instance_visible_name, node_visible_name) if the operation
1801 succeeded with the mapping from node devices to instance devices
1805 iname = instance.name
1806 # With the two-pass mechanism we try to reduce the window of
1807 # opportunity for the race condition of switching DRBD to primary
1808 # before handshaking has occurred, but we do not eliminate it
1810 # The proper fix would be to wait (with some limits) until the
1811 # connection has been made and drbd transitions from WFConnection
1812 # into any other network-connected state (Connected, SyncTarget,
1815 # 1st pass, assemble on all nodes in secondary mode
1816 for inst_disk in instance.disks:
1817 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
1818 cfg.SetDiskID(node_disk, node)
1819 result = rpc.call_blockdev_assemble(node, node_disk, iname, False)
1821 logger.Error("could not prepare block device %s on node %s"
1822 " (is_primary=False, pass=1)" % (inst_disk.iv_name, node))
1823 if not ignore_secondaries:
1826 # FIXME: race condition on drbd migration to primary
1828 # 2nd pass, do only the primary node
1829 for inst_disk in instance.disks:
1830 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
1831 if node != instance.primary_node:
1833 cfg.SetDiskID(node_disk, node)
1834 result = rpc.call_blockdev_assemble(node, node_disk, iname, True)
1836 logger.Error("could not prepare block device %s on node %s"
1837 " (is_primary=True, pass=2)" % (inst_disk.iv_name, node))
1839 device_info.append((instance.primary_node, inst_disk.iv_name, result))
1841 # leave the disks configured for the primary node
1842 # this is a workaround that would be fixed better by
1843 # improving the logical/physical id handling
1844 for disk in instance.disks:
1845 cfg.SetDiskID(disk, instance.primary_node)
1847 return disks_ok, device_info
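# Sketch of the return value with hypothetical names: on success this yields
# (True, [(primary_node, 'sda', '/dev/drbd0'), ...]) -- one
# (node, iv_name, node-visible device) tuple per disk on the primary node,
# matching the docstring above.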
1850 def _StartInstanceDisks(cfg, instance, force):
1851 """Start the disks of an instance.
1854 disks_ok, dummy = _AssembleInstanceDisks(instance, cfg,
1855 ignore_secondaries=force)
1857 _ShutdownInstanceDisks(instance, cfg)
1858 if force is not None and not force:
1859 logger.Error("If the message above refers to a secondary node,"
1860 " you can retry the operation using '--force'.")
1861 raise errors.OpExecError("Disk consistency error")
1864 class LUDeactivateInstanceDisks(NoHooksLU):
1865 """Shutdown an instance's disks.
1868 _OP_REQP = ["instance_name"]
1870 def CheckPrereq(self):
1871 """Check prerequisites.
1873 This checks that the instance is in the cluster.
1876 instance = self.cfg.GetInstanceInfo(
1877 self.cfg.ExpandInstanceName(self.op.instance_name))
1878 if instance is None:
1879 raise errors.OpPrereqError("Instance '%s' not known" %
1880 self.op.instance_name)
1881 self.instance = instance
1883 def Exec(self, feedback_fn):
1884 """Deactivate the disks
1887 instance = self.instance
1888 ins_l = rpc.call_instance_list([instance.primary_node])
1889 ins_l = ins_l[instance.primary_node]
1890 if not isinstance(ins_l, list):
1891 raise errors.OpExecError("Can't contact node '%s'" %
1892 instance.primary_node)
1894 if self.instance.name in ins_l:
1895 raise errors.OpExecError("Instance is running, can't shutdown"
1898 _ShutdownInstanceDisks(instance, self.cfg)
1901 def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
1902 """Shutdown block devices of an instance.
1904 This does the shutdown on all nodes of the instance.
1906 If ignore_primary is false, errors on the primary node are
1911 for disk in instance.disks:
1912 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
1913 cfg.SetDiskID(top_disk, node)
1914 if not rpc.call_blockdev_shutdown(node, top_disk):
1915 logger.Error("could not shutdown block device %s on node %s" %
1916 (disk.iv_name, node))
1917 if not ignore_primary or node != instance.primary_node:
1922 def _CheckNodeFreeMemory(cfg, node, reason, requested):
1923 """Checks if a node has enough free memory.
1925 This function checks if a given node has the needed amount of free
1926 memory. In case the node has less memory or we cannot get the
1927 information from the node, this function raises an OpPrereqError
1931 - cfg: a ConfigWriter instance
1932 - node: the node name
1933 - reason: string to use in the error message
1934 - requested: the amount of memory in MiB
1937 nodeinfo = rpc.call_node_info([node], cfg.GetVGName())
1938 if not nodeinfo or not isinstance(nodeinfo, dict):
1939 raise errors.OpPrereqError("Could not contact node %s for resource"
1940 " information" % (node,))
1942 free_mem = nodeinfo[node].get('memory_free')
1943 if not isinstance(free_mem, int):
1944 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
1945 " was '%s'" % (node, free_mem))
1946 if requested > free_mem:
1947 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
1948 " needed %s MiB, available %s MiB" %
1949 (node, reason, requested, free_mem))
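# Hypothetical usage sketch (mirroring the call in LUStartupInstance below):
#   _CheckNodeFreeMemory(self.cfg, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        instance.memory)
# raises OpPrereqError if the node reports less free memory than requested.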
1952 class LUStartupInstance(LogicalUnit):
1953 """Starts an instance.
1956 HPATH = "instance-start"
1957 HTYPE = constants.HTYPE_INSTANCE
1958 _OP_REQP = ["instance_name", "force"]
1960 def BuildHooksEnv(self):
1963 This runs on master, primary and secondary nodes of the instance.
1967 "FORCE": self.op.force,
1969 env.update(_BuildInstanceHookEnvByObject(self.instance))
1970 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
1971 list(self.instance.secondary_nodes))
1974 def CheckPrereq(self):
1975 """Check prerequisites.
1977 This checks that the instance is in the cluster.
1980 instance = self.cfg.GetInstanceInfo(
1981 self.cfg.ExpandInstanceName(self.op.instance_name))
1982 if instance is None:
1983 raise errors.OpPrereqError("Instance '%s' not known" %
1984 self.op.instance_name)
1986 # check bridges existence
1987 _CheckInstanceBridgesExist(instance)
1989 _CheckNodeFreeMemory(self.cfg, instance.primary_node,
1990 "starting instance %s" % instance.name,
1993 self.instance = instance
1994 self.op.instance_name = instance.name
1996 def Exec(self, feedback_fn):
1997 """Start the instance.
2000 instance = self.instance
2001 force = self.op.force
2002 extra_args = getattr(self.op, "extra_args", "")
2004 self.cfg.MarkInstanceUp(instance.name)
2006 node_current = instance.primary_node
2008 _StartInstanceDisks(self.cfg, instance, force)
2010 if not rpc.call_instance_start(node_current, instance, extra_args):
2011 _ShutdownInstanceDisks(instance, self.cfg)
2012 raise errors.OpExecError("Could not start instance")
2015 class LURebootInstance(LogicalUnit):
2016 """Reboot an instance.
2019 HPATH = "instance-reboot"
2020 HTYPE = constants.HTYPE_INSTANCE
2021 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
2023 def BuildHooksEnv(self):
2026 This runs on master, primary and secondary nodes of the instance.
2030 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
2032 env.update(_BuildInstanceHookEnvByObject(self.instance))
2033 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2034 list(self.instance.secondary_nodes))
2037 def CheckPrereq(self):
2038 """Check prerequisites.
2040 This checks that the instance is in the cluster.
2043 instance = self.cfg.GetInstanceInfo(
2044 self.cfg.ExpandInstanceName(self.op.instance_name))
2045 if instance is None:
2046 raise errors.OpPrereqError("Instance '%s' not known" %
2047 self.op.instance_name)
2049 # check bridges existence
2050 _CheckInstanceBridgesExist(instance)
2052 self.instance = instance
2053 self.op.instance_name = instance.name
2055 def Exec(self, feedback_fn):
2056 """Reboot the instance.
2059 instance = self.instance
2060 ignore_secondaries = self.op.ignore_secondaries
2061 reboot_type = self.op.reboot_type
2062 extra_args = getattr(self.op, "extra_args", "")
2064 node_current = instance.primary_node
2066 if reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
2067 constants.INSTANCE_REBOOT_HARD,
2068 constants.INSTANCE_REBOOT_FULL]:
2069 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
2070 (constants.INSTANCE_REBOOT_SOFT,
2071 constants.INSTANCE_REBOOT_HARD,
2072 constants.INSTANCE_REBOOT_FULL))
2074 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
2075 constants.INSTANCE_REBOOT_HARD]:
2076 if not rpc.call_instance_reboot(node_current, instance,
2077 reboot_type, extra_args):
2078 raise errors.OpExecError("Could not reboot instance")
2080 if not rpc.call_instance_shutdown(node_current, instance):
2081 raise errors.OpExecError("could not shutdown instance for full reboot")
2082 _ShutdownInstanceDisks(instance, self.cfg)
2083 _StartInstanceDisks(self.cfg, instance, ignore_secondaries)
2084 if not rpc.call_instance_start(node_current, instance, extra_args):
2085 _ShutdownInstanceDisks(instance, self.cfg)
2086 raise errors.OpExecError("Could not start instance for full reboot")
2088 self.cfg.MarkInstanceUp(instance.name)
2091 class LUShutdownInstance(LogicalUnit):
2092 """Shutdown an instance.
2095 HPATH = "instance-stop"
2096 HTYPE = constants.HTYPE_INSTANCE
2097 _OP_REQP = ["instance_name"]
2099 def BuildHooksEnv(self):
2102 This runs on master, primary and secondary nodes of the instance.
2105 env = _BuildInstanceHookEnvByObject(self.instance)
2106 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2107 list(self.instance.secondary_nodes))
2110 def CheckPrereq(self):
2111 """Check prerequisites.
2113 This checks that the instance is in the cluster.
2116 instance = self.cfg.GetInstanceInfo(
2117 self.cfg.ExpandInstanceName(self.op.instance_name))
2118 if instance is None:
2119 raise errors.OpPrereqError("Instance '%s' not known" %
2120 self.op.instance_name)
2121 self.instance = instance
2123 def Exec(self, feedback_fn):
2124 """Shutdown the instance.
2127 instance = self.instance
2128 node_current = instance.primary_node
2129 self.cfg.MarkInstanceDown(instance.name)
2130 if not rpc.call_instance_shutdown(node_current, instance):
2131 logger.Error("could not shutdown instance")
2133 _ShutdownInstanceDisks(instance, self.cfg)
2136 class LUReinstallInstance(LogicalUnit):
2137 """Reinstall an instance.
2140 HPATH = "instance-reinstall"
2141 HTYPE = constants.HTYPE_INSTANCE
2142 _OP_REQP = ["instance_name"]
2144 def BuildHooksEnv(self):
2147 This runs on master, primary and secondary nodes of the instance.
2150 env = _BuildInstanceHookEnvByObject(self.instance)
2151 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2152 list(self.instance.secondary_nodes))
2153 return env, nl, nl
2155 def CheckPrereq(self):
2156 """Check prerequisites.
2158 This checks that the instance is in the cluster and is not running.
2161 instance = self.cfg.GetInstanceInfo(
2162 self.cfg.ExpandInstanceName(self.op.instance_name))
2163 if instance is None:
2164 raise errors.OpPrereqError("Instance '%s' not known" %
2165 self.op.instance_name)
2166 if instance.disk_template == constants.DT_DISKLESS:
2167 raise errors.OpPrereqError("Instance '%s' has no disks" %
2168 self.op.instance_name)
2169 if instance.status != "down":
2170 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
2171 self.op.instance_name)
2172 remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
2174 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
2175 (self.op.instance_name,
2176 instance.primary_node))
2178 self.op.os_type = getattr(self.op, "os_type", None)
2179 if self.op.os_type is not None:
2181 pnode = self.cfg.GetNodeInfo(
2182 self.cfg.ExpandNodeName(instance.primary_node))
2183 if pnode is None:
2184 raise errors.OpPrereqError("Primary node '%s' is unknown" %
2185 instance.primary_node)
2186 os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
2187 if not os_obj:
2188 raise errors.OpPrereqError("OS '%s' not in supported OS list for"
2189 " primary node" % self.op.os_type)
2191 self.instance = instance
2193 def Exec(self, feedback_fn):
2194 """Reinstall the instance.
2197 inst = self.instance
2199 if self.op.os_type is not None:
2200 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
2201 inst.os = self.op.os_type
2202 self.cfg.AddInstance(inst)
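# The OS create scripts need the instance's disks to be active on the
# primary node, so they are assembled here and shut down again once the
# scripts have run.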
2204 _StartInstanceDisks(self.cfg, inst, None)
2206 feedback_fn("Running the instance OS create scripts...")
2207 if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"):
2208 raise errors.OpExecError("Could not install OS for instance %s"
2210 (inst.name, inst.primary_node))
2212 _ShutdownInstanceDisks(inst, self.cfg)
2215 class LURenameInstance(LogicalUnit):
2216 """Rename an instance.
2219 HPATH = "instance-rename"
2220 HTYPE = constants.HTYPE_INSTANCE
2221 _OP_REQP = ["instance_name", "new_name"]
2223 def BuildHooksEnv(self):
2226 This runs on master, primary and secondary nodes of the instance.
2229 env = _BuildInstanceHookEnvByObject(self.instance)
2230 env["INSTANCE_NEW_NAME"] = self.op.new_name
2231 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2232 list(self.instance.secondary_nodes))
2233 return env, nl, nl
2235 def CheckPrereq(self):
2236 """Check prerequisites.
2238 This checks that the instance is in the cluster and is not running.
2241 instance = self.cfg.GetInstanceInfo(
2242 self.cfg.ExpandInstanceName(self.op.instance_name))
2243 if instance is None:
2244 raise errors.OpPrereqError("Instance '%s' not known" %
2245 self.op.instance_name)
2246 if instance.status != "down":
2247 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
2248 self.op.instance_name)
2249 remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
2251 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
2252 (self.op.instance_name,
2253 instance.primary_node))
2254 self.instance = instance
2256 # new name verification
2257 name_info = utils.HostInfo(self.op.new_name)
2259 self.op.new_name = new_name = name_info.name
2260 instance_list = self.cfg.GetInstanceList()
2261 if new_name in instance_list:
2262 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
2265 if not getattr(self.op, "ignore_ip", False):
2266 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
2267 raise errors.OpPrereqError("IP %s of instance %s already in use" %
2268 (name_info.ip, new_name))
2271 def Exec(self, feedback_fn):
2272 """Reinstall the instance.
2275 inst = self.instance
2276 old_name = inst.name
2278 if inst.disk_template == constants.DT_FILE:
2279 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
2281 self.cfg.RenameInstance(inst.name, self.op.new_name)
2283 # re-read the instance from the configuration after rename
2284 inst = self.cfg.GetInstanceInfo(self.op.new_name)
2286 if inst.disk_template == constants.DT_FILE:
2287 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
2288 result = rpc.call_file_storage_dir_rename(inst.primary_node,
2289 old_file_storage_dir,
2290 new_file_storage_dir)
2293 raise errors.OpExecError("Could not connect to node '%s' to rename"
2294 " directory '%s' to '%s' (but the instance"
2295 " has been renamed in Ganeti)" % (
2296 inst.primary_node, old_file_storage_dir,
2297 new_file_storage_dir))
2300 raise errors.OpExecError("Could not rename directory '%s' to '%s'"
2301 " (but the instance has been renamed in"
2302 " Ganeti)" % (old_file_storage_dir,
2303 new_file_storage_dir))
2305 _StartInstanceDisks(self.cfg, inst, None)
2307 if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
2308 "sda", "sdb"):
2309 msg = ("Could not run OS rename script for instance %s on node %s (but the"
2310 " instance has been renamed in Ganeti)" %
2311 (inst.name, inst.primary_node))
2314 _ShutdownInstanceDisks(inst, self.cfg)
2317 class LURemoveInstance(LogicalUnit):
2318 """Remove an instance.
2321 HPATH = "instance-remove"
2322 HTYPE = constants.HTYPE_INSTANCE
2323 _OP_REQP = ["instance_name", "ignore_failures"]
2325 def BuildHooksEnv(self):
2328 This runs on master, primary and secondary nodes of the instance.
2331 env = _BuildInstanceHookEnvByObject(self.instance)
2332 nl = [self.sstore.GetMasterNode()]
2333 return env, nl, nl
2335 def CheckPrereq(self):
2336 """Check prerequisites.
2338 This checks that the instance is in the cluster.
2341 instance = self.cfg.GetInstanceInfo(
2342 self.cfg.ExpandInstanceName(self.op.instance_name))
2343 if instance is None:
2344 raise errors.OpPrereqError("Instance '%s' not known" %
2345 self.op.instance_name)
2346 self.instance = instance
2348 def Exec(self, feedback_fn):
2349 """Remove the instance.
2352 instance = self.instance
2353 logger.Info("shutting down instance %s on node %s" %
2354 (instance.name, instance.primary_node))
2356 if not rpc.call_instance_shutdown(instance.primary_node, instance):
2357 if self.op.ignore_failures:
2358 feedback_fn("Warning: can't shutdown instance")
2360 raise errors.OpExecError("Could not shutdown instance %s on node %s" %
2361 (instance.name, instance.primary_node))
2363 logger.Info("removing block devices for instance %s" % instance.name)
2365 if not _RemoveDisks(instance, self.cfg):
2366 if self.op.ignore_failures:
2367 feedback_fn("Warning: can't remove instance's disks")
2369 raise errors.OpExecError("Can't remove instance's disks")
2371 logger.Info("removing instance %s out of cluster config" % instance.name)
2373 self.cfg.RemoveInstance(instance.name)
2374 # Remove the new instance from the Ganeti Lock Manager
2375 self.context.glm.remove(locking.LEVEL_INSTANCE, instance.name)
2378 class LUQueryInstances(NoHooksLU):
2379 """Logical unit for querying instances.
2382 _OP_REQP = ["output_fields", "names"]
2384 def CheckPrereq(self):
2385 """Check prerequisites.
2387 This checks that the fields required are valid output fields.
2390 self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
2391 _CheckOutputFields(static=["name", "os", "pnode", "snodes",
2392 "admin_state", "admin_ram",
2393 "disk_template", "ip", "mac", "bridge",
2394 "sda_size", "sdb_size", "vcpus", "tags"],
2395 dynamic=self.dynamic_fields,
2396 selected=self.op.output_fields)
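# Static fields are answered from the configuration alone; the dynamic
# fields ("oper_state", "oper_ram", "status") require live data from the
# nodes, which Exec only gathers when one of them was requested.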
2398 self.wanted = _GetWantedInstances(self, self.op.names)
2400 def Exec(self, feedback_fn):
2401 """Computes the list of nodes and their attributes.
2404 instance_names = self.wanted
2405 instance_list = [self.cfg.GetInstanceInfo(iname) for iname
2406 in instance_names]
2408 # begin data gathering
2410 nodes = frozenset([inst.primary_node for inst in instance_list])
2412 bad_nodes = []
2413 if self.dynamic_fields.intersection(self.op.output_fields):
2414 live_data = {}
2415 node_data = rpc.call_all_instances_info(nodes)
2416 for name in nodes:
2417 result = node_data[name]
2418 if result:
2419 live_data.update(result)
2420 elif result == False:
2421 bad_nodes.append(name)
2422 # else no instance is alive
2423 else:
2424 live_data = dict([(name, {}) for name in instance_names])
2426 # end data gathering
2428 output = []
2429 for instance in instance_list:
2430 iout = []
2431 for field in self.op.output_fields:
2432 if field == "name":
2433 val = instance.name
2434 elif field == "os":
2435 val = instance.os
2436 elif field == "pnode":
2437 val = instance.primary_node
2438 elif field == "snodes":
2439 val = list(instance.secondary_nodes)
2440 elif field == "admin_state":
2441 val = (instance.status != "down")
2442 elif field == "oper_state":
2443 if instance.primary_node in bad_nodes:
2444 val = None
2445 else:
2446 val = bool(live_data.get(instance.name))
2447 elif field == "status":
2448 if instance.primary_node in bad_nodes:
2449 val = "ERROR_nodedown"
2450 else:
2451 running = bool(live_data.get(instance.name))
2452 if running:
2453 if instance.status != "down":
2454 val = "running"
2455 else:
2456 val = "ERROR_up"
2457 else:
2458 if instance.status != "down":
2459 val = "ERROR_down"
2460 else:
2461 val = "ADMIN_down"
2462 elif field == "admin_ram":
2463 val = instance.memory
2464 elif field == "oper_ram":
2465 if instance.primary_node in bad_nodes:
2466 val = None
2467 elif instance.name in live_data:
2468 val = live_data[instance.name].get("memory", "?")
2469 else:
2470 val = "-"
2471 elif field == "disk_template":
2472 val = instance.disk_template
2473 elif field == "ip":
2474 val = instance.nics[0].ip
2475 elif field == "bridge":
2476 val = instance.nics[0].bridge
2477 elif field == "mac":
2478 val = instance.nics[0].mac
2479 elif field == "sda_size" or field == "sdb_size":
2480 disk = instance.FindDisk(field[:3])
2481 if disk is None:
2482 val = None
2483 else:
2484 val = disk.size
2485 elif field == "vcpus":
2486 val = instance.vcpus
2487 elif field == "tags":
2488 val = list(instance.GetTags())
2489 else:
2490 raise errors.ParameterError(field)
2491 iout.append(val)
2492 output.append(iout)
2494 return output
2497 class LUFailoverInstance(LogicalUnit):
2498 """Failover an instance.
2501 HPATH = "instance-failover"
2502 HTYPE = constants.HTYPE_INSTANCE
2503 _OP_REQP = ["instance_name", "ignore_consistency"]
2505 def BuildHooksEnv(self):
2508 This runs on master, primary and secondary nodes of the instance.
2512 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
2514 env.update(_BuildInstanceHookEnvByObject(self.instance))
2515 nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
2518 def CheckPrereq(self):
2519 """Check prerequisites.
2521 This checks that the instance is in the cluster.
2524 instance = self.cfg.GetInstanceInfo(
2525 self.cfg.ExpandInstanceName(self.op.instance_name))
2526 if instance is None:
2527 raise errors.OpPrereqError("Instance '%s' not known" %
2528 self.op.instance_name)
2530 if instance.disk_template not in constants.DTS_NET_MIRROR:
2531 raise errors.OpPrereqError("Instance's disk layout is not"
2532 " network mirrored, cannot failover.")
2534 secondary_nodes = instance.secondary_nodes
2535 if not secondary_nodes:
2536 raise errors.ProgrammerError("no secondary node but using "
2537 "a mirrored disk template")
2539 target_node = secondary_nodes[0]
2540 # check memory requirements on the secondary node
2541 _CheckNodeFreeMemory(self.cfg, target_node, "failing over instance %s" %
2542 instance.name, instance.memory)
2544 # check bridge existence
2545 brlist = [nic.bridge for nic in instance.nics]
2546 if not rpc.call_bridges_exist(target_node, brlist):
2547 raise errors.OpPrereqError("One or more target bridges %s does not"
2548 " exist on destination node '%s'" %
2549 (brlist, target_node))
2551 self.instance = instance
2553 def Exec(self, feedback_fn):
2554 """Failover an instance.
2556 The failover is done by shutting it down on its present node and
2557 starting it on the secondary.
2560 instance = self.instance
2562 source_node = instance.primary_node
2563 target_node = instance.secondary_nodes[0]
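# Failover swaps the roles of the two nodes in the mirror: the current
# (single) secondary, validated in CheckPrereq, becomes the new primary.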
2565 feedback_fn("* checking disk consistency between source and target")
2566 for dev in instance.disks:
2567 # for drbd, these are drbd over lvm
2568 if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
2569 if instance.status == "up" and not self.op.ignore_consistency:
2570 raise errors.OpExecError("Disk %s is degraded on target node,"
2571 " aborting failover." % dev.iv_name)
2573 feedback_fn("* shutting down instance on source node")
2574 logger.Info("Shutting down instance %s on node %s" %
2575 (instance.name, source_node))
2577 if not rpc.call_instance_shutdown(source_node, instance):
2578 if self.op.ignore_consistency:
2579 logger.Error("Could not shutdown instance %s on node %s. Proceeding"
2580 " anyway. Please make sure node %s is down" %
2581 (instance.name, source_node, source_node))
2583 raise errors.OpExecError("Could not shutdown instance %s on node %s" %
2584 (instance.name, source_node))
2586 feedback_fn("* deactivating the instance's disks on source node")
2587 if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True):
2588 raise errors.OpExecError("Can't shut down the instance's disks.")
2590 instance.primary_node = target_node
2591 # distribute new instance config to the other nodes
2592 self.cfg.Update(instance)
2594 # Only start the instance if it's marked as up
2595 if instance.status == "up":
2596 feedback_fn("* activating the instance's disks on target node")
2597 logger.Info("Starting instance %s on node %s" %
2598 (instance.name, target_node))
2600 disks_ok, dummy = _AssembleInstanceDisks(instance, self.cfg,
2601 ignore_secondaries=True)
2602 if not disks_ok:
2603 _ShutdownInstanceDisks(instance, self.cfg)
2604 raise errors.OpExecError("Can't activate the instance's disks")
2606 feedback_fn("* starting the instance on the target node")
2607 if not rpc.call_instance_start(target_node, instance, None):
2608 _ShutdownInstanceDisks(instance, self.cfg)
2609 raise errors.OpExecError("Could not start instance %s on node %s." %
2610 (instance.name, target_node))
2613 def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
2614 """Create a tree of block devices on the primary node.
2616 This always creates all devices.
2619 if device.children:
2620 for child in device.children:
2621 if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info):
2622 return False
2624 cfg.SetDiskID(device, node)
2625 new_id = rpc.call_blockdev_create(node, device, device.size,
2626 instance.name, True, info)
2627 if not new_id:
2628 return False
2629 if device.physical_id is None:
2630 device.physical_id = new_id
2631 return True
2634 def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info):
2635 """Create a tree of block devices on a secondary node.
2637 If this device type has to be created on secondaries, create it and
2640 If not, just recurse to children keeping the same 'force' value.
2643 if device.CreateOnSecondary():
2644 force = True
2645 if device.children:
2646 for child in device.children:
2647 if not _CreateBlockDevOnSecondary(cfg, node, instance,
2648 child, force, info):
2649 return False
2651 if not force:
2652 return True
2653 cfg.SetDiskID(device, node)
2654 new_id = rpc.call_blockdev_create(node, device, device.size,
2655 instance.name, False, info)
2656 if not new_id:
2657 return False
2658 if device.physical_id is None:
2659 device.physical_id = new_id
2660 return True
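# Taken together, the two helpers above walk a disk's child tree
# recursively: on the primary node every device is always created, while on
# a secondary node only device types that request it via CreateOnSecondary()
# (and, once forced, their children) are physically created.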
2663 def _GenerateUniqueNames(cfg, exts):
2664 """Generate a suitable LV name.
2666 This will generate a logical volume name for the given instance.
2669 results = []
2670 for val in exts:
2671 new_id = cfg.GenerateUniqueID()
2672 results.append("%s%s" % (new_id, val))
2673 return results
2676 def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name):
2677 """Generate a drbd8 device complete with its children.
2680 port = cfg.AllocatePort()
2681 vgname = cfg.GetVGName()
2682 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
2683 logical_id=(vgname, names[0]))
2684 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
2685 logical_id=(vgname, names[1]))
2686 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
2687 logical_id = (primary, secondary, port),
2688 children = [dev_data, dev_meta],
2689 iv_name=iv_name)
2690 return drbd_dev
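# Illustrative result (the names are generated unique IDs): one LD_DRBD8
# device of the requested size between the primary and secondary node on a
# freshly allocated port, with two LD_LV children: the data volume
# (names[0]) and a 128 MB metadata volume (names[1]).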
2693 def _GenerateDiskTemplate(cfg, template_name,
2694 instance_name, primary_node,
2695 secondary_nodes, disk_sz, swap_sz,
2696 file_storage_dir, file_driver):
2697 """Generate the entire disk layout for a given template type.
2700 #TODO: compute space requirements
2702 vgname = cfg.GetVGName()
2703 if template_name == constants.DT_DISKLESS:
2704 disks = []
2705 elif template_name == constants.DT_PLAIN:
2706 if len(secondary_nodes) != 0:
2707 raise errors.ProgrammerError("Wrong template configuration")
2709 names = _GenerateUniqueNames(cfg, [".sda", ".sdb"])
2710 sda_dev = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
2711 logical_id=(vgname, names[0]),
2712 iv_name="sda")
2713 sdb_dev = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
2714 logical_id=(vgname, names[1]),
2715 iv_name="sdb")
2716 disks = [sda_dev, sdb_dev]
2717 elif template_name == constants.DT_DRBD8:
2718 if len(secondary_nodes) != 1:
2719 raise errors.ProgrammerError("Wrong template configuration")
2720 remote_node = secondary_nodes[0]
2721 names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta",
2722 ".sdb_data", ".sdb_meta"])
2723 drbd_sda_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
2724 disk_sz, names[0:2], "sda")
2725 drbd_sdb_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
2726 swap_sz, names[2:4], "sdb")
2727 disks = [drbd_sda_dev, drbd_sdb_dev]
2728 elif template_name == constants.DT_FILE:
2729 if len(secondary_nodes) != 0:
2730 raise errors.ProgrammerError("Wrong template configuration")
2732 file_sda_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk_sz,
2733 iv_name="sda", logical_id=(file_driver,
2734 "%s/sda" % file_storage_dir))
2735 file_sdb_dev = objects.Disk(dev_type=constants.LD_FILE, size=swap_sz,
2736 iv_name="sdb", logical_id=(file_driver,
2737 "%s/sdb" % file_storage_dir))
2738 disks = [file_sda_dev, file_sdb_dev]
2740 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
2744 def _GetInstanceInfoText(instance):
2745 """Compute that text that should be added to the disk's metadata.
2748 return "originstname+%s" % instance.name
2751 def _CreateDisks(cfg, instance):
2752 """Create all disks for an instance.
2754 This abstracts away some work from AddInstance.
2757 instance: the instance object
2760 True or False showing the success of the creation process
2763 info = _GetInstanceInfoText(instance)
2765 if instance.disk_template == constants.DT_FILE:
2766 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
2767 result = rpc.call_file_storage_dir_create(instance.primary_node,
2768 file_storage_dir)
2770 if not result:
2771 logger.Error("Could not connect to node '%s'" % instance.primary_node)
2772 return False
2774 if not result[0]:
2775 logger.Error("failed to create directory '%s'" % file_storage_dir)
2776 return False
2778 for device in instance.disks:
2779 logger.Info("creating volume %s for instance %s" %
2780 (device.iv_name, instance.name))
2782 for secondary_node in instance.secondary_nodes:
2783 if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance,
2784 device, False, info):
2785 logger.Error("failed to create volume %s (%s) on secondary node %s!" %
2786 (device.iv_name, device, secondary_node))
2787 return False
2789 if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
2790 instance, device, info):
2791 logger.Error("failed to create volume %s on primary!" %
2792 device.iv_name)
2793 return False
2795 return True
2798 def _RemoveDisks(instance, cfg):
2799 """Remove all disks for an instance.
2801 This abstracts away some work from `AddInstance()` and
2802 `RemoveInstance()`. Note that in case some of the devices couldn't
2803 be removed, the removal will continue with the other ones (compare
2804 with `_CreateDisks()`).
2807 instance: the instance object
2810 True or False showing the success of the removal process
2813 logger.Info("removing block devices for instance %s" % instance.name)
2815 result = True
2816 for device in instance.disks:
2817 for node, disk in device.ComputeNodeTree(instance.primary_node):
2818 cfg.SetDiskID(disk, node)
2819 if not rpc.call_blockdev_remove(node, disk):
2820 logger.Error("could not remove block device %s on node %s,"
2821 " continuing anyway" %
2822 (device.iv_name, node))
2825 if instance.disk_template == constants.DT_FILE:
2826 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
2827 if not rpc.call_file_storage_dir_remove(instance.primary_node,
2828 file_storage_dir):
2829 logger.Error("could not remove directory '%s'" % file_storage_dir)
2830 result = False
2832 return result
2835 def _ComputeDiskSize(disk_template, disk_size, swap_size):
2836 """Compute disk size requirements in the volume group
2838 This is currently hard-coded for the two-drive layout.
2841 # Required free disk space as a function of disk and swap space
2842 req_size_dict = {
2843 constants.DT_DISKLESS: None,
2844 constants.DT_PLAIN: disk_size + swap_size,
2845 # 256 MB are added for drbd metadata, 128MB for each drbd device
2846 constants.DT_DRBD8: disk_size + swap_size + 256,
2847 constants.DT_FILE: None,
2848 }
2850 if disk_template not in req_size_dict:
2851 raise errors.ProgrammerError("Disk template '%s' size requirement"
2852 " is unknown" % disk_template)
2854 return req_size_dict[disk_template]
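# Worked example (illustrative sizes): for DT_DRBD8 with disk_size=10240 and
# swap_size=4096 the function returns 10240 + 4096 + 256 = 14592 MB of
# required volume group space; DT_DISKLESS and DT_FILE need no LVM space.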
2857 class LUCreateInstance(LogicalUnit):
2858 """Create an instance.
2861 HPATH = "instance-add"
2862 HTYPE = constants.HTYPE_INSTANCE
2863 _OP_REQP = ["instance_name", "mem_size", "disk_size",
2864 "disk_template", "swap_size", "mode", "start", "vcpus",
2865 "wait_for_sync", "ip_check", "mac"]
2867 def _RunAllocator(self):
2868 """Run the allocator based on input opcode.
2871 disks = [{"size": self.op.disk_size, "mode": "w"},
2872 {"size": self.op.swap_size, "mode": "w"}]
2873 nics = [{"mac": self.op.mac, "ip": getattr(self.op, "ip", None),
2874 "bridge": self.op.bridge}]
2875 ial = IAllocator(self.cfg, self.sstore,
2876 mode=constants.IALLOCATOR_MODE_ALLOC,
2877 name=self.op.instance_name,
2878 disk_template=self.op.disk_template,
2881 vcpus=self.op.vcpus,
2882 mem_size=self.op.mem_size,
2883 disks=disks,
2884 nics=nics,
2885 )
2887 ial.Run(self.op.iallocator)
2890 raise errors.OpPrereqError("Can't compute nodes using"
2891 " iallocator '%s': %s" % (self.op.iallocator,
2893 if len(ial.nodes) != ial.required_nodes:
2894 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
2895 " of nodes (%s), required %s" %
2896 (len(ial.nodes), ial.required_nodes))
2897 self.op.pnode = ial.nodes[0]
2898 logger.ToStdout("Selected nodes for the instance: %s" %
2899 (", ".join(ial.nodes),))
2900 logger.Info("Selected nodes for instance %s via iallocator %s: %s" %
2901 (self.op.instance_name, self.op.iallocator, ial.nodes))
2902 if ial.required_nodes == 2:
2903 self.op.snode = ial.nodes[1]
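# The allocator is expected to return exactly ial.required_nodes nodes: one
# for non-mirrored templates, two (primary plus secondary) for mirrored
# ones; the optional second node is taken as the secondary.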
2905 def BuildHooksEnv(self):
2908 This runs on master, primary and secondary nodes of the instance.
2912 "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
2913 "INSTANCE_DISK_SIZE": self.op.disk_size,
2914 "INSTANCE_SWAP_SIZE": self.op.swap_size,
2915 "INSTANCE_ADD_MODE": self.op.mode,
2917 if self.op.mode == constants.INSTANCE_IMPORT:
2918 env["INSTANCE_SRC_NODE"] = self.op.src_node
2919 env["INSTANCE_SRC_PATH"] = self.op.src_path
2920 env["INSTANCE_SRC_IMAGE"] = self.src_image
2922 env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
2923 primary_node=self.op.pnode,
2924 secondary_nodes=self.secondaries,
2925 status=self.instance_status,
2926 os_type=self.op.os_type,
2927 memory=self.op.mem_size,
2928 vcpus=self.op.vcpus,
2929 nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
2930 ))
2932 nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
2933 self.secondaries)
2934 return env, nl, nl
2937 def CheckPrereq(self):
2938 """Check prerequisites.
2941 # set optional parameters to none if they don't exist
2942 for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
2943 "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path",
2944 "vnc_bind_address"]:
2945 if not hasattr(self.op, attr):
2946 setattr(self.op, attr, None)
2948 if self.op.mode not in (constants.INSTANCE_CREATE,
2949 constants.INSTANCE_IMPORT):
2950 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
2953 if (not self.cfg.GetVGName() and
2954 self.op.disk_template not in constants.DTS_NOT_LVM):
2955 raise errors.OpPrereqError("Cluster does not support lvm-based"
2958 if self.op.mode == constants.INSTANCE_IMPORT:
2959 src_node = getattr(self.op, "src_node", None)
2960 src_path = getattr(self.op, "src_path", None)
2961 if src_node is None or src_path is None:
2962 raise errors.OpPrereqError("Importing an instance requires source"
2963 " node and path options")
2964 src_node_full = self.cfg.ExpandNodeName(src_node)
2965 if src_node_full is None:
2966 raise errors.OpPrereqError("Unknown source node '%s'" % src_node)
2967 self.op.src_node = src_node = src_node_full
2969 if not os.path.isabs(src_path):
2970 raise errors.OpPrereqError("The source path must be absolute")
2972 export_info = rpc.call_export_info(src_node, src_path)
2975 raise errors.OpPrereqError("No export found in dir %s" % src_path)
2977 if not export_info.has_section(constants.INISECT_EXP):
2978 raise errors.ProgrammerError("Corrupted export config")
2980 ei_version = export_info.get(constants.INISECT_EXP, 'version')
2981 if (int(ei_version) != constants.EXPORT_VERSION):
2982 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
2983 (ei_version, constants.EXPORT_VERSION))
2985 if int(export_info.get(constants.INISECT_INS, 'disk_count')) > 1:
2986 raise errors.OpPrereqError("Can't import instance with more than"
2989 # FIXME: are the old os-es, disk sizes, etc. useful?
2990 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
2991 diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS,
2993 self.src_image = diskimage
2994 else: # INSTANCE_CREATE
2995 if getattr(self.op, "os_type", None) is None:
2996 raise errors.OpPrereqError("No guest OS specified")
2998 #### instance parameters check
3000 # disk template and mirror node verification
3001 if self.op.disk_template not in constants.DISK_TEMPLATES:
3002 raise errors.OpPrereqError("Invalid disk template name")
3004 # instance name verification
3005 hostname1 = utils.HostInfo(self.op.instance_name)
3007 self.op.instance_name = instance_name = hostname1.name
3008 instance_list = self.cfg.GetInstanceList()
3009 if instance_name in instance_list:
3010 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3013 # ip validity checks
3014 ip = getattr(self.op, "ip", None)
3015 if ip is None or ip.lower() == "none":
3016 inst_ip = None
3017 elif ip.lower() == "auto":
3018 inst_ip = hostname1.ip
3019 else:
3020 if not utils.IsValidIP(ip):
3021 raise errors.OpPrereqError("given IP address '%s' doesn't look"
3022 " like a valid IP" % ip)
3023 inst_ip = ip
3024 self.inst_ip = self.op.ip = inst_ip
3026 if self.op.start and not self.op.ip_check:
3027 raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
3028 " adding an instance in start mode")
3030 if self.op.ip_check:
3031 if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT):
3032 raise errors.OpPrereqError("IP %s of instance %s already in use" %
3033 (hostname1.ip, instance_name))
3035 # MAC address verification
3036 if self.op.mac != "auto":
3037 if not utils.IsValidMac(self.op.mac.lower()):
3038 raise errors.OpPrereqError("invalid MAC address specified: %s" %
3041 # bridge verification
3042 bridge = getattr(self.op, "bridge", None)
3043 if bridge is None:
3044 self.op.bridge = self.cfg.GetDefBridge()
3045 else:
3046 self.op.bridge = bridge
3048 # boot order verification
3049 if self.op.hvm_boot_order is not None:
3050 if len(self.op.hvm_boot_order.strip("acdn")) != 0:
3051 raise errors.OpPrereqError("invalid boot order specified,"
3052 " must be one or more of [acdn]")
3053 # file storage checks
3054 if (self.op.file_driver and
3055 not self.op.file_driver in constants.FILE_DRIVER):
3056 raise errors.OpPrereqError("Invalid file driver name '%s'" %
3057 self.op.file_driver)
3059 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
3060 raise errors.OpPrereqError("File storage directory not a relative"
3064 if [self.op.iallocator, self.op.pnode].count(None) != 1:
3065 raise errors.OpPrereqError("One and only one of iallocator and primary"
3066 " node must be given")
3068 if self.op.iallocator is not None:
3069 self._RunAllocator()
3071 #### node related checks
3073 # check primary node
3074 pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode))
3076 raise errors.OpPrereqError("Primary node '%s' is unknown" %
3078 self.op.pnode = pnode.name
3080 self.secondaries = []
3082 # mirror node verification
3083 if self.op.disk_template in constants.DTS_NET_MIRROR:
3084 if getattr(self.op, "snode", None) is None:
3085 raise errors.OpPrereqError("The networked disk templates need"
3088 snode_name = self.cfg.ExpandNodeName(self.op.snode)
3089 if snode_name is None:
3090 raise errors.OpPrereqError("Unknown secondary node '%s'" %
3092 elif snode_name == pnode.name:
3093 raise errors.OpPrereqError("The secondary node cannot be"
3094 " the primary node.")
3095 self.secondaries.append(snode_name)
3097 req_size = _ComputeDiskSize(self.op.disk_template,
3098 self.op.disk_size, self.op.swap_size)
3100 # Check lv size requirements
3101 if req_size is not None:
3102 nodenames = [pnode.name] + self.secondaries
3103 nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
3104 for node in nodenames:
3105 info = nodeinfo.get(node, None)
3107 raise errors.OpPrereqError("Cannot get current information"
3108 " from node '%s'" % node)
3109 vg_free = info.get('vg_free', None)
3110 if not isinstance(vg_free, int):
3111 raise errors.OpPrereqError("Can't compute free disk space on"
3113 if req_size > info['vg_free']:
3114 raise errors.OpPrereqError("Not enough disk space on target node %s."
3115 " %d MB available, %d MB required" %
3116 (node, info['vg_free'], req_size))
3119 os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
3121 raise errors.OpPrereqError("OS '%s' not in supported os list for"
3122 " primary node" % self.op.os_type)
3124 if self.op.kernel_path == constants.VALUE_NONE:
3125 raise errors.OpPrereqError("Can't set instance kernel to none")
3128 # bridge check on primary node
3129 if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
3130 raise errors.OpPrereqError("target bridge '%s' does not exist on"
3131 " destination node '%s'" %
3132 (self.op.bridge, pnode.name))
3134 # memory check on primary node
3136 _CheckNodeFreeMemory(self.cfg, self.pnode.name,
3137 "creating instance %s" % self.op.instance_name,
3140 # hvm_cdrom_image_path verification
3141 if self.op.hvm_cdrom_image_path is not None:
3142 if not os.path.isabs(self.op.hvm_cdrom_image_path):
3143 raise errors.OpPrereqError("The path to the HVM CDROM image must"
3144 " be an absolute path or None, not %s" %
3145 self.op.hvm_cdrom_image_path)
3146 if not os.path.isfile(self.op.hvm_cdrom_image_path):
3147 raise errors.OpPrereqError("The HVM CDROM image must either be a"
3148 " regular file or a symlink pointing to"
3149 " an existing regular file, not %s" %
3150 self.op.hvm_cdrom_image_path)
3152 # vnc_bind_address verification
3153 if self.op.vnc_bind_address is not None:
3154 if not utils.IsValidIP(self.op.vnc_bind_address):
3155 raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
3156 " like a valid IP address" %
3157 self.op.vnc_bind_address)
3159 if self.op.start:
3160 self.instance_status = 'up'
3161 else:
3162 self.instance_status = 'down'
3164 def Exec(self, feedback_fn):
3165 """Create and add the instance to the cluster.
3168 instance = self.op.instance_name
3169 pnode_name = self.pnode.name
3171 if self.op.mac == "auto":
3172 mac_address = self.cfg.GenerateMAC()
3174 mac_address = self.op.mac
3176 nic = objects.NIC(bridge=self.op.bridge, mac=mac_address)
3177 if self.inst_ip is not None:
3178 nic.ip = self.inst_ip
3180 ht_kind = self.sstore.GetHypervisorType()
3181 if ht_kind in constants.HTS_REQ_PORT:
3182 network_port = self.cfg.AllocatePort()
3183 else:
3184 network_port = None
3186 if self.op.vnc_bind_address is None:
3187 self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
3189 # this is needed because os.path.join does not accept None arguments
3190 if self.op.file_storage_dir is None:
3191 string_file_storage_dir = ""
3193 string_file_storage_dir = self.op.file_storage_dir
3195 # build the full file storage dir path
3196 file_storage_dir = os.path.normpath(os.path.join(
3197 self.sstore.GetFileStorageDir(),
3198 string_file_storage_dir, instance))
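# Illustrative result: with a cluster file storage dir of
# /srv/ganeti/file-storage (an assumed value), an empty per-instance subdir
# and instance name "instance1.example.com", this yields
# /srv/ganeti/file-storage/instance1.example.com.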
3201 disks = _GenerateDiskTemplate(self.cfg,
3202 self.op.disk_template,
3203 instance, pnode_name,
3204 self.secondaries, self.op.disk_size,
3205 self.op.swap_size,
3206 file_storage_dir,
3207 self.op.file_driver)
3209 iobj = objects.Instance(name=instance, os=self.op.os_type,
3210 primary_node=pnode_name,
3211 memory=self.op.mem_size,
3212 vcpus=self.op.vcpus,
3213 nics=[nic], disks=disks,
3214 disk_template=self.op.disk_template,
3215 status=self.instance_status,
3216 network_port=network_port,
3217 kernel_path=self.op.kernel_path,
3218 initrd_path=self.op.initrd_path,
3219 hvm_boot_order=self.op.hvm_boot_order,
3220 hvm_acpi=self.op.hvm_acpi,
3221 hvm_pae=self.op.hvm_pae,
3222 hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
3223 vnc_bind_address=self.op.vnc_bind_address,
3224 )
3226 feedback_fn("* creating instance disks...")
3227 if not _CreateDisks(self.cfg, iobj):
3228 _RemoveDisks(iobj, self.cfg)
3229 raise errors.OpExecError("Device creation failed, reverting...")
3231 feedback_fn("adding instance %s to cluster config" % instance)
3233 self.cfg.AddInstance(iobj)
3234 # Add the new instance to the Ganeti Lock Manager
3235 self.context.glm.add(locking.LEVEL_INSTANCE, instance)
3237 if self.op.wait_for_sync:
3238 disk_abort = not _WaitForSync(self.cfg, iobj, self.proc)
3239 elif iobj.disk_template in constants.DTS_NET_MIRROR:
3240 # make sure the disks are not degraded (still sync-ing is ok)
3242 feedback_fn("* checking mirrors status")
3243 disk_abort = not _WaitForSync(self.cfg, iobj, self.proc, oneshot=True)
3244 else:
3245 disk_abort = False
3247 if disk_abort:
3248 _RemoveDisks(iobj, self.cfg)
3249 self.cfg.RemoveInstance(iobj.name)
3250 # Remove the new instance from the Ganeti Lock Manager
3251 self.context.glm.remove(locking.LEVEL_INSTANCE, iobj.name)
3252 raise errors.OpExecError("There are some degraded disks for"
3255 feedback_fn("creating os for instance %s on node %s" %
3256 (instance, pnode_name))
3258 if iobj.disk_template != constants.DT_DISKLESS:
3259 if self.op.mode == constants.INSTANCE_CREATE:
3260 feedback_fn("* running the instance OS create scripts...")
3261 if not rpc.call_instance_os_add(pnode_name, iobj, "sda", "sdb"):
3262 raise errors.OpExecError("could not add os for instance %s"
3264 (instance, pnode_name))
3266 elif self.op.mode == constants.INSTANCE_IMPORT:
3267 feedback_fn("* running the instance OS import scripts...")
3268 src_node = self.op.src_node
3269 src_image = self.src_image
3270 if not rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb",
3271 src_node, src_image):
3272 raise errors.OpExecError("Could not import os for instance"
3274 (instance, pnode_name))
3275 else:
3276 # also checked in the prereq part
3277 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
3278 % self.op.mode)
3281 logger.Info("starting instance %s on node %s" % (instance, pnode_name))
3282 feedback_fn("* starting instance...")
3283 if not rpc.call_instance_start(pnode_name, iobj, None):
3284 raise errors.OpExecError("Could not start instance")
3287 class LUConnectConsole(NoHooksLU):
3288 """Connect to an instance's console.
3290 This is somewhat special in that it returns the command line that
3291 you need to run on the master node in order to connect to the
3295 _OP_REQP = ["instance_name"]
3297 def CheckPrereq(self):
3298 """Check prerequisites.
3300 This checks that the instance is in the cluster.
3303 instance = self.cfg.GetInstanceInfo(
3304 self.cfg.ExpandInstanceName(self.op.instance_name))
3305 if instance is None:
3306 raise errors.OpPrereqError("Instance '%s' not known" %
3307 self.op.instance_name)
3308 self.instance = instance
3310 def Exec(self, feedback_fn):
3311 """Connect to the console of an instance
3314 instance = self.instance
3315 node = instance.primary_node
3317 node_insts = rpc.call_instance_list([node])[node]
3318 if node_insts is False:
3319 raise errors.OpExecError("Can't connect to node %s." % node)
3321 if instance.name not in node_insts:
3322 raise errors.OpExecError("Instance %s is not running." % instance.name)
3324 logger.Debug("connecting to console of %s on %s" % (instance.name, node))
3326 hyper = hypervisor.GetHypervisor()
3327 console_cmd = hyper.GetShellCommandForConsole(instance)
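# The command is not executed here; as described in the class docstring, the
# LU only returns the ssh invocation that the caller must run to reach the
# console on the instance's primary node.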
3330 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
3333 class LUReplaceDisks(LogicalUnit):
3334 """Replace the disks of an instance.
3337 HPATH = "mirrors-replace"
3338 HTYPE = constants.HTYPE_INSTANCE
3339 _OP_REQP = ["instance_name", "mode", "disks"]
3341 def _RunAllocator(self):
3342 """Compute a new secondary node using an IAllocator.
3345 ial = IAllocator(self.cfg, self.sstore,
3346 mode=constants.IALLOCATOR_MODE_RELOC,
3347 name=self.op.instance_name,
3348 relocate_from=[self.sec_node])
3350 ial.Run(self.op.iallocator)
3353 raise errors.OpPrereqError("Can't compute nodes using"
3354 " iallocator '%s': %s" % (self.op.iallocator,
3356 if len(ial.nodes) != ial.required_nodes:
3357 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
3358 " of nodes (%s), required %s" %
3359 (len(ial.nodes), ial.required_nodes))
3360 self.op.remote_node = ial.nodes[0]
3361 logger.ToStdout("Selected new secondary for the instance: %s" %
3362 self.op.remote_node)
3364 def BuildHooksEnv(self):
3367 This runs on the master, the primary and all the secondaries.
3371 "MODE": self.op.mode,
3372 "NEW_SECONDARY": self.op.remote_node,
3373 "OLD_SECONDARY": self.instance.secondary_nodes[0],
3375 env.update(_BuildInstanceHookEnvByObject(self.instance))
3376 nl = [
3377 self.sstore.GetMasterNode(),
3378 self.instance.primary_node,
3379 ]
3380 if self.op.remote_node is not None:
3381 nl.append(self.op.remote_node)
3382 return env, nl, nl
3384 def CheckPrereq(self):
3385 """Check prerequisites.
3387 This checks that the instance is in the cluster.
3390 if not hasattr(self.op, "remote_node"):
3391 self.op.remote_node = None
3393 instance = self.cfg.GetInstanceInfo(
3394 self.cfg.ExpandInstanceName(self.op.instance_name))
3395 if instance is None:
3396 raise errors.OpPrereqError("Instance '%s' not known" %
3397 self.op.instance_name)
3398 self.instance = instance
3399 self.op.instance_name = instance.name
3401 if instance.disk_template not in constants.DTS_NET_MIRROR:
3402 raise errors.OpPrereqError("Instance's disk layout is not"
3403 " network mirrored.")
3405 if len(instance.secondary_nodes) != 1:
3406 raise errors.OpPrereqError("The instance has a strange layout,"
3407 " expected one secondary but found %d" %
3408 len(instance.secondary_nodes))
3410 self.sec_node = instance.secondary_nodes[0]
3412 ia_name = getattr(self.op, "iallocator", None)
3413 if ia_name is not None:
3414 if self.op.remote_node is not None:
3415 raise errors.OpPrereqError("Give either the iallocator or the new"
3416 " secondary, not both")
3417 self.op.remote_node = self._RunAllocator()
3419 remote_node = self.op.remote_node
3420 if remote_node is not None:
3421 remote_node = self.cfg.ExpandNodeName(remote_node)
3422 if remote_node is None:
3423 raise errors.OpPrereqError("Node '%s' not known" %
3424 self.op.remote_node)
3425 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
3427 self.remote_node_info = None
3428 if remote_node == instance.primary_node:
3429 raise errors.OpPrereqError("The specified node is the primary node of"
3431 elif remote_node == self.sec_node:
3432 if self.op.mode == constants.REPLACE_DISK_SEC:
3433 # this is for DRBD8, where we can't execute the same mode of
3434 # replacement as for drbd7 (no different port allocated)
3435 raise errors.OpPrereqError("Same secondary given, cannot execute"
3437 if instance.disk_template == constants.DT_DRBD8:
3438 if (self.op.mode == constants.REPLACE_DISK_ALL and
3439 remote_node is not None):
3440 # switch to replace secondary mode
3441 self.op.mode = constants.REPLACE_DISK_SEC
3443 if self.op.mode == constants.REPLACE_DISK_ALL:
3444 raise errors.OpPrereqError("Template 'drbd' only allows primary or"
3445 " secondary disk replacement, not"
3447 elif self.op.mode == constants.REPLACE_DISK_PRI:
3448 if remote_node is not None:
3449 raise errors.OpPrereqError("Template 'drbd' does not allow changing"
3450 " the secondary while doing a primary"
3451 " node disk replacement")
3452 self.tgt_node = instance.primary_node
3453 self.oth_node = instance.secondary_nodes[0]
3454 elif self.op.mode == constants.REPLACE_DISK_SEC:
3455 self.new_node = remote_node # this can be None, in which case
3456 # we don't change the secondary
3457 self.tgt_node = instance.secondary_nodes[0]
3458 self.oth_node = instance.primary_node
3460 raise errors.ProgrammerError("Unhandled disk replace mode")
3462 for name in self.op.disks:
3463 if instance.FindDisk(name) is None:
3464 raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
3465 (name, instance.name))
3466 self.op.remote_node = remote_node
3468 def _ExecD8DiskOnly(self, feedback_fn):
3469 """Replace a disk on the primary or secondary for dbrd8.
3471 The algorithm for replace is quite complicated:
3472 - for each disk to be replaced:
3473 - create new LVs on the target node with unique names
3474 - detach old LVs from the drbd device
3475 - rename old LVs to name_replaced.<time_t>
3476 - rename new LVs to old LVs
3477 - attach the new LVs (with the old names now) to the drbd device
3478 - wait for sync across all devices
3479 - for each modified disk:
3480 - remove old LVs (which have the name name_replaced.<time_t>)
3482 Failures are not very well handled.
3486 warning, info = (self.proc.LogWarning, self.proc.LogInfo)
3487 instance = self.instance
3489 vgname = self.cfg.GetVGName()
3492 tgt_node = self.tgt_node
3493 oth_node = self.oth_node
3495 # Step: check device activation
3496 self.proc.LogStep(1, steps_total, "check device existence")
3497 info("checking volume groups")
3498 my_vg = cfg.GetVGName()
3499 results = rpc.call_vg_list([oth_node, tgt_node])
3501 raise errors.OpExecError("Can't list volume groups on the nodes")
3502 for node in oth_node, tgt_node:
3503 res = results.get(node, False)
3504 if not res or my_vg not in res:
3505 raise errors.OpExecError("Volume group '%s' not found on %s" %
3507 for dev in instance.disks:
3508 if not dev.iv_name in self.op.disks:
3509 continue
3510 for node in tgt_node, oth_node:
3511 info("checking %s on %s" % (dev.iv_name, node))
3512 cfg.SetDiskID(dev, node)
3513 if not rpc.call_blockdev_find(node, dev):
3514 raise errors.OpExecError("Can't find device %s on node %s" %
3515 (dev.iv_name, node))
3517 # Step: check other node consistency
3518 self.proc.LogStep(2, steps_total, "check peer consistency")
3519 for dev in instance.disks:
3520 if not dev.iv_name in self.op.disks:
3521 continue
3522 info("checking %s consistency on %s" % (dev.iv_name, oth_node))
3523 if not _CheckDiskConsistency(self.cfg, dev, oth_node,
3524 oth_node==instance.primary_node):
3525 raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
3526 " to replace disks on this node (%s)" %
3527 (oth_node, tgt_node))
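# Steps 1 and 2 above are read-only sanity checks; nothing has been modified
# yet, so a failure up to this point aborts the replacement without side
# effects.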
3529 # Step: create new storage
3530 self.proc.LogStep(3, steps_total, "allocate new storage")
3531 for dev in instance.disks:
3532 if not dev.iv_name in self.op.disks:
3533 continue
3534 size = dev.size
3535 cfg.SetDiskID(dev, tgt_node)
3536 lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
3537 names = _GenerateUniqueNames(cfg, lv_names)
3538 lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
3539 logical_id=(vgname, names[0]))
3540 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
3541 logical_id=(vgname, names[1]))
3542 new_lvs = [lv_data, lv_meta]
3543 old_lvs = dev.children
3544 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
3545 info("creating new local storage on %s for %s" %
3546 (tgt_node, dev.iv_name))
3547 # since we *always* want to create this LV, we use the
3548 # _Create...OnPrimary (which forces the creation), even if we
3549 # are talking about the secondary node
3550 for new_lv in new_lvs:
3551 if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv,
3552 _GetInstanceInfoText(instance)):
3553 raise errors.OpExecError("Failed to create new LV named '%s' on"
3555 (new_lv.logical_id[1], tgt_node))
3557 # Step: for each lv, detach+rename*2+attach
3558 self.proc.LogStep(4, steps_total, "change drbd configuration")
3559 for dev, old_lvs, new_lvs in iv_names.itervalues():
3560 info("detaching %s drbd from local storage" % dev.iv_name)
3561 if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
3562 raise errors.OpExecError("Can't detach drbd from local storage on node"
3563 " %s for device %s" % (tgt_node, dev.iv_name))
3565 #cfg.Update(instance)
3567 # ok, we created the new LVs, so now we know we have the needed
3568 # storage; as such, we proceed on the target node to rename
3569 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
3570 # using the assumption that logical_id == physical_id (which in
3571 # turn is the unique_id on that node)
3573 # FIXME(iustin): use a better name for the replaced LVs
3574 temp_suffix = int(time.time())
3575 ren_fn = lambda d, suff: (d.physical_id[0],
3576 d.physical_id[1] + "_replaced-%s" % suff)
3577 # build the rename list based on what LVs exist on the node
3578 rlist = []
3579 for to_ren in old_lvs:
3580 find_res = rpc.call_blockdev_find(tgt_node, to_ren)
3581 if find_res is not None: # device exists
3582 rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
3584 info("renaming the old LVs on the target node")
3585 if not rpc.call_blockdev_rename(tgt_node, rlist):
3586 raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
3587 # now we rename the new LVs to the old LVs
3588 info("renaming the new LVs on the target node")
3589 rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
3590 if not rpc.call_blockdev_rename(tgt_node, rlist):
3591 raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
3593 for old, new in zip(old_lvs, new_lvs):
3594 new.logical_id = old.logical_id
3595 cfg.SetDiskID(new, tgt_node)
3597 for disk in old_lvs:
3598 disk.logical_id = ren_fn(disk, temp_suffix)
3599 cfg.SetDiskID(disk, tgt_node)
3601 # now that the new lvs have the old name, we can add them to the device
3602 info("adding new mirror component on %s" % tgt_node)
3603 if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
3604 for new_lv in new_lvs:
3605 if not rpc.call_blockdev_remove(tgt_node, new_lv):
3606 warning("Can't rollback device %s", hint="manually cleanup unused"
3608 raise errors.OpExecError("Can't add local storage to drbd")
3610 dev.children = new_lvs
3611 cfg.Update(instance)
3613 # Step: wait for sync
3615 # this can fail as the old devices are degraded and _WaitForSync
3616 # does a combined result over all disks, so we don't check its
3618 self.proc.LogStep(5, steps_total, "sync devices")
3619 _WaitForSync(cfg, instance, self.proc, unlock=True)
3621 # so check manually all the devices
3622 for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
3623 cfg.SetDiskID(dev, instance.primary_node)
3624 is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
3626 raise errors.OpExecError("DRBD device %s is degraded!" % name)
3628 # Step: remove old storage
3629 self.proc.LogStep(6, steps_total, "removing old storage")
3630 for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
3631 info("remove logical volumes for %s" % name)
3632 for lv in old_lvs:
3633 cfg.SetDiskID(lv, tgt_node)
3634 if not rpc.call_blockdev_remove(tgt_node, lv):
3635 warning("Can't remove old LV", hint="manually remove unused LVs")
3638 def _ExecD8Secondary(self, feedback_fn):
3639 """Replace the secondary node for drbd8.
3641 The algorithm for replace is quite complicated:
3642 - for all disks of the instance:
3643 - create new LVs on the new node with same names
3644 - shutdown the drbd device on the old secondary
3645 - disconnect the drbd network on the primary
3646 - create the drbd device on the new secondary
3647 - network attach the drbd on the primary, using an artifice:
3648 the drbd code for Attach() will connect to the network if it
3649 finds a device which is connected to the good local disks but
3651 - wait for sync across all devices
3652 - remove all disks from the old secondary
3654 Failures are not very well handled.
3658 warning, info = (self.proc.LogWarning, self.proc.LogInfo)
3659 instance = self.instance
3661 vgname = self.cfg.GetVGName()
3664 old_node = self.tgt_node
3665 new_node = self.new_node
3666 pri_node = instance.primary_node
3668 # Step: check device activation
3669 self.proc.LogStep(1, steps_total, "check device existence")
3670 info("checking volume groups")
3671 my_vg = cfg.GetVGName()
3672 results = rpc.call_vg_list([pri_node, new_node])
3674 raise errors.OpExecError("Can't list volume groups on the nodes")
3675 for node in pri_node, new_node:
3676 res = results.get(node, False)
3677 if not res or my_vg not in res:
3678 raise errors.OpExecError("Volume group '%s' not found on %s" %
3680 for dev in instance.disks:
3681 if not dev.iv_name in self.op.disks:
3682 continue
3683 info("checking %s on %s" % (dev.iv_name, pri_node))
3684 cfg.SetDiskID(dev, pri_node)
3685 if not rpc.call_blockdev_find(pri_node, dev):
3686 raise errors.OpExecError("Can't find device %s on node %s" %
3687 (dev.iv_name, pri_node))
3689 # Step: check other node consistency
3690 self.proc.LogStep(2, steps_total, "check peer consistency")
3691 for dev in instance.disks:
3692 if not dev.iv_name in self.op.disks:
3693 continue
3694 info("checking %s consistency on %s" % (dev.iv_name, pri_node))
3695 if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True):
3696 raise errors.OpExecError("Primary node (%s) has degraded storage,"
3697 " unsafe to replace the secondary" %
3700 # Step: create new storage
3701 self.proc.LogStep(3, steps_total, "allocate new storage")
3702 for dev in instance.disks:
3704 info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
3705 # since we *always* want to create this LV, we use the
3706 # _Create...OnPrimary (which forces the creation), even if we
3707 # are talking about the secondary node
3708 for new_lv in dev.children:
3709 if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
3710 _GetInstanceInfoText(instance)):
3711 raise errors.OpExecError("Failed to create new LV named '%s' on"
3713 (new_lv.logical_id[1], new_node))
3715 iv_names[dev.iv_name] = (dev, dev.children)
3717 self.proc.LogStep(4, steps_total, "changing drbd configuration")
3718 for dev in instance.disks:
3720 info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
3721 # create new devices on new_node
3722 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
3723 logical_id=(pri_node, new_node,
3725 children=dev.children)
3726 if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
3728 _GetInstanceInfoText(instance)):
3729 raise errors.OpExecError("Failed to create new DRBD on"
3730 " node '%s'" % new_node)
3732 for dev in instance.disks:
3733 # we have new devices, shutdown the drbd on the old secondary
3734 info("shutting down drbd for %s on old node" % dev.iv_name)
3735 cfg.SetDiskID(dev, old_node)
3736 if not rpc.call_blockdev_shutdown(old_node, dev):
3737 warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
3738 hint="Please cleanup this device manually as soon as possible")
3740 info("detaching primary drbds from the network (=> standalone)")
3742 for dev in instance.disks:
3743 cfg.SetDiskID(dev, pri_node)
3744 # set the physical (unique in bdev terms) id to None, meaning
3745 # detach from network
3746 dev.physical_id = (None,) * len(dev.physical_id)
3747 # and 'find' the device, which will 'fix' it to match the
3749 if rpc.call_blockdev_find(pri_node, dev):
3752 warning("Failed to detach drbd %s from network, unusual case" %
3756 # no detaches succeeded (very unlikely)
3757 raise errors.OpExecError("Can't detach at least one DRBD from old node")
3759 # if we managed to detach at least one, we update all the disks of
3760 # the instance to point to the new secondary
3761 info("updating instance configuration")
3762 for dev in instance.disks:
3763 dev.logical_id = (pri_node, new_node) + dev.logical_id[2:]
3764 cfg.SetDiskID(dev, pri_node)
3765 cfg.Update(instance)
3767 # and now perform the drbd attach
3768 info("attaching primary drbds to new secondary (standalone => connected)")
3770 for dev in instance.disks:
3771 info("attaching primary drbd for %s to new secondary node" % dev.iv_name)
3772 # since the attach is smart, it's enough to 'find' the device,
3773 # it will automatically activate the network, if the physical_id
3775 cfg.SetDiskID(dev, pri_node)
3776 if not rpc.call_blockdev_find(pri_node, dev):
3777 warning("can't attach drbd %s to new secondary!" % dev.iv_name,
3778 "please do a gnt-instance info to see the status of disks")
3780 # this can fail as the old devices are degraded and _WaitForSync
3781 # does a combined result over all disks, so we don't check its
3783 self.proc.LogStep(5, steps_total, "sync devices")
3784 _WaitForSync(cfg, instance, self.proc, unlock=True)
3786 # so check manually all the devices
3787 for name, (dev, old_lvs) in iv_names.iteritems():
3788 cfg.SetDiskID(dev, pri_node)
3789 is_degr = rpc.call_blockdev_find(pri_node, dev)[5]
3791 raise errors.OpExecError("DRBD device %s is degraded!" % name)
3793 self.proc.LogStep(6, steps_total, "removing old storage")
3794 for name, (dev, old_lvs) in iv_names.iteritems():
3795 info("remove logical volumes for %s" % name)
3796 for lv in old_lvs:
3797 cfg.SetDiskID(lv, old_node)
3798 if not rpc.call_blockdev_remove(old_node, lv):
3799 warning("Can't remove LV on old secondary",
3800 hint="Cleanup stale volumes by hand")
3802 def Exec(self, feedback_fn):
3803 """Execute disk replacement.
3805 This dispatches the disk replacement to the appropriate handler.
3808 instance = self.instance
3810 # Activate the instance disks if we're replacing them on a down instance
3811 if instance.status == "down":
3812 op = opcodes.OpActivateInstanceDisks(instance_name=instance.name)
3813 self.proc.ChainOpCode(op)
3815 if instance.disk_template == constants.DT_DRBD8:
3816 if self.op.remote_node is None:
3817 fn = self._ExecD8DiskOnly
3819 fn = self._ExecD8Secondary
3821 raise errors.ProgrammerError("Unhandled disk replacement case")
3823 ret = fn(feedback_fn)
3825 # Deactivate the instance disks if we're replacing them on a down instance
3826 if instance.status == "down":
3827 op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name)
3828 self.proc.ChainOpCode(op)
3833 class LUGrowDisk(LogicalUnit):
3834 """Grow a disk of an instance.
3837 HPATH = "disk-grow"
3838 HTYPE = constants.HTYPE_INSTANCE
3839 _OP_REQP = ["instance_name", "disk", "amount"]
3841 def BuildHooksEnv(self):
3844 This runs on the master, the primary and all the secondaries.
3848 "DISK": self.op.disk,
3849 "AMOUNT": self.op.amount,
3851 env.update(_BuildInstanceHookEnvByObject(self.instance))
3853 self.sstore.GetMasterNode(),
3854 self.instance.primary_node,
3858 def CheckPrereq(self):
3859 """Check prerequisites.
3861 This checks that the instance is in the cluster.
3864 instance = self.cfg.GetInstanceInfo(
3865 self.cfg.ExpandInstanceName(self.op.instance_name))
3866 if instance is None:
3867 raise errors.OpPrereqError("Instance '%s' not known" %
3868 self.op.instance_name)
3869 self.instance = instance
3870 self.op.instance_name = instance.name
3872 if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
3873 raise errors.OpPrereqError("Instance's disk layout does not support"
3876 if instance.FindDisk(self.op.disk) is None:
3877 raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
3878 (self.op.disk, instance.name))
3880 nodenames = [instance.primary_node] + list(instance.secondary_nodes)
3881 nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
3882 for node in nodenames:
3883 info = nodeinfo.get(node, None)
3885 raise errors.OpPrereqError("Cannot get current information"
3886 " from node '%s'" % node)
3887 vg_free = info.get('vg_free', None)
3888 if not isinstance(vg_free, int):
3889 raise errors.OpPrereqError("Can't compute free disk space on"
3891 if self.op.amount > info['vg_free']:
3892 raise errors.OpPrereqError("Not enough disk space on target node %s:"
3893 " %d MiB available, %d MiB required" %
3894 (node, info['vg_free'], self.op.amount))
3896 def Exec(self, feedback_fn):
3897 """Execute disk grow.
3900 instance = self.instance
3901 disk = instance.FindDisk(self.op.disk)
3902 for node in (instance.secondary_nodes + (instance.primary_node,)):
3903 self.cfg.SetDiskID(disk, node)
3904 result = rpc.call_blockdev_grow(node, disk, self.op.amount)
3905 if not result or not isinstance(result, tuple) or len(result) != 2:
3906 raise errors.OpExecError("grow request failed to node %s" % node)
3908 raise errors.OpExecError("grow request failed to node %s: %s" %
3910 disk.RecordGrow(self.op.amount)
3911 self.cfg.Update(instance)
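# The grow request is sent to every node holding the disk (secondaries
# first, then the primary) and only afterwards recorded in the configuration
# via RecordGrow/Update.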
3915 class LUQueryInstanceData(NoHooksLU):
3916 """Query runtime instance data.
3919 _OP_REQP = ["instances"]
3921 def CheckPrereq(self):
3922 """Check prerequisites.
3924 This only checks the optional instance list against the existing names.
3927 if not isinstance(self.op.instances, list):
3928 raise errors.OpPrereqError("Invalid argument type 'instances'")
3929 if self.op.instances:
3930 self.wanted_instances = []
3931 names = self.op.instances
3932 for name in names:
3933 instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name))
3934 if instance is None:
3935 raise errors.OpPrereqError("No such instance name '%s'" % name)
3936 self.wanted_instances.append(instance)
3937 else:
3938 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3939 in self.cfg.GetInstanceList()]
3943 def _ComputeDiskStatus(self, instance, snode, dev):
3944 """Compute block device status.
3947 self.cfg.SetDiskID(dev, instance.primary_node)
3948 dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
3949 if dev.dev_type in constants.LDS_DRBD:
3950 # we change the snode then (otherwise we use the one passed in)
3951 if dev.logical_id[0] == instance.primary_node:
3952 snode = dev.logical_id[1]
3953 else:
3954 snode = dev.logical_id[0]
3956 if snode:
3957 self.cfg.SetDiskID(dev, snode)
3958 dev_sstatus = rpc.call_blockdev_find(snode, dev)
3959 else:
3960 dev_sstatus = None
3962 if dev.children:
3963 dev_children = [self._ComputeDiskStatus(instance, snode, child)
3964 for child in dev.children]
3965 else:
3966 dev_children = []
3968 data = {
3969 "iv_name": dev.iv_name,
3970 "dev_type": dev.dev_type,
3971 "logical_id": dev.logical_id,
3972 "physical_id": dev.physical_id,
3973 "pstatus": dev_pstatus,
3974 "sstatus": dev_sstatus,
3975 "children": dev_children,
3976 }
3978 return data
3980 def Exec(self, feedback_fn):
3981 """Gather and return data"""
3982 result = {}
3983 for instance in self.wanted_instances:
3984 remote_info = rpc.call_instance_info(instance.primary_node,
3985 instance.name)
3986 if remote_info and "state" in remote_info:
3987 remote_state = "up"
3988 else:
3989 remote_state = "down"
3990 if instance.status == "down":
3991 config_state = "down"
3992 else:
3993 config_state = "up"
3995 disks = [self._ComputeDiskStatus(instance, None, device)
3996 for device in instance.disks]
3998 idict = {
3999 "name": instance.name,
4000 "config_state": config_state,
4001 "run_state": remote_state,
4002 "pnode": instance.primary_node,
4003 "snodes": instance.secondary_nodes,
4004 "os": instance.os,
4005 "memory": instance.memory,
4006 "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
4007 "disks": disks,
4008 "vcpus": instance.vcpus,
4009 }
4011 htkind = self.sstore.GetHypervisorType()
4012 if htkind == constants.HT_XEN_PVM30:
4013 idict["kernel_path"] = instance.kernel_path
4014 idict["initrd_path"] = instance.initrd_path
4016 if htkind == constants.HT_XEN_HVM31:
4017 idict["hvm_boot_order"] = instance.hvm_boot_order
4018 idict["hvm_acpi"] = instance.hvm_acpi
4019 idict["hvm_pae"] = instance.hvm_pae
4020 idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path
4022 if htkind in constants.HTS_REQ_PORT:
4023 idict["vnc_bind_address"] = instance.vnc_bind_address
4024 idict["network_port"] = instance.network_port
4026 result[instance.name] = idict
4028 return result
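# Illustrative sketch (not part of the original module): shape of one entry
# in the mapping returned above; all values are made up.
#
#   result["instance1.example.com"] = {
#     "name": "instance1.example.com",
#     "config_state": "up",         # from the cluster configuration
#     "run_state": "up",            # from the primary node's hypervisor
#     "pnode": "node1.example.com",
#     "snodes": ["node2.example.com"],
#     "os": "debian-etch",
#     "memory": 512,
#     "nics": [("aa:00:00:11:22:33", "192.0.2.10", "xen-br0")],
#     "disks": [...],               # per-disk dicts built by _ComputeDiskStatus
#     "vcpus": 1,
#   }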
4031 class LUSetInstanceParams(LogicalUnit):
4032 """Modifies an instance's parameters.
4035 HPATH = "instance-modify"
4036 HTYPE = constants.HTYPE_INSTANCE
4037 _OP_REQP = ["instance_name"]
4039 def BuildHooksEnv(self):
4040 """Build hooks env.
4042 This runs on the master, primary and secondaries.
4044 """
4045 args = dict()
4046 if self.mem:
4047 args['memory'] = self.mem
4048 if self.vcpus:
4049 args['vcpus'] = self.vcpus
4050 if self.do_ip or self.do_bridge or self.mac:
4051 if self.do_ip:
4052 ip = self.ip
4053 else:
4054 ip = self.instance.nics[0].ip
4055 if self.bridge:
4056 bridge = self.bridge
4057 else:
4058 bridge = self.instance.nics[0].bridge
4059 if self.mac:
4060 mac = self.mac
4061 else:
4062 mac = self.instance.nics[0].mac
4063 args['nics'] = [(ip, bridge, mac)]
4064 env = _BuildInstanceHookEnvByObject(self.instance, override=args)
4065 nl = [self.sstore.GetMasterNode(),
4066 self.instance.primary_node] + list(self.instance.secondary_nodes)
4067 return env, nl, nl
4069 def CheckPrereq(self):
4070 """Check prerequisites.
4072 This only checks the instance list against the existing names.
4075 self.mem = getattr(self.op, "mem", None)
4076 self.vcpus = getattr(self.op, "vcpus", None)
4077 self.ip = getattr(self.op, "ip", None)
4078 self.mac = getattr(self.op, "mac", None)
4079 self.bridge = getattr(self.op, "bridge", None)
4080 self.kernel_path = getattr(self.op, "kernel_path", None)
4081 self.initrd_path = getattr(self.op, "initrd_path", None)
4082 self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
4083 self.hvm_acpi = getattr(self.op, "hvm_acpi", None)
4084 self.hvm_pae = getattr(self.op, "hvm_pae", None)
4085 self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None)
4086 self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None)
4087 all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
4088 self.kernel_path, self.initrd_path, self.hvm_boot_order,
4089 self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
4090 self.vnc_bind_address]
4091 if all_parms.count(None) == len(all_parms):
4092 raise errors.OpPrereqError("No changes submitted")
4093 if self.mem is not None:
4094 try:
4095 self.mem = int(self.mem)
4096 except ValueError, err:
4097 raise errors.OpPrereqError("Invalid memory size: %s" % str(err))
4098 if self.vcpus is not None:
4099 try:
4100 self.vcpus = int(self.vcpus)
4101 except ValueError, err:
4102 raise errors.OpPrereqError("Invalid vcpus number: %s" % str(err))
4103 if self.ip is not None:
4104 self.do_ip = True
4105 if self.ip.lower() == "none":
4106 self.ip = None
4107 else:
4108 if not utils.IsValidIP(self.ip):
4109 raise errors.OpPrereqError("Invalid IP address '%s'." % self.ip)
4110 else:
4111 self.do_ip = False
4112 self.do_bridge = (self.bridge is not None)
4113 if self.mac is not None:
4114 if self.cfg.IsMacInUse(self.mac):
4115 raise errors.OpPrereqError('MAC address %s already in use in cluster' %
4116 self.mac)
4117 if not utils.IsValidMac(self.mac):
4118 raise errors.OpPrereqError('Invalid MAC address %s' % self.mac)
4120 if self.kernel_path is not None:
4121 self.do_kernel_path = True
4122 if self.kernel_path == constants.VALUE_NONE:
4123 raise errors.OpPrereqError("Can't set instance to no kernel")
4125 if self.kernel_path != constants.VALUE_DEFAULT:
4126 if not os.path.isabs(self.kernel_path):
4127 raise errors.OpPrereqError("The kernel path must be an absolute"
4128 " filename")
4129 else:
4130 self.do_kernel_path = False
4132 if self.initrd_path is not None:
4133 self.do_initrd_path = True
4134 if self.initrd_path not in (constants.VALUE_NONE,
4135 constants.VALUE_DEFAULT):
4136 if not os.path.isabs(self.initrd_path):
4137 raise errors.OpPrereqError("The initrd path must be an absolute"
4138 " filename")
4139 else:
4140 self.do_initrd_path = False
4142 # boot order verification
4143 if self.hvm_boot_order is not None:
4144 if self.hvm_boot_order != constants.VALUE_DEFAULT:
4145 if len(self.hvm_boot_order.strip("acdn")) != 0:
4146 raise errors.OpPrereqError("invalid boot order specified,"
4147 " must be one or more of [acdn]"
4148 " or 'default'")
4150 # hvm_cdrom_image_path verification
4151 if self.op.hvm_cdrom_image_path is not None:
4152 if not os.path.isabs(self.op.hvm_cdrom_image_path):
4153 raise errors.OpPrereqError("The path to the HVM CDROM image must"
4154 " be an absolute path or None, not %s" %
4155 self.op.hvm_cdrom_image_path)
4156 if not os.path.isfile(self.op.hvm_cdrom_image_path):
4157 raise errors.OpPrereqError("The HVM CDROM image must either be a"
4158 " regular file or a symlink pointing to"
4159 " an existing regular file, not %s" %
4160 self.op.hvm_cdrom_image_path)
4162 # vnc_bind_address verification
4163 if self.op.vnc_bind_address is not None:
4164 if not utils.IsValidIP(self.op.vnc_bind_address):
4165 raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
4166 " like a valid IP address" %
4167 self.op.vnc_bind_address)
4169 instance = self.cfg.GetInstanceInfo(
4170 self.cfg.ExpandInstanceName(self.op.instance_name))
4171 if instance is None:
4172 raise errors.OpPrereqError("No such instance name '%s'" %
4173 self.op.instance_name)
4174 self.op.instance_name = instance.name
4175 self.instance = instance
4178 def Exec(self, feedback_fn):
4179 """Modifies an instance.
4181 All parameters take effect only at the next restart of the instance.
4183 result = []
4184 instance = self.instance
4185 if self.mem:
4186 instance.memory = self.mem
4187 result.append(("mem", self.mem))
4188 if self.vcpus:
4189 instance.vcpus = self.vcpus
4190 result.append(("vcpus", self.vcpus))
4191 if self.do_ip:
4192 instance.nics[0].ip = self.ip
4193 result.append(("ip", self.ip))
4194 if self.bridge:
4195 instance.nics[0].bridge = self.bridge
4196 result.append(("bridge", self.bridge))
4197 if self.mac:
4198 instance.nics[0].mac = self.mac
4199 result.append(("mac", self.mac))
4200 if self.do_kernel_path:
4201 instance.kernel_path = self.kernel_path
4202 result.append(("kernel_path", self.kernel_path))
4203 if self.do_initrd_path:
4204 instance.initrd_path = self.initrd_path
4205 result.append(("initrd_path", self.initrd_path))
4206 if self.hvm_boot_order:
4207 if self.hvm_boot_order == constants.VALUE_DEFAULT:
4208 instance.hvm_boot_order = None
4209 else:
4210 instance.hvm_boot_order = self.hvm_boot_order
4211 result.append(("hvm_boot_order", self.hvm_boot_order))
4212 if self.hvm_acpi is not None:
4213 instance.hvm_acpi = self.hvm_acpi
4214 result.append(("hvm_acpi", self.hvm_acpi))
4215 if self.hvm_pae is not None:
4216 instance.hvm_pae = self.hvm_pae
4217 result.append(("hvm_pae", self.hvm_pae))
4218 if self.hvm_cdrom_image_path:
4219 instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
4220 result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path))
4221 if self.vnc_bind_address:
4222 instance.vnc_bind_address = self.vnc_bind_address
4223 result.append(("vnc_bind_address", self.vnc_bind_address))
4225 self.cfg.AddInstance(instance)
4227 return result
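# Illustrative sketch (not part of the original module): the method above
# returns a list of (parameter, new value) pairs describing what changed,
# e.g. for a memory and bridge change (values made up):
#
#   [("mem", 1024), ("bridge", "xen-br1")]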
4230 class LUQueryExports(NoHooksLU):
4231 """Query the exports list
4233 """
4234 _OP_REQP = ['nodes']
4236 def CheckPrereq(self):
4237 """Check that the nodelist contains only existing nodes.
4240 self.nodes = _GetWantedNodes(self, getattr(self.op, "nodes", None))
4242 def Exec(self, feedback_fn):
4243 """Compute the list of all the exported system images.
4245 Returns:
4246 a dictionary with the structure node->(export-list)
4247 where export-list is a list of the instances exported on
4248 that node.
4251 return rpc.call_export_list(self.nodes)
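# Illustrative sketch (not part of the original module): shape of the
# dictionary returned by the RPC call above, with made-up names.
#
#   {
#     "node1.example.com": ["instance1.example.com", "instance2.example.com"],
#     "node2.example.com": [],
#   }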
4254 class LUExportInstance(LogicalUnit):
4255 """Export an instance to an image in the cluster.
4258 HPATH = "instance-export"
4259 HTYPE = constants.HTYPE_INSTANCE
4260 _OP_REQP = ["instance_name", "target_node", "shutdown"]
4262 def BuildHooksEnv(self):
4265 This will run on the master, primary node and target node.
4268 env = {
4269 "EXPORT_NODE": self.op.target_node,
4270 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
4271 }
4272 env.update(_BuildInstanceHookEnvByObject(self.instance))
4273 nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
4274 self.op.target_node]
4275 return env, nl, nl
4277 def CheckPrereq(self):
4278 """Check prerequisites.
4280 This checks that the instance and node names are valid.
4283 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
4284 self.instance = self.cfg.GetInstanceInfo(instance_name)
4285 if self.instance is None:
4286 raise errors.OpPrereqError("Instance '%s' not found" %
4287 self.op.instance_name)
4290 dst_node_short = self.cfg.ExpandNodeName(self.op.target_node)
4291 self.dst_node = self.cfg.GetNodeInfo(dst_node_short)
4293 if self.dst_node is None:
4294 raise errors.OpPrereqError("Destination node '%s' is unknown." %
4295 self.op.target_node)
4296 self.op.target_node = self.dst_node.name
4298 # instance disk type verification
4299 for disk in self.instance.disks:
4300 if disk.dev_type == constants.LD_FILE:
4301 raise errors.OpPrereqError("Export not supported for instances with"
4302 " file-based disks")
4304 def Exec(self, feedback_fn):
4305 """Export an instance to an image in the cluster.
4308 instance = self.instance
4309 dst_node = self.dst_node
4310 src_node = instance.primary_node
4311 if self.op.shutdown:
4312 # shutdown the instance, but not the disks
4313 if not rpc.call_instance_shutdown(src_node, instance):
4314 raise errors.OpExecError("Could not shutdown instance %s on node %s" %
4315 (instance.name, src_node))
4317 vgname = self.cfg.GetVGName()
4319 snap_disks = []
4321 try:
4322 for disk in instance.disks:
4323 if disk.iv_name == "sda":
4324 # new_dev_name will be a snapshot of an lvm leaf of the one we passed
4325 new_dev_name = rpc.call_blockdev_snapshot(src_node, disk)
4327 if not new_dev_name:
4328 logger.Error("could not snapshot block device %s on node %s" %
4329 (disk.logical_id[1], src_node))
4330 else:
4331 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
4332 logical_id=(vgname, new_dev_name),
4333 physical_id=(vgname, new_dev_name),
4334 iv_name=disk.iv_name)
4335 snap_disks.append(new_dev)
4337 finally:
4338 if self.op.shutdown and instance.status == "up":
4339 if not rpc.call_instance_start(src_node, instance, None):
4340 _ShutdownInstanceDisks(instance, self.cfg)
4341 raise errors.OpExecError("Could not start instance")
4343 # TODO: check for size
4345 for dev in snap_disks:
4346 if not rpc.call_snapshot_export(src_node, dev, dst_node.name, instance):
4347 logger.Error("could not export block device %s from node %s to node %s"
4348 % (dev.logical_id[1], src_node, dst_node.name))
4349 if not rpc.call_blockdev_remove(src_node, dev):
4350 logger.Error("could not remove snapshot block device %s from node %s" %
4351 (dev.logical_id[1], src_node))
4353 if not rpc.call_finalize_export(dst_node.name, instance, snap_disks):
4354 logger.Error("could not finalize export for instance %s on node %s" %
4355 (instance.name, dst_node.name))
4357 nodelist = self.cfg.GetNodeList()
4358 nodelist.remove(dst_node.name)
4360 # on one-node clusters nodelist will be empty after the removal
4361 # if we proceed the backup would be removed because OpQueryExports
4362 # substitutes an empty list with the full cluster node list.
4363 if nodelist:
4364 op = opcodes.OpQueryExports(nodes=nodelist)
4365 exportlist = self.proc.ChainOpCode(op)
4366 for node in exportlist:
4367 if instance.name in exportlist[node]:
4368 if not rpc.call_export_remove(node, instance.name):
4369 logger.Error("could not remove older export for instance %s"
4370 " on node %s" % (instance.name, node))
4373 class LURemoveExport(NoHooksLU):
4374 """Remove exports related to the named instance.
4377 _OP_REQP = ["instance_name"]
4379 def CheckPrereq(self):
4380 """Check prerequisites.
4381 """
4382 pass
4384 def Exec(self, feedback_fn):
4385 """Remove any export.
4388 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
4389 # If the instance was not found we'll try with the name that was passed in.
4390 # This will only work if it was an FQDN, though.
4391 fqdn_warn = False
4392 if not instance_name:
4393 fqdn_warn = True
4394 instance_name = self.op.instance_name
4396 op = opcodes.OpQueryExports(nodes=[])
4397 exportlist = self.proc.ChainOpCode(op)
4398 found = False
4399 for node in exportlist:
4400 if instance_name in exportlist[node]:
4401 found = True
4402 if not rpc.call_export_remove(node, instance_name):
4403 logger.Error("could not remove export for instance %s"
4404 " on node %s" % (instance_name, node))
4406 if fqdn_warn and not found:
4407 feedback_fn("Export not found. If trying to remove an export belonging"
4408 " to a deleted instance please use its Fully Qualified"
4409 " Domain Name.")
4412 class TagsLU(NoHooksLU):
4413 """Generic tags LU.
4415 This is an abstract class which is the parent of all the other tags LUs.
4418 def CheckPrereq(self):
4419 """Check prerequisites.
4422 if self.op.kind == constants.TAG_CLUSTER:
4423 self.target = self.cfg.GetClusterInfo()
4424 elif self.op.kind == constants.TAG_NODE:
4425 name = self.cfg.ExpandNodeName(self.op.name)
4426 if name is None:
4427 raise errors.OpPrereqError("Invalid node name (%s)" %
4428 (self.op.name,))
4429 self.op.name = name
4430 self.target = self.cfg.GetNodeInfo(name)
4431 elif self.op.kind == constants.TAG_INSTANCE:
4432 name = self.cfg.ExpandInstanceName(self.op.name)
4433 if name is None:
4434 raise errors.OpPrereqError("Invalid instance name (%s)" %
4435 (self.op.name,))
4436 self.op.name = name
4437 self.target = self.cfg.GetInstanceInfo(name)
4438 else:
4439 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
4440 str(self.op.kind))
4443 class LUGetTags(TagsLU):
4444 """Returns the tags of a given object.
4447 _OP_REQP = ["kind", "name"]
4449 def Exec(self, feedback_fn):
4450 """Returns the tag list.
4453 return self.target.GetTags()
4456 class LUSearchTags(NoHooksLU):
4457 """Searches the tags for a given pattern.
4460 _OP_REQP = ["pattern"]
4462 def CheckPrereq(self):
4463 """Check prerequisites.
4465 This checks the pattern passed for validity by compiling it.
4468 try:
4469 self.re = re.compile(self.op.pattern)
4470 except re.error, err:
4471 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
4472 (self.op.pattern, err))
4474 def Exec(self, feedback_fn):
4475 """Returns the tag list.
4478 cfg = self.cfg
4479 tgts = [("/cluster", cfg.GetClusterInfo())]
4480 ilist = [cfg.GetInstanceInfo(name) for name in cfg.GetInstanceList()]
4481 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
4482 nlist = [cfg.GetNodeInfo(name) for name in cfg.GetNodeList()]
4483 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
4484 results = []
4485 for path, target in tgts:
4486 for tag in target.GetTags():
4487 if self.re.search(tag):
4488 results.append((path, tag))
4489 return results
4492 class LUAddTags(TagsLU):
4493 """Sets a tag on a given object.
4496 _OP_REQP = ["kind", "name", "tags"]
4498 def CheckPrereq(self):
4499 """Check prerequisites.
4501 This checks the type and length of the tag name and value.
4504 TagsLU.CheckPrereq(self)
4505 for tag in self.op.tags:
4506 objects.TaggableObject.ValidateTag(tag)
4508 def Exec(self, feedback_fn):
4509 """Sets the tag.
4511 """
4512 try:
4513 for tag in self.op.tags:
4514 self.target.AddTag(tag)
4515 except errors.TagError, err:
4516 raise errors.OpExecError("Error while setting tag: %s" % str(err))
4517 try:
4518 self.cfg.Update(self.target)
4519 except errors.ConfigurationError:
4520 raise errors.OpRetryError("There has been a modification to the"
4521 " config file and the operation has been"
4522 " aborted. Please retry.")
4525 class LUDelTags(TagsLU):
4526 """Delete a list of tags from a given object.
4529 _OP_REQP = ["kind", "name", "tags"]
4531 def CheckPrereq(self):
4532 """Check prerequisites.
4534 This checks that we have the given tag.
4537 TagsLU.CheckPrereq(self)
4538 for tag in self.op.tags:
4539 objects.TaggableObject.ValidateTag(tag)
4540 del_tags = frozenset(self.op.tags)
4541 cur_tags = self.target.GetTags()
4542 if not del_tags <= cur_tags:
4543 diff_tags = del_tags - cur_tags
4544 diff_names = ["'%s'" % tag for tag in diff_tags]
4546 raise errors.OpPrereqError("Tag(s) %s not found" %
4547 (",".join(diff_names)))
4549 def Exec(self, feedback_fn):
4550 """Remove the tag from the object.
4553 for tag in self.op.tags:
4554 self.target.RemoveTag(tag)
4555 try:
4556 self.cfg.Update(self.target)
4557 except errors.ConfigurationError:
4558 raise errors.OpRetryError("There has been a modification to the"
4559 " config file and the operation has been"
4560 " aborted. Please retry.")
4562 class LUTestDelay(NoHooksLU):
4563 """Sleep for a specified amount of time.
4565 This LU sleeps on the master and/or nodes for a specified amount of
4566 time.
4569 _OP_REQP = ["duration", "on_master", "on_nodes"]
4571 def CheckPrereq(self):
4572 """Check prerequisites.
4574 This checks that we have a good list of nodes and/or the duration
4575 is a valid number.
4579 if self.op.on_nodes:
4580 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
4582 def Exec(self, feedback_fn):
4583 """Do the actual sleep.
4586 if self.op.on_master:
4587 if not utils.TestDelay(self.op.duration):
4588 raise errors.OpExecError("Error during master delay test")
4589 if self.op.on_nodes:
4590 result = rpc.call_test_delay(self.op.on_nodes, self.op.duration)
4591 if not result:
4592 raise errors.OpExecError("Complete failure from rpc call")
4593 for node, node_result in result.items():
4594 if not node_result:
4595 raise errors.OpExecError("Failure during rpc call to node %s,"
4596 " result: %s" % (node, node_result))
4599 class IAllocator(object):
4600 """IAllocator framework.
4602 An IAllocator instance has four sets of attributes:
4603 - cfg/sstore that are needed to query the cluster
4604 - input data (all members of the _KEYS class attribute are required)
4605 - four buffer attributes (in|out_data|text), that represent the
4606 input (to the external script) in text and data structure format,
4607 and the output from it, again in two formats
4608 - the result variables from the script (success, info, nodes) for
4609 easy usage
4611 """
4612 _ALLO_KEYS = [
4613 "mem_size", "disks", "disk_template",
4614 "os", "tags", "nics", "vcpus",
4615 ]
4616 _RELO_KEYS = [
4617 "relocate_from",
4618 ]
4620 def __init__(self, cfg, sstore, mode, name, **kwargs):
4621 self.cfg = cfg
4622 self.sstore = sstore
4623 # init buffer variables
4624 self.in_text = self.out_text = self.in_data = self.out_data = None
4625 # init all input fields so that pylint is happy
4626 self.mode = mode
4627 self.name = name
4628 self.mem_size = self.disks = self.disk_template = None
4629 self.os = self.tags = self.nics = self.vcpus = None
4630 self.relocate_from = None
4632 self.required_nodes = None
4633 # init result fields
4634 self.success = self.info = self.nodes = None
4635 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
4636 keyset = self._ALLO_KEYS
4637 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
4638 keyset = self._RELO_KEYS
4639 else:
4640 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
4641 " IAllocator" % self.mode)
4642 for key in kwargs:
4643 if key not in keyset:
4644 raise errors.ProgrammerError("Invalid input parameter '%s' to"
4645 " IAllocator" % key)
4646 setattr(self, key, kwargs[key])
4647 for key in keyset:
4648 if key not in kwargs:
4649 raise errors.ProgrammerError("Missing input parameter '%s' to"
4650 " IAllocator" % key)
4651 self._BuildInputData()
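# Illustrative usage sketch (not part of the original module): building an
# allocation request for a new two-disk instance and running an external
# allocator script. All names and values are made up; the keyword arguments
# mirror _ALLO_KEYS above.
#
#   ial = IAllocator(cfg, sstore,
#                    mode=constants.IALLOCATOR_MODE_ALLOC,
#                    name="instance1.example.com",
#                    mem_size=512, vcpus=1, os="debian-etch", tags=[],
#                    disks=[{"size": 10240, "mode": "w"},
#                           {"size": 4096, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8,
#                    nics=[{"mac": "00:11:22:33:44:55", "ip": None,
#                           "bridge": "xen-br0"}])
#   ial.Run("my-allocator")  # runs the named script on the master node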
4653 def _ComputeClusterData(self):
4654 """Compute the generic allocator input data.
4656 This is the data that is independent of the actual operation.
4659 cfg = self.cfg
4660 # cluster data
4661 data = {
4662 "version": 1,
4663 "cluster_name": self.sstore.GetClusterName(),
4664 "cluster_tags": list(cfg.GetClusterInfo().GetTags()),
4665 "hypervisor_type": self.sstore.GetHypervisorType(),
4666 # we don't have job IDs
4667 }
4669 i_list = [cfg.GetInstanceInfo(iname) for iname in cfg.GetInstanceList()]
4671 # node data
4672 node_results = {}
4673 node_list = cfg.GetNodeList()
4674 node_data = rpc.call_node_info(node_list, cfg.GetVGName())
4675 for nname in node_list:
4676 ninfo = cfg.GetNodeInfo(nname)
4677 if nname not in node_data or not isinstance(node_data[nname], dict):
4678 raise errors.OpExecError("Can't get data for node %s" % nname)
4679 remote_info = node_data[nname]
4680 for attr in ['memory_total', 'memory_free', 'memory_dom0',
4681 'vg_size', 'vg_free', 'cpu_total']:
4682 if attr not in remote_info:
4683 raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
4684 (nname, attr))
4685 try:
4686 remote_info[attr] = int(remote_info[attr])
4687 except ValueError, err:
4688 raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
4689 " %s" % (nname, attr, str(err)))
4690 # compute memory used by primary instances
4691 i_p_mem = i_p_up_mem = 0
4692 for iinfo in i_list:
4693 if iinfo.primary_node == nname:
4694 i_p_mem += iinfo.memory
4695 if iinfo.status == "up":
4696 i_p_up_mem += iinfo.memory
4698 # compute memory used by instances
4699 pnr = {
4700 "tags": list(ninfo.GetTags()),
4701 "total_memory": remote_info['memory_total'],
4702 "reserved_memory": remote_info['memory_dom0'],
4703 "free_memory": remote_info['memory_free'],
4704 "i_pri_memory": i_p_mem,
4705 "i_pri_up_memory": i_p_up_mem,
4706 "total_disk": remote_info['vg_size'],
4707 "free_disk": remote_info['vg_free'],
4708 "primary_ip": ninfo.primary_ip,
4709 "secondary_ip": ninfo.secondary_ip,
4710 "total_cpus": remote_info['cpu_total'],
4711 }
4712 node_results[nname] = pnr
4713 data["nodes"] = node_results
4715 # instance data
4716 instance_data = {}
4717 for iinfo in i_list:
4718 nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
4719 for n in iinfo.nics]
4720 pir = {
4721 "tags": list(iinfo.GetTags()),
4722 "should_run": iinfo.status == "up",
4723 "vcpus": iinfo.vcpus,
4724 "memory": iinfo.memory,
4725 "os": iinfo.os,
4726 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
4727 "nics": nic_data,
4728 "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
4729 "disk_template": iinfo.disk_template,
4730 }
4731 instance_data[iinfo.name] = pir
4733 data["instances"] = instance_data
4735 self.in_data = data
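# Illustrative sketch (not part of the original module): rough skeleton of
# the structure assembled above and stored in self.in_data; all field values
# are made up and most per-node/per-instance keys are elided.
#
#   {
#     "version": 1,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "hypervisor_type": "xen-3.0",
#     "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
#     "instances": {"instance1.example.com": {"memory": 512, ...}},
#   }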
4737 def _AddNewInstance(self):
4738 """Add new instance data to allocator structure.
4740 This in combination with _ComputeClusterData will create the
4741 correct structure needed as input for the allocator.
4743 The checks for the completeness of the opcode must have already been
4744 done.
4746 """
4747 data = self.in_data
4748 if len(self.disks) != 2:
4749 raise errors.OpExecError("Only two-disk configurations supported")
4751 disk_space = _ComputeDiskSize(self.disk_template,
4752 self.disks[0]["size"], self.disks[1]["size"])
4754 if self.disk_template in constants.DTS_NET_MIRROR:
4755 self.required_nodes = 2
4756 else:
4757 self.required_nodes = 1
4758 request = {
4759 "type": "allocate",
4760 "name": self.name,
4761 "disk_template": self.disk_template,
4762 "tags": self.tags,
4763 "os": self.os,
4764 "vcpus": self.vcpus,
4765 "memory": self.mem_size,
4766 "disks": self.disks,
4767 "disk_space_total": disk_space,
4768 "nics": self.nics,
4769 "required_nodes": self.required_nodes,
4770 }
4771 data["request"] = request
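# Illustrative sketch (not part of the original module): example of the
# "request" entry added above for an allocation; all values are made up.
#
#   data["request"] = {
#     "type": "allocate",
#     "name": "instance1.example.com",
#     "disk_template": "drbd",
#     "tags": [],
#     "os": "debian-etch",
#     "vcpus": 1,
#     "memory": 512,
#     "disks": [{"size": 10240, "mode": "w"}, {"size": 4096, "mode": "w"}],
#     "disk_space_total": 14592,
#     "nics": [{"mac": "00:11:22:33:44:55", "ip": None, "bridge": "xen-br0"}],
#     "required_nodes": 2,
#   }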
4773 def _AddRelocateInstance(self):
4774 """Add relocate instance data to allocator structure.
4776 This in combination with _ComputeClusterData will create the
4777 correct structure needed as input for the allocator.
4779 The checks for the completeness of the opcode must have already been
4780 done.
4782 """
4783 instance = self.cfg.GetInstanceInfo(self.name)
4784 if instance is None:
4785 raise errors.ProgrammerError("Unknown instance '%s' passed to"
4786 " IAllocator" % self.name)
4788 if instance.disk_template not in constants.DTS_NET_MIRROR:
4789 raise errors.OpPrereqError("Can't relocate non-mirrored instances")
4791 if len(instance.secondary_nodes) != 1:
4792 raise errors.OpPrereqError("Instance does not have exactly one"
4793 " secondary node")
4794 self.required_nodes = 1
4796 disk_space = _ComputeDiskSize(instance.disk_template,
4797 instance.disks[0].size,
4798 instance.disks[1].size)
4800 request = {
4801 "type": "relocate",
4802 "name": self.name,
4803 "disk_space_total": disk_space,
4804 "required_nodes": self.required_nodes,
4805 "relocate_from": self.relocate_from,
4806 }
4807 self.in_data["request"] = request
4809 def _BuildInputData(self):
4810 """Build input data structures.
4813 self._ComputeClusterData()
4815 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
4816 self._AddNewInstance()
4817 else:
4818 self._AddRelocateInstance()
4820 self.in_text = serializer.Dump(self.in_data)
4822 def Run(self, name, validate=True, call_fn=rpc.call_iallocator_runner):
4823 """Run an instance allocator and return the results.
4828 result = call_fn(self.sstore.GetMasterNode(), name, self.in_text)
4830 if not isinstance(result, tuple) or len(result) != 4:
4831 raise errors.OpExecError("Invalid result from master iallocator runner")
4833 rcode, stdout, stderr, fail = result
4835 if rcode == constants.IARUN_NOTFOUND:
4836 raise errors.OpExecError("Can't find allocator '%s'" % name)
4837 elif rcode == constants.IARUN_FAILURE:
4838 raise errors.OpExecError("Instance allocator call failed: %s,"
4839 " output: %s" %
4840 (fail, stdout+stderr))
4841 self.out_text = stdout
4842 if validate:
4843 self._ValidateResult()
4845 def _ValidateResult(self):
4846 """Process the allocator results.
4848 This will process and, if successful, save the result in
4849 self.out_data and the other parameters.
4852 try:
4853 rdict = serializer.Load(self.out_text)
4854 except Exception, err:
4855 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
4857 if not isinstance(rdict, dict):
4858 raise errors.OpExecError("Can't parse iallocator results: not a dict")
4860 for key in "success", "info", "nodes":
4861 if key not in rdict:
4862 raise errors.OpExecError("Can't parse iallocator results:"
4863 " missing key '%s'" % key)
4864 setattr(self, key, rdict[key])
4866 if not isinstance(rdict["nodes"], list):
4867 raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
4868 " not a list")
4869 self.out_data = rdict
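# Illustrative sketch (not part of the original module): a minimal
# well-formed allocator output that would pass the validation above; the
# node names are made up.
#
#   {
#     "success": True,
#     "info": "allocation successful",
#     "nodes": ["node1.example.com", "node2.example.com"],
#   }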
4872 class LUTestAllocator(NoHooksLU):
4873 """Run allocator tests.
4875 This LU runs the allocator tests.
4878 _OP_REQP = ["direction", "mode", "name"]
4880 def CheckPrereq(self):
4881 """Check prerequisites.
4883 This checks the opcode parameters depending on the direction and mode test.
4886 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
4887 for attr in ["name", "mem_size", "disks", "disk_template",
4888 "os", "tags", "nics", "vcpus"]:
4889 if not hasattr(self.op, attr):
4890 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
4891 attr)
4892 iname = self.cfg.ExpandInstanceName(self.op.name)
4893 if iname is not None:
4894 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
4895 iname)
4896 if not isinstance(self.op.nics, list):
4897 raise errors.OpPrereqError("Invalid parameter 'nics'")
4898 for row in self.op.nics:
4899 if (not isinstance(row, dict) or
4900 "mac" not in row or
4901 "ip" not in row or
4902 "bridge" not in row):
4903 raise errors.OpPrereqError("Invalid contents of the"
4904 " 'nics' parameter")
4905 if not isinstance(self.op.disks, list):
4906 raise errors.OpPrereqError("Invalid parameter 'disks'")
4907 if len(self.op.disks) != 2:
4908 raise errors.OpPrereqError("Only two-disk configurations supported")
4909 for row in self.op.disks:
4910 if (not isinstance(row, dict) or
4911 "size" not in row or
4912 not isinstance(row["size"], int) or
4913 "mode" not in row or
4914 row["mode"] not in ['r', 'w']):
4915 raise errors.OpPrereqError("Invalid contents of the"
4916 " 'disks' parameter")
4917 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
4918 if not hasattr(self.op, "name"):
4919 raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
4920 fname = self.cfg.ExpandInstanceName(self.op.name)
4921 if fname is None:
4922 raise errors.OpPrereqError("Instance '%s' not found for relocation" %
4923 self.op.name)
4924 self.op.name = fname
4925 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
4926 else:
4927 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
4928 self.op.mode)
4930 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
4931 if not hasattr(self.op, "allocator") or self.op.allocator is None:
4932 raise errors.OpPrereqError("Missing allocator name")
4933 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
4934 raise errors.OpPrereqError("Wrong allocator test '%s'" %
4935 self.op.direction)
4937 def Exec(self, feedback_fn):
4938 """Run the allocator test.
4941 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
4942 ial = IAllocator(self.cfg, self.sstore,
4943 mode=self.op.mode,
4944 name=self.op.name,
4945 mem_size=self.op.mem_size,
4946 disks=self.op.disks,
4947 disk_template=self.op.disk_template,
4948 os=self.op.os,
4949 tags=self.op.tags,
4950 nics=self.op.nics,
4951 vcpus=self.op.vcpus,
4952 )
4953 else:
4954 ial = IAllocator(self.cfg, self.sstore,
4955 mode=self.op.mode,
4956 name=self.op.name,
4957 relocate_from=list(self.relocate_from),
4958 )
4960 if self.op.direction == constants.IALLOCATOR_DIR_IN:
4961 result = ial.in_text
4962 else:
4963 ial.Run(self.op.allocator, validate=False)
4964 result = ial.out_text
4965 return result