4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0613,W0201
34 from ganeti import rpc
35 from ganeti import ssh
36 from ganeti import logger
37 from ganeti import utils
38 from ganeti import errors
39 from ganeti import hypervisor
40 from ganeti import config
41 from ganeti import constants
42 from ganeti import objects
43 from ganeti import opcodes
44 from ganeti import ssconf
46 class LogicalUnit(object):
47 """Logical Unit base class.
49 Subclasses must follow these rules:
50 - implement CheckPrereq which also fills in the opcode instance
51 with all the fields (even if as None)
53 - implement BuildHooksEnv
54 - redefine HPATH and HTYPE
55 - optionally redefine their run requirements (REQ_CLUSTER,
56 REQ_MASTER); note that all commands require root permissions
65 def __init__(self, processor, op, cfg, sstore):
66 """Constructor for LogicalUnit.
68 This needs to be overriden in derived classes in order to check op
76 for attr_name in self._OP_REQP:
77 attr_val = getattr(op, attr_name, None)
79 raise errors.OpPrereqError("Required parameter '%s' missing" %
82 if not cfg.IsCluster():
83 raise errors.OpPrereqError("Cluster not initialized yet,"
84 " use 'gnt-cluster init' first.")
86 master = sstore.GetMasterNode()
87 if master != utils.HostInfo().name:
88 raise errors.OpPrereqError("Commands must be run on the master"
91 def CheckPrereq(self):
92 """Check prerequisites for this LU.
94 This method should check that the prerequisites for the execution
95 of this LU are fulfilled. It can do internode communication, but
96 it should be idempotent - no cluster or system changes are
99 The method should raise errors.OpPrereqError in case something is
100 not fulfilled. Its return value is ignored.
102 This method should also update all the parameters of the opcode to
103 their canonical form; e.g. a short node name must be fully
104 expanded after this method has successfully completed (so that
105 hooks, logging, etc. work correctly).
108 raise NotImplementedError
110 def Exec(self, feedback_fn):
113 This method should implement the actual work. It should raise
114 errors.OpExecError for failures that are somewhat dealt with in
118 raise NotImplementedError
120 def BuildHooksEnv(self):
121 """Build hooks environment for this LU.
123 This method should return a three-node tuple consisting of: a dict
124 containing the environment that will be used for running the
125 specific hook for this LU, a list of node names on which the hook
126 should run before the execution, and a list of node names on which
127 the hook should run after the execution.
129 The keys of the dict must not have 'GANETI_' prefixed as this will
130 be handled in the hooks runner. Also note additional keys will be
131 added by the hooks runner. If the LU doesn't define any
132 environment, an empty dict (and not None) should be returned.
134 As for the node lists, the master should not be included in the
135 them, as it will be added by the hooks runner in case this LU
136 requires a cluster to run on (otherwise we don't have a node
137 list). No nodes should be returned as an empty list (and not
140 Note that if the HPATH for a LU class is None, this function will
144 raise NotImplementedError
147 class NoHooksLU(LogicalUnit):
148 """Simple LU which runs no hooks.
150 This LU is intended as a parent for other LogicalUnits which will
151 run no hooks, in order to reduce duplicate code.
157 def BuildHooksEnv(self):
160 This is a no-op, since we don't run hooks.
166 def _AddHostToEtcHosts(hostname):
167 """Wrapper around utils.SetEtcHostsEntry.
170 hi = utils.HostInfo(name=hostname)
171 utils.SetEtcHostsEntry(constants.ETC_HOSTS, hi.ip, hi.name, [hi.ShortName()])
174 def _RemoveHostFromEtcHosts(hostname):
175 """Wrapper around utils.RemoveEtcHostsEntry.
178 hi = utils.HostInfo(name=hostname)
179 utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.name)
180 utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName())
183 def _GetWantedNodes(lu, nodes):
184 """Returns list of checked and expanded node names.
187 nodes: List of nodes (strings) or None for all
190 if not isinstance(nodes, list):
191 raise errors.OpPrereqError("Invalid argument type 'nodes'")
197 node = lu.cfg.ExpandNodeName(name)
199 raise errors.OpPrereqError("No such node name '%s'" % name)
203 wanted = lu.cfg.GetNodeList()
204 return utils.NiceSort(wanted)
207 def _GetWantedInstances(lu, instances):
208 """Returns list of checked and expanded instance names.
211 instances: List of instances (strings) or None for all
214 if not isinstance(instances, list):
215 raise errors.OpPrereqError("Invalid argument type 'instances'")
220 for name in instances:
221 instance = lu.cfg.ExpandInstanceName(name)
223 raise errors.OpPrereqError("No such instance name '%s'" % name)
224 wanted.append(instance)
227 wanted = lu.cfg.GetInstanceList()
228 return utils.NiceSort(wanted)
231 def _CheckOutputFields(static, dynamic, selected):
232 """Checks whether all selected fields are valid.
235 static: Static fields
236 dynamic: Dynamic fields
239 static_fields = frozenset(static)
240 dynamic_fields = frozenset(dynamic)
242 all_fields = static_fields | dynamic_fields
244 if not all_fields.issuperset(selected):
245 raise errors.OpPrereqError("Unknown output fields selected: %s"
246 % ",".join(frozenset(selected).
247 difference(all_fields)))
250 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
251 memory, vcpus, nics):
252 """Builds instance related env variables for hooks from single variables.
255 secondary_nodes: List of secondary nodes as strings
259 "INSTANCE_NAME": name,
260 "INSTANCE_PRIMARY": primary_node,
261 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
262 "INSTANCE_OS_TYPE": os_type,
263 "INSTANCE_STATUS": status,
264 "INSTANCE_MEMORY": memory,
265 "INSTANCE_VCPUS": vcpus,
269 nic_count = len(nics)
270 for idx, (ip, bridge) in enumerate(nics):
273 env["INSTANCE_NIC%d_IP" % idx] = ip
274 env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
278 env["INSTANCE_NIC_COUNT"] = nic_count
283 def _BuildInstanceHookEnvByObject(instance, override=None):
284 """Builds instance related env variables for hooks from an object.
287 instance: objects.Instance object of instance
288 override: dict of values to override
291 'name': instance.name,
292 'primary_node': instance.primary_node,
293 'secondary_nodes': instance.secondary_nodes,
294 'os_type': instance.os,
295 'status': instance.os,
296 'memory': instance.memory,
297 'vcpus': instance.vcpus,
298 'nics': [(nic.ip, nic.bridge) for nic in instance.nics],
301 args.update(override)
302 return _BuildInstanceHookEnv(**args)
305 def _UpdateKnownHosts(fullnode, ip, pubkey):
306 """Ensure a node has a correct known_hosts entry.
309 fullnode - Fully qualified domain name of host. (str)
310 ip - IPv4 address of host (str)
311 pubkey - the public key of the cluster
314 if os.path.exists(constants.SSH_KNOWN_HOSTS_FILE):
315 f = open(constants.SSH_KNOWN_HOSTS_FILE, 'r+')
317 f = open(constants.SSH_KNOWN_HOSTS_FILE, 'w+')
326 logger.Debug('read %s' % (repr(rawline),))
328 parts = rawline.rstrip('\r\n').split()
330 # Ignore unwanted lines
331 if len(parts) >= 3 and not rawline.lstrip()[0] == '#':
332 fields = parts[0].split(',')
337 for spec in [ ip, fullnode ]:
338 if spec not in fields:
343 logger.Debug("key, pubkey = %s." % (repr((key, pubkey)),))
344 if haveall and key == pubkey:
346 save_lines.append(rawline)
347 logger.Debug("Keeping known_hosts '%s'." % (repr(rawline),))
350 if havesome and (not haveall or key != pubkey):
352 logger.Debug("Discarding known_hosts '%s'." % (repr(rawline),))
355 save_lines.append(rawline)
358 add_lines.append('%s,%s ssh-rsa %s\n' % (fullnode, ip, pubkey))
359 logger.Debug("Adding known_hosts '%s'." % (repr(add_lines[-1]),))
362 save_lines = save_lines + add_lines
364 # Write a new file and replace old.
365 fd, tmpname = tempfile.mkstemp('.tmp', 'known_hosts.',
367 newfile = os.fdopen(fd, 'w')
369 newfile.write(''.join(save_lines))
372 logger.Debug("Wrote new known_hosts.")
373 os.rename(tmpname, constants.SSH_KNOWN_HOSTS_FILE)
376 # Simply appending a new line will do the trick.
378 for add in add_lines:
384 def _HasValidVG(vglist, vgname):
385 """Checks if the volume group list is valid.
387 A non-None return value means there's an error, and the return value
388 is the error message.
391 vgsize = vglist.get(vgname, None)
393 return "volume group '%s' missing" % vgname
395 return ("volume group '%s' too small (20480MiB required, %dMib found)" %
400 def _InitSSHSetup(node):
401 """Setup the SSH configuration for the cluster.
404 This generates a dsa keypair for root, adds the pub key to the
405 permitted hosts and adds the hostkey to its own known hosts.
408 node: the name of this host as a fqdn
411 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
413 for name in priv_key, pub_key:
414 if os.path.exists(name):
415 utils.CreateBackup(name)
416 utils.RemoveFile(name)
418 result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
422 raise errors.OpExecError("Could not generate ssh keypair, error %s" %
425 f = open(pub_key, 'r')
427 utils.AddAuthorizedKey(auth_keys, f.read(8192))
432 def _InitGanetiServerSetup(ss):
433 """Setup the necessary configuration for the initial node daemon.
435 This creates the nodepass file containing the shared password for
436 the cluster and also generates the SSL certificate.
439 # Create pseudo random password
440 randpass = sha.new(os.urandom(64)).hexdigest()
441 # and write it into sstore
442 ss.SetKey(ss.SS_NODED_PASS, randpass)
444 result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
445 "-days", str(365*5), "-nodes", "-x509",
446 "-keyout", constants.SSL_CERT_FILE,
447 "-out", constants.SSL_CERT_FILE, "-batch"])
449 raise errors.OpExecError("could not generate server ssl cert, command"
450 " %s had exitcode %s and error message %s" %
451 (result.cmd, result.exit_code, result.output))
453 os.chmod(constants.SSL_CERT_FILE, 0400)
455 result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])
458 raise errors.OpExecError("Could not start the node daemon, command %s"
459 " had exitcode %s and error %s" %
460 (result.cmd, result.exit_code, result.output))
463 def _CheckInstanceBridgesExist(instance):
464 """Check that the brigdes needed by an instance exist.
467 # check bridges existance
468 brlist = [nic.bridge for nic in instance.nics]
469 if not rpc.call_bridges_exist(instance.primary_node, brlist):
470 raise errors.OpPrereqError("one or more target bridges %s does not"
471 " exist on destination node '%s'" %
472 (brlist, instance.primary_node))
475 class LUInitCluster(LogicalUnit):
476 """Initialise the cluster.
479 HPATH = "cluster-init"
480 HTYPE = constants.HTYPE_CLUSTER
481 _OP_REQP = ["cluster_name", "hypervisor_type", "vg_name", "mac_prefix",
482 "def_bridge", "master_netdev"]
485 def BuildHooksEnv(self):
488 Notes: Since we don't require a cluster, we must manually add
489 ourselves in the post-run node list.
492 env = {"OP_TARGET": self.op.cluster_name}
493 return env, [], [self.hostname.name]
495 def CheckPrereq(self):
496 """Verify that the passed name is a valid one.
499 if config.ConfigWriter.IsCluster():
500 raise errors.OpPrereqError("Cluster is already initialised")
502 self.hostname = hostname = utils.HostInfo()
504 if hostname.ip.startswith("127."):
505 raise errors.OpPrereqError("This host's IP resolves to the private"
506 " range (%s). Please fix DNS or /etc/hosts." %
509 self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
511 if not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, hostname.ip,
512 constants.DEFAULT_NODED_PORT):
513 raise errors.OpPrereqError("Inconsistency: this host's name resolves"
514 " to %s,\nbut this ip address does not"
515 " belong to this host."
516 " Aborting." % hostname.ip)
518 secondary_ip = getattr(self.op, "secondary_ip", None)
519 if secondary_ip and not utils.IsValidIP(secondary_ip):
520 raise errors.OpPrereqError("Invalid secondary ip given")
522 secondary_ip != hostname.ip and
523 (not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, secondary_ip,
524 constants.DEFAULT_NODED_PORT))):
525 raise errors.OpPrereqError("You gave %s as secondary IP,"
526 " but it does not belong to this host." %
528 self.secondary_ip = secondary_ip
530 # checks presence of the volume group given
531 vgstatus = _HasValidVG(utils.ListVolumeGroups(), self.op.vg_name)
534 raise errors.OpPrereqError("Error: %s" % vgstatus)
536 if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$",
538 raise errors.OpPrereqError("Invalid mac prefix given '%s'" %
541 if self.op.hypervisor_type not in hypervisor.VALID_HTYPES:
542 raise errors.OpPrereqError("Invalid hypervisor type given '%s'" %
543 self.op.hypervisor_type)
545 result = utils.RunCmd(["ip", "link", "show", "dev", self.op.master_netdev])
547 raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
548 (self.op.master_netdev,
549 result.output.strip()))
551 if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
552 os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
553 raise errors.OpPrereqError("Init.d script '%s' missing or not"
554 " executable." % constants.NODE_INITD_SCRIPT)
556 def Exec(self, feedback_fn):
557 """Initialize the cluster.
560 clustername = self.clustername
561 hostname = self.hostname
563 # set up the simple store
564 self.sstore = ss = ssconf.SimpleStore()
565 ss.SetKey(ss.SS_HYPERVISOR, self.op.hypervisor_type)
566 ss.SetKey(ss.SS_MASTER_NODE, hostname.name)
567 ss.SetKey(ss.SS_MASTER_IP, clustername.ip)
568 ss.SetKey(ss.SS_MASTER_NETDEV, self.op.master_netdev)
569 ss.SetKey(ss.SS_CLUSTER_NAME, clustername.name)
571 # set up the inter-node password and certificate
572 _InitGanetiServerSetup(ss)
574 # start the master ip
575 rpc.call_node_start_master(hostname.name)
577 # set up ssh config and /etc/hosts
578 f = open(constants.SSH_HOST_RSA_PUB, 'r')
583 sshkey = sshline.split(" ")[1]
585 _AddHostToEtcHosts(hostname.name)
587 _UpdateKnownHosts(hostname.name, hostname.ip, sshkey)
589 _InitSSHSetup(hostname.name)
591 # init of cluster config file
592 self.cfg = cfgw = config.ConfigWriter()
593 cfgw.InitConfig(hostname.name, hostname.ip, self.secondary_ip,
594 sshkey, self.op.mac_prefix,
595 self.op.vg_name, self.op.def_bridge)
598 class LUDestroyCluster(NoHooksLU):
599 """Logical unit for destroying the cluster.
604 def CheckPrereq(self):
605 """Check prerequisites.
607 This checks whether the cluster is empty.
609 Any errors are signalled by raising errors.OpPrereqError.
612 master = self.sstore.GetMasterNode()
614 nodelist = self.cfg.GetNodeList()
615 if len(nodelist) != 1 or nodelist[0] != master:
616 raise errors.OpPrereqError("There are still %d node(s) in"
617 " this cluster." % (len(nodelist) - 1))
618 instancelist = self.cfg.GetInstanceList()
620 raise errors.OpPrereqError("There are still %d instance(s) in"
621 " this cluster." % len(instancelist))
623 def Exec(self, feedback_fn):
624 """Destroys the cluster.
627 master = self.sstore.GetMasterNode()
628 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
629 utils.CreateBackup(priv_key)
630 utils.CreateBackup(pub_key)
631 rpc.call_node_leave_cluster(master)
634 class LUVerifyCluster(NoHooksLU):
635 """Verifies the cluster status.
640 def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
641 remote_version, feedback_fn):
642 """Run multiple tests against a node.
645 - compares ganeti version
646 - checks vg existance and size > 20G
647 - checks config file checksum
648 - checks ssh to other nodes
651 node: name of the node to check
652 file_list: required list of files
653 local_cksum: dictionary of local files and their checksums
656 # compares ganeti version
657 local_version = constants.PROTOCOL_VERSION
658 if not remote_version:
659 feedback_fn(" - ERROR: connection to %s failed" % (node))
662 if local_version != remote_version:
663 feedback_fn(" - ERROR: sw version mismatch: master %s, node(%s) %s" %
664 (local_version, node, remote_version))
667 # checks vg existance and size > 20G
671 feedback_fn(" - ERROR: unable to check volume groups on node %s." %
675 vgstatus = _HasValidVG(vglist, self.cfg.GetVGName())
677 feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
680 # checks config file checksum
683 if 'filelist' not in node_result:
685 feedback_fn(" - ERROR: node hasn't returned file checksum data")
687 remote_cksum = node_result['filelist']
688 for file_name in file_list:
689 if file_name not in remote_cksum:
691 feedback_fn(" - ERROR: file '%s' missing" % file_name)
692 elif remote_cksum[file_name] != local_cksum[file_name]:
694 feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)
696 if 'nodelist' not in node_result:
698 feedback_fn(" - ERROR: node hasn't returned node connectivity data")
700 if node_result['nodelist']:
702 for node in node_result['nodelist']:
703 feedback_fn(" - ERROR: communication with node '%s': %s" %
704 (node, node_result['nodelist'][node]))
705 hyp_result = node_result.get('hypervisor', None)
706 if hyp_result is not None:
707 feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
710 def _VerifyInstance(self, instance, node_vol_is, node_instance, feedback_fn):
711 """Verify an instance.
713 This function checks to see if the required block devices are
714 available on the instance's node.
719 instancelist = self.cfg.GetInstanceList()
720 if not instance in instancelist:
721 feedback_fn(" - ERROR: instance %s not in instance list %s" %
722 (instance, instancelist))
725 instanceconfig = self.cfg.GetInstanceInfo(instance)
726 node_current = instanceconfig.primary_node
729 instanceconfig.MapLVsByNode(node_vol_should)
731 for node in node_vol_should:
732 for volume in node_vol_should[node]:
733 if node not in node_vol_is or volume not in node_vol_is[node]:
734 feedback_fn(" - ERROR: volume %s missing on node %s" %
738 if not instanceconfig.status == 'down':
739 if not instance in node_instance[node_current]:
740 feedback_fn(" - ERROR: instance %s not running on node %s" %
741 (instance, node_current))
744 for node in node_instance:
745 if (not node == node_current):
746 if instance in node_instance[node]:
747 feedback_fn(" - ERROR: instance %s should not run on node %s" %
753 def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
754 """Verify if there are any unknown volumes in the cluster.
756 The .os, .swap and backup volumes are ignored. All other volumes are
762 for node in node_vol_is:
763 for volume in node_vol_is[node]:
764 if node not in node_vol_should or volume not in node_vol_should[node]:
765 feedback_fn(" - ERROR: volume %s on node %s should not exist" %
770 def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
771 """Verify the list of running instances.
773 This checks what instances are running but unknown to the cluster.
777 for node in node_instance:
778 for runninginstance in node_instance[node]:
779 if runninginstance not in instancelist:
780 feedback_fn(" - ERROR: instance %s on node %s should not exist" %
781 (runninginstance, node))
785 def CheckPrereq(self):
786 """Check prerequisites.
788 This has no prerequisites.
793 def Exec(self, feedback_fn):
794 """Verify integrity of cluster, performing various test on nodes.
798 feedback_fn("* Verifying global settings")
799 for msg in self.cfg.VerifyConfig():
800 feedback_fn(" - ERROR: %s" % msg)
802 vg_name = self.cfg.GetVGName()
803 nodelist = utils.NiceSort(self.cfg.GetNodeList())
804 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
808 # FIXME: verify OS list
810 file_names = list(self.sstore.GetFileList())
811 file_names.append(constants.SSL_CERT_FILE)
812 file_names.append(constants.CLUSTER_CONF_FILE)
813 local_checksums = utils.FingerprintFiles(file_names)
815 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
816 all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
817 all_instanceinfo = rpc.call_instance_list(nodelist)
818 all_vglist = rpc.call_vg_list(nodelist)
819 node_verify_param = {
820 'filelist': file_names,
821 'nodelist': nodelist,
824 all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
825 all_rversion = rpc.call_version(nodelist)
827 for node in nodelist:
828 feedback_fn("* Verifying node %s" % node)
829 result = self._VerifyNode(node, file_names, local_checksums,
830 all_vglist[node], all_nvinfo[node],
831 all_rversion[node], feedback_fn)
835 volumeinfo = all_volumeinfo[node]
837 if type(volumeinfo) != dict:
838 feedback_fn(" - ERROR: connection to %s failed" % (node,))
842 node_volume[node] = volumeinfo
845 nodeinstance = all_instanceinfo[node]
846 if type(nodeinstance) != list:
847 feedback_fn(" - ERROR: connection to %s failed" % (node,))
851 node_instance[node] = nodeinstance
855 for instance in instancelist:
856 feedback_fn("* Verifying instance %s" % instance)
857 result = self._VerifyInstance(instance, node_volume, node_instance,
861 inst_config = self.cfg.GetInstanceInfo(instance)
863 inst_config.MapLVsByNode(node_vol_should)
865 feedback_fn("* Verifying orphan volumes")
866 result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
870 feedback_fn("* Verifying remaining instances")
871 result = self._VerifyOrphanInstances(instancelist, node_instance,
878 class LURenameCluster(LogicalUnit):
879 """Rename the cluster.
882 HPATH = "cluster-rename"
883 HTYPE = constants.HTYPE_CLUSTER
886 def BuildHooksEnv(self):
891 "OP_TARGET": self.op.sstore.GetClusterName(),
892 "NEW_NAME": self.op.name,
894 mn = self.sstore.GetMasterNode()
895 return env, [mn], [mn]
897 def CheckPrereq(self):
898 """Verify that the passed name is a valid one.
901 hostname = utils.HostInfo(self.op.name)
903 new_name = hostname.name
904 self.ip = new_ip = hostname.ip
905 old_name = self.sstore.GetClusterName()
906 old_ip = self.sstore.GetMasterIP()
907 if new_name == old_name and new_ip == old_ip:
908 raise errors.OpPrereqError("Neither the name nor the IP address of the"
909 " cluster has changed")
911 result = utils.RunCmd(["fping", "-q", new_ip])
912 if not result.failed:
913 raise errors.OpPrereqError("The given cluster IP address (%s) is"
914 " reachable on the network. Aborting." %
917 self.op.name = new_name
919 def Exec(self, feedback_fn):
920 """Rename the cluster.
923 clustername = self.op.name
927 # shutdown the master IP
928 master = ss.GetMasterNode()
929 if not rpc.call_node_stop_master(master):
930 raise errors.OpExecError("Could not disable the master role")
934 ss.SetKey(ss.SS_MASTER_IP, ip)
935 ss.SetKey(ss.SS_CLUSTER_NAME, clustername)
937 # Distribute updated ss config to all nodes
938 myself = self.cfg.GetNodeInfo(master)
939 dist_nodes = self.cfg.GetNodeList()
940 if myself.name in dist_nodes:
941 dist_nodes.remove(myself.name)
943 logger.Debug("Copying updated ssconf data to all nodes")
944 for keyname in [ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP]:
945 fname = ss.KeyToFilename(keyname)
946 result = rpc.call_upload_file(dist_nodes, fname)
947 for to_node in dist_nodes:
948 if not result[to_node]:
949 logger.Error("copy of file %s to node %s failed" %
952 if not rpc.call_node_start_master(master):
953 logger.Error("Could not re-enable the master role on the master,"
954 " please restart manually.")
957 def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False):
958 """Sleep and poll for an instance's disk to sync.
961 if not instance.disks:
965 proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
967 node = instance.primary_node
969 for dev in instance.disks:
970 cfgw.SetDiskID(dev, node)
976 cumul_degraded = False
977 rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks)
979 proc.LogWarning("Can't get any data from node %s" % node)
982 raise errors.RemoteError("Can't contact node %s for mirror data,"
987 for i in range(len(rstats)):
990 proc.LogWarning("Can't compute data for node %s/%s" %
991 (node, instance.disks[i].iv_name))
993 # we ignore the ldisk parameter
994 perc_done, est_time, is_degraded, _ = mstat
995 cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
996 if perc_done is not None:
998 if est_time is not None:
999 rem_time = "%d estimated seconds remaining" % est_time
1002 rem_time = "no time estimate"
1003 proc.LogInfo("- device %s: %5.2f%% done, %s" %
1004 (instance.disks[i].iv_name, perc_done, rem_time))
1011 time.sleep(min(60, max_time))
1017 proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1018 return not cumul_degraded
1021 def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False):
1022 """Check that mirrors are not degraded.
1024 The ldisk parameter, if True, will change the test from the
1025 is_degraded attribute (which represents overall non-ok status for
1026 the device(s)) to the ldisk (representing the local storage status).
1029 cfgw.SetDiskID(dev, node)
1036 if on_primary or dev.AssembleOnSecondary():
1037 rstats = rpc.call_blockdev_find(node, dev)
1039 logger.ToStderr("Can't get any data from node %s" % node)
1042 result = result and (not rstats[idx])
1044 for child in dev.children:
1045 result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)
1050 class LUDiagnoseOS(NoHooksLU):
1051 """Logical unit for OS diagnose/query.
1056 def CheckPrereq(self):
1057 """Check prerequisites.
1059 This always succeeds, since this is a pure query LU.
1064 def Exec(self, feedback_fn):
1065 """Compute the list of OSes.
1068 node_list = self.cfg.GetNodeList()
1069 node_data = rpc.call_os_diagnose(node_list)
1070 if node_data == False:
1071 raise errors.OpExecError("Can't gather the list of OSes")
1075 class LURemoveNode(LogicalUnit):
1076 """Logical unit for removing a node.
1079 HPATH = "node-remove"
1080 HTYPE = constants.HTYPE_NODE
1081 _OP_REQP = ["node_name"]
1083 def BuildHooksEnv(self):
1086 This doesn't run on the target node in the pre phase as a failed
1087 node would not allows itself to run.
1091 "OP_TARGET": self.op.node_name,
1092 "NODE_NAME": self.op.node_name,
1094 all_nodes = self.cfg.GetNodeList()
1095 all_nodes.remove(self.op.node_name)
1096 return env, all_nodes, all_nodes
1098 def CheckPrereq(self):
1099 """Check prerequisites.
1102 - the node exists in the configuration
1103 - it does not have primary or secondary instances
1104 - it's not the master
1106 Any errors are signalled by raising errors.OpPrereqError.
1109 node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1111 raise errors.OpPrereqError, ("Node '%s' is unknown." % self.op.node_name)
1113 instance_list = self.cfg.GetInstanceList()
1115 masternode = self.sstore.GetMasterNode()
1116 if node.name == masternode:
1117 raise errors.OpPrereqError("Node is the master node,"
1118 " you need to failover first.")
1120 for instance_name in instance_list:
1121 instance = self.cfg.GetInstanceInfo(instance_name)
1122 if node.name == instance.primary_node:
1123 raise errors.OpPrereqError("Instance %s still running on the node,"
1124 " please remove first." % instance_name)
1125 if node.name in instance.secondary_nodes:
1126 raise errors.OpPrereqError("Instance %s has node as a secondary,"
1127 " please remove first." % instance_name)
1128 self.op.node_name = node.name
1131 def Exec(self, feedback_fn):
1132 """Removes the node from the cluster.
1136 logger.Info("stopping the node daemon and removing configs from node %s" %
1139 rpc.call_node_leave_cluster(node.name)
1141 ssh.SSHCall(node.name, 'root', "%s stop" % constants.NODE_INITD_SCRIPT)
1143 logger.Info("Removing node %s from config" % node.name)
1145 self.cfg.RemoveNode(node.name)
1147 _RemoveHostFromEtcHosts(node.name)
1150 class LUQueryNodes(NoHooksLU):
1151 """Logical unit for querying nodes.
1154 _OP_REQP = ["output_fields", "names"]
1156 def CheckPrereq(self):
1157 """Check prerequisites.
1159 This checks that the fields required are valid output fields.
1162 self.dynamic_fields = frozenset(["dtotal", "dfree",
1163 "mtotal", "mnode", "mfree",
1166 _CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
1167 "pinst_list", "sinst_list",
1169 dynamic=self.dynamic_fields,
1170 selected=self.op.output_fields)
1172 self.wanted = _GetWantedNodes(self, self.op.names)
1174 def Exec(self, feedback_fn):
1175 """Computes the list of nodes and their attributes.
1178 nodenames = self.wanted
1179 nodelist = [self.cfg.GetNodeInfo(name) for name in nodenames]
1181 # begin data gathering
1183 if self.dynamic_fields.intersection(self.op.output_fields):
1185 node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
1186 for name in nodenames:
1187 nodeinfo = node_data.get(name, None)
1190 "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
1191 "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
1192 "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
1193 "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
1194 "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
1195 "bootid": nodeinfo['bootid'],
1198 live_data[name] = {}
1200 live_data = dict.fromkeys(nodenames, {})
1202 node_to_primary = dict([(name, set()) for name in nodenames])
1203 node_to_secondary = dict([(name, set()) for name in nodenames])
1205 inst_fields = frozenset(("pinst_cnt", "pinst_list",
1206 "sinst_cnt", "sinst_list"))
1207 if inst_fields & frozenset(self.op.output_fields):
1208 instancelist = self.cfg.GetInstanceList()
1210 for instance_name in instancelist:
1211 inst = self.cfg.GetInstanceInfo(instance_name)
1212 if inst.primary_node in node_to_primary:
1213 node_to_primary[inst.primary_node].add(inst.name)
1214 for secnode in inst.secondary_nodes:
1215 if secnode in node_to_secondary:
1216 node_to_secondary[secnode].add(inst.name)
1218 # end data gathering
1221 for node in nodelist:
1223 for field in self.op.output_fields:
1226 elif field == "pinst_list":
1227 val = list(node_to_primary[node.name])
1228 elif field == "sinst_list":
1229 val = list(node_to_secondary[node.name])
1230 elif field == "pinst_cnt":
1231 val = len(node_to_primary[node.name])
1232 elif field == "sinst_cnt":
1233 val = len(node_to_secondary[node.name])
1234 elif field == "pip":
1235 val = node.primary_ip
1236 elif field == "sip":
1237 val = node.secondary_ip
1238 elif field in self.dynamic_fields:
1239 val = live_data[node.name].get(field, None)
1241 raise errors.ParameterError(field)
1242 node_output.append(val)
1243 output.append(node_output)
1248 class LUQueryNodeVolumes(NoHooksLU):
1249 """Logical unit for getting volumes on node(s).
1252 _OP_REQP = ["nodes", "output_fields"]
1254 def CheckPrereq(self):
1255 """Check prerequisites.
1257 This checks that the fields required are valid output fields.
1260 self.nodes = _GetWantedNodes(self, self.op.nodes)
1262 _CheckOutputFields(static=["node"],
1263 dynamic=["phys", "vg", "name", "size", "instance"],
1264 selected=self.op.output_fields)
1267 def Exec(self, feedback_fn):
1268 """Computes the list of nodes and their attributes.
1271 nodenames = self.nodes
1272 volumes = rpc.call_node_volumes(nodenames)
1274 ilist = [self.cfg.GetInstanceInfo(iname) for iname
1275 in self.cfg.GetInstanceList()]
1277 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
1280 for node in nodenames:
1281 if node not in volumes or not volumes[node]:
1284 node_vols = volumes[node][:]
1285 node_vols.sort(key=lambda vol: vol['dev'])
1287 for vol in node_vols:
1289 for field in self.op.output_fields:
1292 elif field == "phys":
1296 elif field == "name":
1298 elif field == "size":
1299 val = int(float(vol['size']))
1300 elif field == "instance":
1302 if node not in lv_by_node[inst]:
1304 if vol['name'] in lv_by_node[inst][node]:
1310 raise errors.ParameterError(field)
1311 node_output.append(str(val))
1313 output.append(node_output)
1318 class LUAddNode(LogicalUnit):
1319 """Logical unit for adding node to the cluster.
1323 HTYPE = constants.HTYPE_NODE
1324 _OP_REQP = ["node_name"]
1326 def BuildHooksEnv(self):
1329 This will run on all nodes before, and on all nodes + the new node after.
1333 "OP_TARGET": self.op.node_name,
1334 "NODE_NAME": self.op.node_name,
1335 "NODE_PIP": self.op.primary_ip,
1336 "NODE_SIP": self.op.secondary_ip,
1338 nodes_0 = self.cfg.GetNodeList()
1339 nodes_1 = nodes_0 + [self.op.node_name, ]
1340 return env, nodes_0, nodes_1
1342 def CheckPrereq(self):
1343 """Check prerequisites.
1346 - the new node is not already in the config
1348 - its parameters (single/dual homed) matches the cluster
1350 Any errors are signalled by raising errors.OpPrereqError.
1353 node_name = self.op.node_name
1356 dns_data = utils.HostInfo(node_name)
1358 node = dns_data.name
1359 primary_ip = self.op.primary_ip = dns_data.ip
1360 secondary_ip = getattr(self.op, "secondary_ip", None)
1361 if secondary_ip is None:
1362 secondary_ip = primary_ip
1363 if not utils.IsValidIP(secondary_ip):
1364 raise errors.OpPrereqError("Invalid secondary IP given")
1365 self.op.secondary_ip = secondary_ip
1366 node_list = cfg.GetNodeList()
1367 if node in node_list:
1368 raise errors.OpPrereqError("Node %s is already in the configuration"
1371 for existing_node_name in node_list:
1372 existing_node = cfg.GetNodeInfo(existing_node_name)
1373 if (existing_node.primary_ip == primary_ip or
1374 existing_node.secondary_ip == primary_ip or
1375 existing_node.primary_ip == secondary_ip or
1376 existing_node.secondary_ip == secondary_ip):
1377 raise errors.OpPrereqError("New node ip address(es) conflict with"
1378 " existing node %s" % existing_node.name)
1380 # check that the type of the node (single versus dual homed) is the
1381 # same as for the master
1382 myself = cfg.GetNodeInfo(self.sstore.GetMasterNode())
1383 master_singlehomed = myself.secondary_ip == myself.primary_ip
1384 newbie_singlehomed = secondary_ip == primary_ip
1385 if master_singlehomed != newbie_singlehomed:
1386 if master_singlehomed:
1387 raise errors.OpPrereqError("The master has no private ip but the"
1388 " new node has one")
1390 raise errors.OpPrereqError("The master has a private ip but the"
1391 " new node doesn't have one")
1393 # checks reachablity
1394 if not utils.TcpPing(utils.HostInfo().name,
1396 constants.DEFAULT_NODED_PORT):
1397 raise errors.OpPrereqError("Node not reachable by ping")
1399 if not newbie_singlehomed:
1400 # check reachability from my secondary ip to newbie's secondary ip
1401 if not utils.TcpPing(myself.secondary_ip,
1403 constants.DEFAULT_NODED_PORT):
1404 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
1405 " based ping to noded port")
1407 self.new_node = objects.Node(name=node,
1408 primary_ip=primary_ip,
1409 secondary_ip=secondary_ip)
1411 def Exec(self, feedback_fn):
1412 """Adds the new node to the cluster.
1415 new_node = self.new_node
1416 node = new_node.name
1418 # set up inter-node password and certificate and restarts the node daemon
1419 gntpass = self.sstore.GetNodeDaemonPassword()
1420 if not re.match('^[a-zA-Z0-9.]{1,64}$', gntpass):
1421 raise errors.OpExecError("ganeti password corruption detected")
1422 f = open(constants.SSL_CERT_FILE)
1424 gntpem = f.read(8192)
1427 # in the base64 pem encoding, neither '!' nor '.' are valid chars,
1428 # so we use this to detect an invalid certificate; as long as the
1429 # cert doesn't contain this, the here-document will be correctly
1430 # parsed by the shell sequence below
1431 if re.search('^!EOF\.', gntpem, re.MULTILINE):
1432 raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
1433 if not gntpem.endswith("\n"):
1434 raise errors.OpExecError("PEM must end with newline")
1435 logger.Info("copy cluster pass to %s and starting the node daemon" % node)
1437 # and then connect with ssh to set password and start ganeti-noded
1438 # note that all the below variables are sanitized at this point,
1439 # either by being constants or by the checks above
1441 mycommand = ("umask 077 && "
1442 "echo '%s' > '%s' && "
1443 "cat > '%s' << '!EOF.' && \n"
1444 "%s!EOF.\n%s restart" %
1445 (gntpass, ss.KeyToFilename(ss.SS_NODED_PASS),
1446 constants.SSL_CERT_FILE, gntpem,
1447 constants.NODE_INITD_SCRIPT))
1449 result = ssh.SSHCall(node, 'root', mycommand, batch=False, ask_key=True)
1451 raise errors.OpExecError("Remote command on node %s, error: %s,"
1453 (node, result.fail_reason, result.output))
1455 # check connectivity
1458 result = rpc.call_version([node])[node]
1460 if constants.PROTOCOL_VERSION == result:
1461 logger.Info("communication to node %s fine, sw version %s match" %
1464 raise errors.OpExecError("Version mismatch master version %s,"
1465 " node version %s" %
1466 (constants.PROTOCOL_VERSION, result))
1468 raise errors.OpExecError("Cannot get version from the new node")
1471 logger.Info("copy ssh key to node %s" % node)
1472 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1474 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
1475 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
1481 keyarray.append(f.read())
1485 result = rpc.call_node_add(node, keyarray[0], keyarray[1], keyarray[2],
1486 keyarray[3], keyarray[4], keyarray[5])
1489 raise errors.OpExecError("Cannot transfer ssh keys to the new node")
1491 # Add node to our /etc/hosts, and add key to known_hosts
1492 _AddHostToEtcHosts(new_node.name)
1494 _UpdateKnownHosts(new_node.name, new_node.primary_ip,
1495 self.cfg.GetHostKey())
1497 if new_node.secondary_ip != new_node.primary_ip:
1498 if not rpc.call_node_tcp_ping(new_node.name,
1499 constants.LOCALHOST_IP_ADDRESS,
1500 new_node.secondary_ip,
1501 constants.DEFAULT_NODED_PORT,
1503 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
1504 " you gave (%s). Please fix and re-run this"
1505 " command." % new_node.secondary_ip)
1507 success, msg = ssh.VerifyNodeHostname(node)
1509 raise errors.OpExecError("Node '%s' claims it has a different hostname"
1510 " than the one the resolver gives: %s."
1511 " Please fix and re-run this command." %
1514 # Distribute updated /etc/hosts and known_hosts to all nodes,
1515 # including the node just added
1516 myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
1517 dist_nodes = self.cfg.GetNodeList() + [node]
1518 if myself.name in dist_nodes:
1519 dist_nodes.remove(myself.name)
1521 logger.Debug("Copying hosts and known_hosts to all nodes")
1522 for fname in ("/etc/hosts", constants.SSH_KNOWN_HOSTS_FILE):
1523 result = rpc.call_upload_file(dist_nodes, fname)
1524 for to_node in dist_nodes:
1525 if not result[to_node]:
1526 logger.Error("copy of file %s to node %s failed" %
1529 to_copy = ss.GetFileList()
1530 for fname in to_copy:
1531 if not ssh.CopyFileToNode(node, fname):
1532 logger.Error("could not copy file %s to node %s" % (fname, node))
1534 logger.Info("adding node %s to cluster.conf" % node)
1535 self.cfg.AddNode(new_node)
1538 class LUMasterFailover(LogicalUnit):
1539 """Failover the master node to the current node.
1541 This is a special LU in that it must run on a non-master node.
1544 HPATH = "master-failover"
1545 HTYPE = constants.HTYPE_CLUSTER
1549 def BuildHooksEnv(self):
1552 This will run on the new master only in the pre phase, and on all
1553 the nodes in the post phase.
1557 "OP_TARGET": self.new_master,
1558 "NEW_MASTER": self.new_master,
1559 "OLD_MASTER": self.old_master,
1561 return env, [self.new_master], self.cfg.GetNodeList()
1563 def CheckPrereq(self):
1564 """Check prerequisites.
1566 This checks that we are not already the master.
1569 self.new_master = utils.HostInfo().name
1570 self.old_master = self.sstore.GetMasterNode()
1572 if self.old_master == self.new_master:
1573 raise errors.OpPrereqError("This commands must be run on the node"
1574 " where you want the new master to be."
1575 " %s is already the master" %
1578 def Exec(self, feedback_fn):
1579 """Failover the master node.
1581 This command, when run on a non-master node, will cause the current
1582 master to cease being master, and the non-master to become new
1586 #TODO: do not rely on gethostname returning the FQDN
1587 logger.Info("setting master to %s, old master: %s" %
1588 (self.new_master, self.old_master))
1590 if not rpc.call_node_stop_master(self.old_master):
1591 logger.Error("could disable the master role on the old master"
1592 " %s, please disable manually" % self.old_master)
1595 ss.SetKey(ss.SS_MASTER_NODE, self.new_master)
1596 if not rpc.call_upload_file(self.cfg.GetNodeList(),
1597 ss.KeyToFilename(ss.SS_MASTER_NODE)):
1598 logger.Error("could not distribute the new simple store master file"
1599 " to the other nodes, please check.")
1601 if not rpc.call_node_start_master(self.new_master):
1602 logger.Error("could not start the master role on the new master"
1603 " %s, please check" % self.new_master)
1604 feedback_fn("Error in activating the master IP on the new master,"
1605 " please fix manually.")
1609 class LUQueryClusterInfo(NoHooksLU):
1610 """Query cluster configuration.
1616 def CheckPrereq(self):
1617 """No prerequsites needed for this LU.
1622 def Exec(self, feedback_fn):
1623 """Return cluster config.
1627 "name": self.sstore.GetClusterName(),
1628 "software_version": constants.RELEASE_VERSION,
1629 "protocol_version": constants.PROTOCOL_VERSION,
1630 "config_version": constants.CONFIG_VERSION,
1631 "os_api_version": constants.OS_API_VERSION,
1632 "export_version": constants.EXPORT_VERSION,
1633 "master": self.sstore.GetMasterNode(),
1634 "architecture": (platform.architecture()[0], platform.machine()),
1640 class LUClusterCopyFile(NoHooksLU):
1641 """Copy file to cluster.
1644 _OP_REQP = ["nodes", "filename"]
1646 def CheckPrereq(self):
1647 """Check prerequisites.
1649 It should check that the named file exists and that the given list
1653 if not os.path.exists(self.op.filename):
1654 raise errors.OpPrereqError("No such filename '%s'" % self.op.filename)
1656 self.nodes = _GetWantedNodes(self, self.op.nodes)
1658 def Exec(self, feedback_fn):
1659 """Copy a file from master to some nodes.
1662 opts - class with options as members
1663 args - list containing a single element, the file name
1665 nodes - list containing the name of target nodes; if empty, all nodes
1668 filename = self.op.filename
1670 myname = utils.HostInfo().name
1672 for node in self.nodes:
1675 if not ssh.CopyFileToNode(node, filename):
1676 logger.Error("Copy of file %s to node %s failed" % (filename, node))
1679 class LUDumpClusterConfig(NoHooksLU):
1680 """Return a text-representation of the cluster-config.
1685 def CheckPrereq(self):
1686 """No prerequisites.
1691 def Exec(self, feedback_fn):
1692 """Dump a representation of the cluster config to the standard output.
1695 return self.cfg.DumpConfig()
1698 class LURunClusterCommand(NoHooksLU):
1699 """Run a command on some nodes.
1702 _OP_REQP = ["command", "nodes"]
1704 def CheckPrereq(self):
1705 """Check prerequisites.
1707 It checks that the given list of nodes is valid.
1710 self.nodes = _GetWantedNodes(self, self.op.nodes)
1712 def Exec(self, feedback_fn):
1713 """Run a command on some nodes.
1717 for node in self.nodes:
1718 result = ssh.SSHCall(node, "root", self.op.command)
1719 data.append((node, result.output, result.exit_code))
1724 class LUActivateInstanceDisks(NoHooksLU):
1725 """Bring up an instance's disks.
1728 _OP_REQP = ["instance_name"]
1730 def CheckPrereq(self):
1731 """Check prerequisites.
1733 This checks that the instance is in the cluster.
1736 instance = self.cfg.GetInstanceInfo(
1737 self.cfg.ExpandInstanceName(self.op.instance_name))
1738 if instance is None:
1739 raise errors.OpPrereqError("Instance '%s' not known" %
1740 self.op.instance_name)
1741 self.instance = instance
1744 def Exec(self, feedback_fn):
1745 """Activate the disks.
1748 disks_ok, disks_info = _AssembleInstanceDisks(self.instance, self.cfg)
1750 raise errors.OpExecError("Cannot activate block devices")
1755 def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
1756 """Prepare the block devices for an instance.
1758 This sets up the block devices on all nodes.
1761 instance: a ganeti.objects.Instance object
1762 ignore_secondaries: if true, errors on secondary nodes won't result
1763 in an error return from the function
1766 false if the operation failed
1767 list of (host, instance_visible_name, node_visible_name) if the operation
1768 suceeded with the mapping from node devices to instance devices
1772 for inst_disk in instance.disks:
1773 master_result = None
1774 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
1775 cfg.SetDiskID(node_disk, node)
1776 is_primary = node == instance.primary_node
1777 result = rpc.call_blockdev_assemble(node, node_disk,
1778 instance.name, is_primary)
1780 logger.Error("could not prepare block device %s on node %s"
1781 " (is_primary=%s)" %
1782 (inst_disk.iv_name, node, is_primary))
1783 if is_primary or not ignore_secondaries:
1786 master_result = result
1787 device_info.append((instance.primary_node, inst_disk.iv_name,
1790 # leave the disks configured for the primary node
1791 # this is a workaround that would be fixed better by
1792 # improving the logical/physical id handling
1793 for disk in instance.disks:
1794 cfg.SetDiskID(disk, instance.primary_node)
1796 return disks_ok, device_info
1799 def _StartInstanceDisks(cfg, instance, force):
1800 """Start the disks of an instance.
1803 disks_ok, dummy = _AssembleInstanceDisks(instance, cfg,
1804 ignore_secondaries=force)
1806 _ShutdownInstanceDisks(instance, cfg)
1807 if force is not None and not force:
1808 logger.Error("If the message above refers to a secondary node,"
1809 " you can retry the operation using '--force'.")
1810 raise errors.OpExecError("Disk consistency error")
1813 class LUDeactivateInstanceDisks(NoHooksLU):
1814 """Shutdown an instance's disks.
1817 _OP_REQP = ["instance_name"]
1819 def CheckPrereq(self):
1820 """Check prerequisites.
1822 This checks that the instance is in the cluster.
1825 instance = self.cfg.GetInstanceInfo(
1826 self.cfg.ExpandInstanceName(self.op.instance_name))
1827 if instance is None:
1828 raise errors.OpPrereqError("Instance '%s' not known" %
1829 self.op.instance_name)
1830 self.instance = instance
1832 def Exec(self, feedback_fn):
1833 """Deactivate the disks
1836 instance = self.instance
1837 ins_l = rpc.call_instance_list([instance.primary_node])
1838 ins_l = ins_l[instance.primary_node]
1839 if not type(ins_l) is list:
1840 raise errors.OpExecError("Can't contact node '%s'" %
1841 instance.primary_node)
1843 if self.instance.name in ins_l:
1844 raise errors.OpExecError("Instance is running, can't shutdown"
1847 _ShutdownInstanceDisks(instance, self.cfg)
1850 def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
1851 """Shutdown block devices of an instance.
1853 This does the shutdown on all nodes of the instance.
1855 If the ignore_primary is false, errors on the primary node are
1860 for disk in instance.disks:
1861 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
1862 cfg.SetDiskID(top_disk, node)
1863 if not rpc.call_blockdev_shutdown(node, top_disk):
1864 logger.Error("could not shutdown block device %s on node %s" %
1865 (disk.iv_name, node))
1866 if not ignore_primary or node != instance.primary_node:
1871 class LUStartupInstance(LogicalUnit):
1872 """Starts an instance.
1875 HPATH = "instance-start"
1876 HTYPE = constants.HTYPE_INSTANCE
1877 _OP_REQP = ["instance_name", "force"]
1879 def BuildHooksEnv(self):
1882 This runs on master, primary and secondary nodes of the instance.
1886 "FORCE": self.op.force,
1888 env.update(_BuildInstanceHookEnvByObject(self.instance))
1889 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
1890 list(self.instance.secondary_nodes))
1893 def CheckPrereq(self):
1894 """Check prerequisites.
1896 This checks that the instance is in the cluster.
1899 instance = self.cfg.GetInstanceInfo(
1900 self.cfg.ExpandInstanceName(self.op.instance_name))
1901 if instance is None:
1902 raise errors.OpPrereqError("Instance '%s' not known" %
1903 self.op.instance_name)
1905 # check bridges existance
1906 _CheckInstanceBridgesExist(instance)
1908 self.instance = instance
1909 self.op.instance_name = instance.name
1911 def Exec(self, feedback_fn):
1912 """Start the instance.
1915 instance = self.instance
1916 force = self.op.force
1917 extra_args = getattr(self.op, "extra_args", "")
1919 node_current = instance.primary_node
1921 nodeinfo = rpc.call_node_info([node_current], self.cfg.GetVGName())
1923 raise errors.OpExecError("Could not contact node %s for infos" %
1926 freememory = nodeinfo[node_current]['memory_free']
1927 memory = instance.memory
1928 if memory > freememory:
1929 raise errors.OpExecError("Not enough memory to start instance"
1931 " needed %s MiB, available %s MiB" %
1932 (instance.name, node_current, memory,
1935 _StartInstanceDisks(self.cfg, instance, force)
1937 if not rpc.call_instance_start(node_current, instance, extra_args):
1938 _ShutdownInstanceDisks(instance, self.cfg)
1939 raise errors.OpExecError("Could not start instance")
1941 self.cfg.MarkInstanceUp(instance.name)
1944 class LURebootInstance(LogicalUnit):
1945 """Reboot an instance.
1948 HPATH = "instance-reboot"
1949 HTYPE = constants.HTYPE_INSTANCE
1950 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
1952 def BuildHooksEnv(self):
1955 This runs on master, primary and secondary nodes of the instance.
1959 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
1961 env.update(_BuildInstanceHookEnvByObject(self.instance))
1962 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
1963 list(self.instance.secondary_nodes))
1966 def CheckPrereq(self):
1967 """Check prerequisites.
1969 This checks that the instance is in the cluster.
1972 instance = self.cfg.GetInstanceInfo(
1973 self.cfg.ExpandInstanceName(self.op.instance_name))
1974 if instance is None:
1975 raise errors.OpPrereqError("Instance '%s' not known" %
1976 self.op.instance_name)
1978 # check bridges existance
1979 _CheckInstanceBridgesExist(instance)
1981 self.instance = instance
1982 self.op.instance_name = instance.name
1984 def Exec(self, feedback_fn):
1985 """Reboot the instance.
1988 instance = self.instance
1989 ignore_secondaries = self.op.ignore_secondaries
1990 reboot_type = self.op.reboot_type
1991 extra_args = getattr(self.op, "extra_args", "")
1993 node_current = instance.primary_node
1995 if reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
1996 constants.INSTANCE_REBOOT_HARD,
1997 constants.INSTANCE_REBOOT_FULL]:
1998 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
1999 (constants.INSTANCE_REBOOT_SOFT,
2000 constants.INSTANCE_REBOOT_HARD,
2001 constants.INSTANCE_REBOOT_FULL))
2003 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
2004 constants.INSTANCE_REBOOT_HARD]:
2005 if not rpc.call_instance_reboot(node_current, instance,
2006 reboot_type, extra_args):
2007 raise errors.OpExecError("Could not reboot instance")
2009 if not rpc.call_instance_shutdown(node_current, instance):
2010 raise errors.OpExecError("could not shutdown instance for full reboot")
2011 _ShutdownInstanceDisks(instance, self.cfg)
2012 _StartInstanceDisks(self.cfg, instance, ignore_secondaries)
2013 if not rpc.call_instance_start(node_current, instance, extra_args):
2014 _ShutdownInstanceDisks(instance, self.cfg)
2015 raise errors.OpExecError("Could not start instance for full reboot")
2017 self.cfg.MarkInstanceUp(instance.name)
2020 class LUShutdownInstance(LogicalUnit):
2021 """Shutdown an instance.
2024 HPATH = "instance-stop"
2025 HTYPE = constants.HTYPE_INSTANCE
2026 _OP_REQP = ["instance_name"]
2028 def BuildHooksEnv(self):
2031 This runs on master, primary and secondary nodes of the instance.
2034 env = _BuildInstanceHookEnvByObject(self.instance)
2035 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2036 list(self.instance.secondary_nodes))
2039 def CheckPrereq(self):
2040 """Check prerequisites.
2042 This checks that the instance is in the cluster.
2045 instance = self.cfg.GetInstanceInfo(
2046 self.cfg.ExpandInstanceName(self.op.instance_name))
2047 if instance is None:
2048 raise errors.OpPrereqError("Instance '%s' not known" %
2049 self.op.instance_name)
2050 self.instance = instance
2052 def Exec(self, feedback_fn):
2053 """Shutdown the instance.
2056 instance = self.instance
2057 node_current = instance.primary_node
2058 if not rpc.call_instance_shutdown(node_current, instance):
2059 logger.Error("could not shutdown instance")
2061 self.cfg.MarkInstanceDown(instance.name)
2062 _ShutdownInstanceDisks(instance, self.cfg)
2065 class LUReinstallInstance(LogicalUnit):
2066 """Reinstall an instance.
2069 HPATH = "instance-reinstall"
2070 HTYPE = constants.HTYPE_INSTANCE
2071 _OP_REQP = ["instance_name"]
2073 def BuildHooksEnv(self):
2076 This runs on master, primary and secondary nodes of the instance.
2079 env = _BuildInstanceHookEnvByObject(self.instance)
2080 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2081 list(self.instance.secondary_nodes))
2084 def CheckPrereq(self):
2085 """Check prerequisites.
2087 This checks that the instance is in the cluster and is not running.
2090 instance = self.cfg.GetInstanceInfo(
2091 self.cfg.ExpandInstanceName(self.op.instance_name))
2092 if instance is None:
2093 raise errors.OpPrereqError("Instance '%s' not known" %
2094 self.op.instance_name)
2095 if instance.disk_template == constants.DT_DISKLESS:
2096 raise errors.OpPrereqError("Instance '%s' has no disks" %
2097 self.op.instance_name)
2098 if instance.status != "down":
2099 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
2100 self.op.instance_name)
2101 remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
2103 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
2104 (self.op.instance_name,
2105 instance.primary_node))
2107 self.op.os_type = getattr(self.op, "os_type", None)
2108 if self.op.os_type is not None:
2110 pnode = self.cfg.GetNodeInfo(
2111 self.cfg.ExpandNodeName(instance.primary_node))
2113 raise errors.OpPrereqError("Primary node '%s' is unknown" %
2115 os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
2117 raise errors.OpPrereqError("OS '%s' not in supported OS list for"
2118 " primary node" % self.op.os_type)
2120 self.instance = instance
2122 def Exec(self, feedback_fn):
2123 """Reinstall the instance.
2126 inst = self.instance
2128 if self.op.os_type is not None:
2129 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
2130 inst.os = self.op.os_type
2131 self.cfg.AddInstance(inst)
2133 _StartInstanceDisks(self.cfg, inst, None)
2135 feedback_fn("Running the instance OS create scripts...")
2136 if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"):
2137 raise errors.OpExecError("Could not install OS for instance %s"
2139 (inst.name, inst.primary_node))
2141 _ShutdownInstanceDisks(inst, self.cfg)
2144 class LURenameInstance(LogicalUnit):
2145 """Rename an instance.
2148 HPATH = "instance-rename"
2149 HTYPE = constants.HTYPE_INSTANCE
2150 _OP_REQP = ["instance_name", "new_name"]
2152 def BuildHooksEnv(self):
2155 This runs on master, primary and secondary nodes of the instance.
2158 env = _BuildInstanceHookEnvByObject(self.instance)
2159 env["INSTANCE_NEW_NAME"] = self.op.new_name
2160 nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2161 list(self.instance.secondary_nodes))
2164 def CheckPrereq(self):
2165 """Check prerequisites.
2167 This checks that the instance is in the cluster and is not running.
2170 instance = self.cfg.GetInstanceInfo(
2171 self.cfg.ExpandInstanceName(self.op.instance_name))
2172 if instance is None:
2173 raise errors.OpPrereqError("Instance '%s' not known" %
2174 self.op.instance_name)
2175 if instance.status != "down":
2176 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
2177 self.op.instance_name)
2178 remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
2180 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
2181 (self.op.instance_name,
2182 instance.primary_node))
2183 self.instance = instance
2185 # new name verification
2186 name_info = utils.HostInfo(self.op.new_name)
2188 self.op.new_name = new_name = name_info.name
2189 if not getattr(self.op, "ignore_ip", False):
2190 command = ["fping", "-q", name_info.ip]
2191 result = utils.RunCmd(command)
2192 if not result.failed:
2193 raise errors.OpPrereqError("IP %s of instance %s already in use" %
2194 (name_info.ip, new_name))
2197 def Exec(self, feedback_fn):
2198 """Reinstall the instance.
2201 inst = self.instance
2202 old_name = inst.name
2204 self.cfg.RenameInstance(inst.name, self.op.new_name)
2206 # re-read the instance from the configuration after rename
2207 inst = self.cfg.GetInstanceInfo(self.op.new_name)
2209 _StartInstanceDisks(self.cfg, inst, None)
2211 if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
2213 msg = ("Could run OS rename script for instance %s on node %s (but the"
2214 " instance has been renamed in Ganeti)" %
2215 (inst.name, inst.primary_node))
2218 _ShutdownInstanceDisks(inst, self.cfg)
2221 class LURemoveInstance(LogicalUnit):
2222 """Remove an instance.
2225 HPATH = "instance-remove"
2226 HTYPE = constants.HTYPE_INSTANCE
2227 _OP_REQP = ["instance_name"]
2229 def BuildHooksEnv(self):
2232 This runs on master, primary and secondary nodes of the instance.
2235 env = _BuildInstanceHookEnvByObject(self.instance)
2236 nl = [self.sstore.GetMasterNode()]
2239 def CheckPrereq(self):
2240 """Check prerequisites.
2242 This checks that the instance is in the cluster.
2245 instance = self.cfg.GetInstanceInfo(
2246 self.cfg.ExpandInstanceName(self.op.instance_name))
2247 if instance is None:
2248 raise errors.OpPrereqError("Instance '%s' not known" %
2249 self.op.instance_name)
2250 self.instance = instance
2252 def Exec(self, feedback_fn):
2253 """Remove the instance.
2256 instance = self.instance
2257 logger.Info("shutting down instance %s on node %s" %
2258 (instance.name, instance.primary_node))
2260 if not rpc.call_instance_shutdown(instance.primary_node, instance):
2261 if self.op.ignore_failures:
2262 feedback_fn("Warning: can't shutdown instance")
2264 raise errors.OpExecError("Could not shutdown instance %s on node %s" %
2265 (instance.name, instance.primary_node))
2267 logger.Info("removing block devices for instance %s" % instance.name)
2269 if not _RemoveDisks(instance, self.cfg):
2270 if self.op.ignore_failures:
2271 feedback_fn("Warning: can't remove instance's disks")
2273 raise errors.OpExecError("Can't remove instance's disks")
2275 logger.Info("removing instance %s out of cluster config" % instance.name)
2277 self.cfg.RemoveInstance(instance.name)
2280 class LUQueryInstances(NoHooksLU):
2281 """Logical unit for querying instances.
2284 _OP_REQP = ["output_fields", "names"]
2286 def CheckPrereq(self):
2287 """Check prerequisites.
2289 This checks that the fields required are valid output fields.
2292 self.dynamic_fields = frozenset(["oper_state", "oper_ram"])
2293 _CheckOutputFields(static=["name", "os", "pnode", "snodes",
2294 "admin_state", "admin_ram",
2295 "disk_template", "ip", "mac", "bridge",
2296 "sda_size", "sdb_size"],
2297 dynamic=self.dynamic_fields,
2298 selected=self.op.output_fields)
2300 self.wanted = _GetWantedInstances(self, self.op.names)
2302 def Exec(self, feedback_fn):
2303 """Computes the list of nodes and their attributes.
2306 instance_names = self.wanted
2307 instance_list = [self.cfg.GetInstanceInfo(iname) for iname
2310 # begin data gathering
2312 nodes = frozenset([inst.primary_node for inst in instance_list])
2315 if self.dynamic_fields.intersection(self.op.output_fields):
2317 node_data = rpc.call_all_instances_info(nodes)
2319 result = node_data[name]
2321 live_data.update(result)
2322 elif result == False:
2323 bad_nodes.append(name)
2324 # else no instance is alive
2326 live_data = dict([(name, {}) for name in instance_names])
2328 # end data gathering
2331 for instance in instance_list:
2333 for field in self.op.output_fields:
2338 elif field == "pnode":
2339 val = instance.primary_node
2340 elif field == "snodes":
2341 val = list(instance.secondary_nodes)
2342 elif field == "admin_state":
2343 val = (instance.status != "down")
2344 elif field == "oper_state":
2345 if instance.primary_node in bad_nodes:
2348 val = bool(live_data.get(instance.name))
2349 elif field == "admin_ram":
2350 val = instance.memory
2351 elif field == "oper_ram":
2352 if instance.primary_node in bad_nodes:
2354 elif instance.name in live_data:
2355 val = live_data[instance.name].get("memory", "?")
2358 elif field == "disk_template":
2359 val = instance.disk_template
2361 val = instance.nics[0].ip
2362 elif field == "bridge":
2363 val = instance.nics[0].bridge
2364 elif field == "mac":
2365 val = instance.nics[0].mac
2366 elif field == "sda_size" or field == "sdb_size":
2367 disk = instance.FindDisk(field[:3])
2373 raise errors.ParameterError(field)
2380 class LUFailoverInstance(LogicalUnit):
2381 """Failover an instance.
2384 HPATH = "instance-failover"
2385 HTYPE = constants.HTYPE_INSTANCE
2386 _OP_REQP = ["instance_name", "ignore_consistency"]
2388 def BuildHooksEnv(self):
2391 This runs on master, primary and secondary nodes of the instance.
2395 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
2397 env.update(_BuildInstanceHookEnvByObject(self.instance))
2398 nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
2401 def CheckPrereq(self):
2402 """Check prerequisites.
2404 This checks that the instance is in the cluster.
2407 instance = self.cfg.GetInstanceInfo(
2408 self.cfg.ExpandInstanceName(self.op.instance_name))
2409 if instance is None:
2410 raise errors.OpPrereqError("Instance '%s' not known" %
2411 self.op.instance_name)
2413 if instance.disk_template not in constants.DTS_NET_MIRROR:
2414 raise errors.OpPrereqError("Instance's disk layout is not"
2415 " network mirrored, cannot failover.")
2417 secondary_nodes = instance.secondary_nodes
2418 if not secondary_nodes:
2419 raise errors.ProgrammerError("no secondary node but using "
2420 "DT_REMOTE_RAID1 template")
2422 # check memory requirements on the secondary node
2423 target_node = secondary_nodes[0]
2424 nodeinfo = rpc.call_node_info([target_node], self.cfg.GetVGName())
2425 info = nodeinfo.get(target_node, None)
2427 raise errors.OpPrereqError("Cannot get current information"
2428 " from node '%s'" % nodeinfo)
2429 if instance.memory > info['memory_free']:
2430 raise errors.OpPrereqError("Not enough memory on target node %s."
2431 " %d MB available, %d MB required" %
2432 (target_node, info['memory_free'],
2435 # check bridge existance
2436 brlist = [nic.bridge for nic in instance.nics]
2437 if not rpc.call_bridges_exist(target_node, brlist):
2438 raise errors.OpPrereqError("One or more target bridges %s does not"
2439 " exist on destination node '%s'" %
2440 (brlist, target_node))
2442 self.instance = instance
2444 def Exec(self, feedback_fn):
2445 """Failover an instance.
2447 The failover is done by shutting it down on its present node and
2448 starting it on the secondary.
2451 instance = self.instance
2453 source_node = instance.primary_node
2454 target_node = instance.secondary_nodes[0]
2456 feedback_fn("* checking disk consistency between source and target")
2457 for dev in instance.disks:
2458 # for remote_raid1, these are md over drbd
2459 if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
2460 if not self.op.ignore_consistency:
2461 raise errors.OpExecError("Disk %s is degraded on target node,"
2462 " aborting failover." % dev.iv_name)
2464 feedback_fn("* checking target node resource availability")
2465 nodeinfo = rpc.call_node_info([target_node], self.cfg.GetVGName())
2468 raise errors.OpExecError("Could not contact target node %s." %
2471 free_memory = int(nodeinfo[target_node]['memory_free'])
2472 memory = instance.memory
2473 if memory > free_memory:
2474 raise errors.OpExecError("Not enough memory to create instance %s on"
2475 " node %s. needed %s MiB, available %s MiB" %
2476 (instance.name, target_node, memory,
2479 feedback_fn("* shutting down instance on source node")
2480 logger.Info("Shutting down instance %s on node %s" %
2481 (instance.name, source_node))
2483 if not rpc.call_instance_shutdown(source_node, instance):
2484 if self.op.ignore_consistency:
2485 logger.Error("Could not shutdown instance %s on node %s. Proceeding"
2486 " anyway. Please make sure node %s is down" %
2487 (instance.name, source_node, source_node))
2489 raise errors.OpExecError("Could not shutdown instance %s on node %s" %
2490 (instance.name, source_node))
2492 feedback_fn("* deactivating the instance's disks on source node")
2493 if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True):
2494 raise errors.OpExecError("Can't shut down the instance's disks.")
2496 instance.primary_node = target_node
2497 # distribute new instance config to the other nodes
2498 self.cfg.AddInstance(instance)
2500 feedback_fn("* activating the instance's disks on target node")
2501 logger.Info("Starting instance %s on node %s" %
2502 (instance.name, target_node))
2504 disks_ok, dummy = _AssembleInstanceDisks(instance, self.cfg,
2505 ignore_secondaries=True)
2507 _ShutdownInstanceDisks(instance, self.cfg)
2508 raise errors.OpExecError("Can't activate the instance's disks")
2510 feedback_fn("* starting the instance on the target node")
2511 if not rpc.call_instance_start(target_node, instance, None):
2512 _ShutdownInstanceDisks(instance, self.cfg)
2513 raise errors.OpExecError("Could not start instance %s on node %s." %
2514 (instance.name, target_node))
2517 def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
2518 """Create a tree of block devices on the primary node.
2520 This always creates all devices.
2524 for child in device.children:
2525 if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info):
2528 cfg.SetDiskID(device, node)
2529 new_id = rpc.call_blockdev_create(node, device, device.size,
2530 instance.name, True, info)
2533 if device.physical_id is None:
2534 device.physical_id = new_id
2538 def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info):
2539 """Create a tree of block devices on a secondary node.
2541 If this device type has to be created on secondaries, create it and
2544 If not, just recurse to children keeping the same 'force' value.
2547 if device.CreateOnSecondary():
2550 for child in device.children:
2551 if not _CreateBlockDevOnSecondary(cfg, node, instance,
2552 child, force, info):
2557 cfg.SetDiskID(device, node)
2558 new_id = rpc.call_blockdev_create(node, device, device.size,
2559 instance.name, False, info)
2562 if device.physical_id is None:
2563 device.physical_id = new_id
2567 def _GenerateUniqueNames(cfg, exts):
2568 """Generate a suitable LV name.
2570 This will generate a logical volume name for the given instance.
2575 new_id = cfg.GenerateUniqueID()
2576 results.append("%s%s" % (new_id, val))
2580 def _GenerateMDDRBDBranch(cfg, primary, secondary, size, names):
2581 """Generate a drbd device complete with its children.
2584 port = cfg.AllocatePort()
2585 vgname = cfg.GetVGName()
2586 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
2587 logical_id=(vgname, names[0]))
2588 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
2589 logical_id=(vgname, names[1]))
2590 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD7, size=size,
2591 logical_id = (primary, secondary, port),
2592 children = [dev_data, dev_meta])
2596 def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name):
2597 """Generate a drbd8 device complete with its children.
2600 port = cfg.AllocatePort()
2601 vgname = cfg.GetVGName()
2602 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
2603 logical_id=(vgname, names[0]))
2604 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
2605 logical_id=(vgname, names[1]))
2606 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
2607 logical_id = (primary, secondary, port),
2608 children = [dev_data, dev_meta],
2612 def _GenerateDiskTemplate(cfg, template_name,
2613 instance_name, primary_node,
2614 secondary_nodes, disk_sz, swap_sz):
2615 """Generate the entire disk layout for a given template type.
2618 #TODO: compute space requirements
2620 vgname = cfg.GetVGName()
2621 if template_name == "diskless":
2623 elif template_name == "plain":
2624 if len(secondary_nodes) != 0:
2625 raise errors.ProgrammerError("Wrong template configuration")
2627 names = _GenerateUniqueNames(cfg, [".sda", ".sdb"])
2628 sda_dev = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
2629 logical_id=(vgname, names[0]),
2631 sdb_dev = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
2632 logical_id=(vgname, names[1]),
2634 disks = [sda_dev, sdb_dev]
2635 elif template_name == "local_raid1":
2636 if len(secondary_nodes) != 0:
2637 raise errors.ProgrammerError("Wrong template configuration")
2640 names = _GenerateUniqueNames(cfg, [".sda_m1", ".sda_m2",
2641 ".sdb_m1", ".sdb_m2"])
2642 sda_dev_m1 = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
2643 logical_id=(vgname, names[0]))
2644 sda_dev_m2 = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
2645 logical_id=(vgname, names[1]))
2646 md_sda_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name = "sda",
2648 children = [sda_dev_m1, sda_dev_m2])
2649 sdb_dev_m1 = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
2650 logical_id=(vgname, names[2]))
2651 sdb_dev_m2 = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
2652 logical_id=(vgname, names[3]))
2653 md_sdb_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name = "sdb",
2655 children = [sdb_dev_m1, sdb_dev_m2])
2656 disks = [md_sda_dev, md_sdb_dev]
2657 elif template_name == constants.DT_REMOTE_RAID1:
2658 if len(secondary_nodes) != 1:
2659 raise errors.ProgrammerError("Wrong template configuration")
2660 remote_node = secondary_nodes[0]
2661 names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta",
2662 ".sdb_data", ".sdb_meta"])
2663 drbd_sda_dev = _GenerateMDDRBDBranch(cfg, primary_node, remote_node,
2664 disk_sz, names[0:2])
2665 md_sda_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name="sda",
2666 children = [drbd_sda_dev], size=disk_sz)
2667 drbd_sdb_dev = _GenerateMDDRBDBranch(cfg, primary_node, remote_node,
2668 swap_sz, names[2:4])
2669 md_sdb_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name="sdb",
2670 children = [drbd_sdb_dev], size=swap_sz)
2671 disks = [md_sda_dev, md_sdb_dev]
2672 elif template_name == constants.DT_DRBD8:
2673 if len(secondary_nodes) != 1:
2674 raise errors.ProgrammerError("Wrong template configuration")
2675 remote_node = secondary_nodes[0]
2676 names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta",
2677 ".sdb_data", ".sdb_meta"])
2678 drbd_sda_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
2679 disk_sz, names[0:2], "sda")
2680 drbd_sdb_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
2681 swap_sz, names[2:4], "sdb")
2682 disks = [drbd_sda_dev, drbd_sdb_dev]
2684 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
2688 def _GetInstanceInfoText(instance):
2689 """Compute that text that should be added to the disk's metadata.
2692 return "originstname+%s" % instance.name
2695 def _CreateDisks(cfg, instance):
2696 """Create all disks for an instance.
2698 This abstracts away some work from AddInstance.
2701 instance: the instance object
2704 True or False showing the success of the creation process
2707 info = _GetInstanceInfoText(instance)
2709 for device in instance.disks:
2710 logger.Info("creating volume %s for instance %s" %
2711 (device.iv_name, instance.name))
2713 for secondary_node in instance.secondary_nodes:
2714 if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance,
2715 device, False, info):
2716 logger.Error("failed to create volume %s (%s) on secondary node %s!" %
2717 (device.iv_name, device, secondary_node))
2720 if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
2721 instance, device, info):
2722 logger.Error("failed to create volume %s on primary!" %
2728 def _RemoveDisks(instance, cfg):
2729 """Remove all disks for an instance.
2731 This abstracts away some work from `AddInstance()` and
2732 `RemoveInstance()`. Note that in case some of the devices couldn't
2733 be removed, the removal will continue with the other ones (compare
2734 with `_CreateDisks()`).
2737 instance: the instance object
2740 True or False showing the success of the removal proces
2743 logger.Info("removing block devices for instance %s" % instance.name)
2746 for device in instance.disks:
2747 for node, disk in device.ComputeNodeTree(instance.primary_node):
2748 cfg.SetDiskID(disk, node)
2749 if not rpc.call_blockdev_remove(node, disk):
2750 logger.Error("could not remove block device %s on node %s,"
2751 " continuing anyway" %
2752 (device.iv_name, node))
2757 class LUCreateInstance(LogicalUnit):
2758 """Create an instance.
2761 HPATH = "instance-add"
2762 HTYPE = constants.HTYPE_INSTANCE
2763 _OP_REQP = ["instance_name", "mem_size", "disk_size", "pnode",
2764 "disk_template", "swap_size", "mode", "start", "vcpus",
2765 "wait_for_sync", "ip_check"]
2767 def BuildHooksEnv(self):
2770 This runs on master, primary and secondary nodes of the instance.
2774 "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
2775 "INSTANCE_DISK_SIZE": self.op.disk_size,
2776 "INSTANCE_SWAP_SIZE": self.op.swap_size,
2777 "INSTANCE_ADD_MODE": self.op.mode,
2779 if self.op.mode == constants.INSTANCE_IMPORT:
2780 env["INSTANCE_SRC_NODE"] = self.op.src_node
2781 env["INSTANCE_SRC_PATH"] = self.op.src_path
2782 env["INSTANCE_SRC_IMAGE"] = self.src_image
2784 env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
2785 primary_node=self.op.pnode,
2786 secondary_nodes=self.secondaries,
2787 status=self.instance_status,
2788 os_type=self.op.os_type,
2789 memory=self.op.mem_size,
2790 vcpus=self.op.vcpus,
2791 nics=[(self.inst_ip, self.op.bridge)],
2794 nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
2799 def CheckPrereq(self):
2800 """Check prerequisites.
2803 if self.op.mode not in (constants.INSTANCE_CREATE,
2804 constants.INSTANCE_IMPORT):
2805 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
2808 if self.op.mode == constants.INSTANCE_IMPORT:
2809 src_node = getattr(self.op, "src_node", None)
2810 src_path = getattr(self.op, "src_path", None)
2811 if src_node is None or src_path is None:
2812 raise errors.OpPrereqError("Importing an instance requires source"
2813 " node and path options")
2814 src_node_full = self.cfg.ExpandNodeName(src_node)
2815 if src_node_full is None:
2816 raise errors.OpPrereqError("Unknown source node '%s'" % src_node)
2817 self.op.src_node = src_node = src_node_full
2819 if not os.path.isabs(src_path):
2820 raise errors.OpPrereqError("The source path must be absolute")
2822 export_info = rpc.call_export_info(src_node, src_path)
2825 raise errors.OpPrereqError("No export found in dir %s" % src_path)
2827 if not export_info.has_section(constants.INISECT_EXP):
2828 raise errors.ProgrammerError("Corrupted export config")
2830 ei_version = export_info.get(constants.INISECT_EXP, 'version')
2831 if (int(ei_version) != constants.EXPORT_VERSION):
2832 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
2833 (ei_version, constants.EXPORT_VERSION))
2835 if int(export_info.get(constants.INISECT_INS, 'disk_count')) > 1:
2836 raise errors.OpPrereqError("Can't import instance with more than"
2839 # FIXME: are the old os-es, disk sizes, etc. useful?
2840 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
2841 diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS,
2843 self.src_image = diskimage
2844 else: # INSTANCE_CREATE
2845 if getattr(self.op, "os_type", None) is None:
2846 raise errors.OpPrereqError("No guest OS specified")
2848 # check primary node
2849 pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode))
2851 raise errors.OpPrereqError("Primary node '%s' is unknown" %
2853 self.op.pnode = pnode.name
2855 self.secondaries = []
2856 # disk template and mirror node verification
2857 if self.op.disk_template not in constants.DISK_TEMPLATES:
2858 raise errors.OpPrereqError("Invalid disk template name")
2860 if self.op.disk_template in constants.DTS_NET_MIRROR:
2861 if getattr(self.op, "snode", None) is None:
2862 raise errors.OpPrereqError("The networked disk templates need"
2865 snode_name = self.cfg.ExpandNodeName(self.op.snode)
2866 if snode_name is None:
2867 raise errors.OpPrereqError("Unknown secondary node '%s'" %
2869 elif snode_name == pnode.name:
2870 raise errors.OpPrereqError("The secondary node cannot be"
2871 " the primary node.")
2872 self.secondaries.append(snode_name)
2874 # Check lv size requirements
2875 nodenames = [pnode.name] + self.secondaries
2876 nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
2878 # Required free disk space as a function of disk and swap space
2880 constants.DT_DISKLESS: 0,
2881 constants.DT_PLAIN: self.op.disk_size + self.op.swap_size,
2882 constants.DT_LOCAL_RAID1: (self.op.disk_size + self.op.swap_size) * 2,
2883 # 256 MB are added for drbd metadata, 128MB for each drbd device
2884 constants.DT_REMOTE_RAID1: self.op.disk_size + self.op.swap_size + 256,
2885 constants.DT_DRBD8: self.op.disk_size + self.op.swap_size + 256,
2888 if self.op.disk_template not in req_size_dict:
2889 raise errors.ProgrammerError("Disk template '%s' size requirement"
2890 " is unknown" % self.op.disk_template)
2892 req_size = req_size_dict[self.op.disk_template]
2894 for node in nodenames:
2895 info = nodeinfo.get(node, None)
2897 raise errors.OpPrereqError("Cannot get current information"
2898 " from node '%s'" % nodeinfo)
2899 if req_size > info['vg_free']:
2900 raise errors.OpPrereqError("Not enough disk space on target node %s."
2901 " %d MB available, %d MB required" %
2902 (node, info['vg_free'], req_size))
2905 os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
2907 raise errors.OpPrereqError("OS '%s' not in supported os list for"
2908 " primary node" % self.op.os_type)
2910 # instance verification
2911 hostname1 = utils.HostInfo(self.op.instance_name)
2913 self.op.instance_name = instance_name = hostname1.name
2914 instance_list = self.cfg.GetInstanceList()
2915 if instance_name in instance_list:
2916 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
2919 ip = getattr(self.op, "ip", None)
2920 if ip is None or ip.lower() == "none":
2922 elif ip.lower() == "auto":
2923 inst_ip = hostname1.ip
2925 if not utils.IsValidIP(ip):
2926 raise errors.OpPrereqError("given IP address '%s' doesn't look"
2927 " like a valid IP" % ip)
2929 self.inst_ip = inst_ip
2931 if self.op.start and not self.op.ip_check:
2932 raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
2933 " adding an instance in start mode")
2935 if self.op.ip_check:
2936 if utils.TcpPing(utils.HostInfo().name, hostname1.ip,
2937 constants.DEFAULT_NODED_PORT):
2938 raise errors.OpPrereqError("IP %s of instance %s already in use" %
2939 (hostname1.ip, instance_name))
2941 # bridge verification
2942 bridge = getattr(self.op, "bridge", None)
2944 self.op.bridge = self.cfg.GetDefBridge()
2946 self.op.bridge = bridge
2948 if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
2949 raise errors.OpPrereqError("target bridge '%s' does not exist on"
2950 " destination node '%s'" %
2951 (self.op.bridge, pnode.name))
2954 self.instance_status = 'up'
2956 self.instance_status = 'down'
2958 def Exec(self, feedback_fn):
2959 """Create and add the instance to the cluster.
2962 instance = self.op.instance_name
2963 pnode_name = self.pnode.name
2965 nic = objects.NIC(bridge=self.op.bridge, mac=self.cfg.GenerateMAC())
2966 if self.inst_ip is not None:
2967 nic.ip = self.inst_ip
2969 disks = _GenerateDiskTemplate(self.cfg,
2970 self.op.disk_template,
2971 instance, pnode_name,
2972 self.secondaries, self.op.disk_size,
2975 iobj = objects.Instance(name=instance, os=self.op.os_type,
2976 primary_node=pnode_name,
2977 memory=self.op.mem_size,
2978 vcpus=self.op.vcpus,
2979 nics=[nic], disks=disks,
2980 disk_template=self.op.disk_template,
2981 status=self.instance_status,
2984 feedback_fn("* creating instance disks...")
2985 if not _CreateDisks(self.cfg, iobj):
2986 _RemoveDisks(iobj, self.cfg)
2987 raise errors.OpExecError("Device creation failed, reverting...")
2989 feedback_fn("adding instance %s to cluster config" % instance)
2991 self.cfg.AddInstance(iobj)
2993 if self.op.wait_for_sync:
2994 disk_abort = not _WaitForSync(self.cfg, iobj, self.proc)
2995 elif iobj.disk_template in constants.DTS_NET_MIRROR:
2996 # make sure the disks are not degraded (still sync-ing is ok)
2998 feedback_fn("* checking mirrors status")
2999 disk_abort = not _WaitForSync(self.cfg, iobj, self.proc, oneshot=True)
3004 _RemoveDisks(iobj, self.cfg)
3005 self.cfg.RemoveInstance(iobj.name)
3006 raise errors.OpExecError("There are some degraded disks for"
3009 feedback_fn("creating os for instance %s on node %s" %
3010 (instance, pnode_name))
3012 if iobj.disk_template != constants.DT_DISKLESS:
3013 if self.op.mode == constants.INSTANCE_CREATE:
3014 feedback_fn("* running the instance OS create scripts...")
3015 if not rpc.call_instance_os_add(pnode_name, iobj, "sda", "sdb"):
3016 raise errors.OpExecError("could not add os for instance %s"
3018 (instance, pnode_name))
3020 elif self.op.mode == constants.INSTANCE_IMPORT:
3021 feedback_fn("* running the instance OS import scripts...")
3022 src_node = self.op.src_node
3023 src_image = self.src_image
3024 if not rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb",
3025 src_node, src_image):
3026 raise errors.OpExecError("Could not import os for instance"
3028 (instance, pnode_name))
3030 # also checked in the prereq part
3031 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
3035 logger.Info("starting instance %s on node %s" % (instance, pnode_name))
3036 feedback_fn("* starting instance...")
3037 if not rpc.call_instance_start(pnode_name, iobj, None):
3038 raise errors.OpExecError("Could not start instance")
3041 class LUConnectConsole(NoHooksLU):
3042 """Connect to an instance's console.
3044 This is somewhat special in that it returns the command line that
3045 you need to run on the master node in order to connect to the
3049 _OP_REQP = ["instance_name"]
3051 def CheckPrereq(self):
3052 """Check prerequisites.
3054 This checks that the instance is in the cluster.
3057 instance = self.cfg.GetInstanceInfo(
3058 self.cfg.ExpandInstanceName(self.op.instance_name))
3059 if instance is None:
3060 raise errors.OpPrereqError("Instance '%s' not known" %
3061 self.op.instance_name)
3062 self.instance = instance
3064 def Exec(self, feedback_fn):
3065 """Connect to the console of an instance
3068 instance = self.instance
3069 node = instance.primary_node
3071 node_insts = rpc.call_instance_list([node])[node]
3072 if node_insts is False:
3073 raise errors.OpExecError("Can't connect to node %s." % node)
3075 if instance.name not in node_insts:
3076 raise errors.OpExecError("Instance %s is not running." % instance.name)
3078 logger.Debug("connecting to console of %s on %s" % (instance.name, node))
3080 hyper = hypervisor.GetHypervisor()
3081 console_cmd = hyper.GetShellCommandForConsole(instance.name)
3083 argv = ["ssh", "-q", "-t"]
3084 argv.extend(ssh.KNOWN_HOSTS_OPTS)
3085 argv.extend(ssh.BATCH_MODE_OPTS)
3087 argv.append(console_cmd)
3091 class LUAddMDDRBDComponent(LogicalUnit):
3092 """Adda new mirror member to an instance's disk.
3095 HPATH = "mirror-add"
3096 HTYPE = constants.HTYPE_INSTANCE
3097 _OP_REQP = ["instance_name", "remote_node", "disk_name"]
3099 def BuildHooksEnv(self):
3102 This runs on the master, the primary and all the secondaries.
3106 "NEW_SECONDARY": self.op.remote_node,
3107 "DISK_NAME": self.op.disk_name,
3109 env.update(_BuildInstanceHookEnvByObject(self.instance))
3110 nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
3111 self.op.remote_node,] + list(self.instance.secondary_nodes)
3114 def CheckPrereq(self):
3115 """Check prerequisites.
3117 This checks that the instance is in the cluster.
3120 instance = self.cfg.GetInstanceInfo(
3121 self.cfg.ExpandInstanceName(self.op.instance_name))
3122 if instance is None:
3123 raise errors.OpPrereqError("Instance '%s' not known" %
3124 self.op.instance_name)
3125 self.instance = instance
3127 remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
3128 if remote_node is None:
3129 raise errors.OpPrereqError("Node '%s' not known" % self.op.remote_node)
3130 self.remote_node = remote_node
3132 if remote_node == instance.primary_node:
3133 raise errors.OpPrereqError("The specified node is the primary node of"
3136 if instance.disk_template != constants.DT_REMOTE_RAID1:
3137 raise errors.OpPrereqError("Instance's disk layout is not"
3139 for disk in instance.disks:
3140 if disk.iv_name == self.op.disk_name:
3143 raise errors.OpPrereqError("Can't find this device ('%s') in the"
3144 " instance." % self.op.disk_name)
3145 if len(disk.children) > 1:
3146 raise errors.OpPrereqError("The device already has two slave devices."
3147 " This would create a 3-disk raid1 which we"
3151 def Exec(self, feedback_fn):
3152 """Add the mirror component
3156 instance = self.instance
3158 remote_node = self.remote_node
3159 lv_names = [".%s_%s" % (disk.iv_name, suf) for suf in ["data", "meta"]]
3160 names = _GenerateUniqueNames(self.cfg, lv_names)
3161 new_drbd = _GenerateMDDRBDBranch(self.cfg, instance.primary_node,
3162 remote_node, disk.size, names)
3164 logger.Info("adding new mirror component on secondary")
3166 if not _CreateBlockDevOnSecondary(self.cfg, remote_node, instance,
3168 _GetInstanceInfoText(instance)):
3169 raise errors.OpExecError("Failed to create new component on secondary"
3170 " node %s" % remote_node)
3172 logger.Info("adding new mirror component on primary")
3174 if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node,
3176 _GetInstanceInfoText(instance)):
3177 # remove secondary dev
3178 self.cfg.SetDiskID(new_drbd, remote_node)
3179 rpc.call_blockdev_remove(remote_node, new_drbd)
3180 raise errors.OpExecError("Failed to create volume on primary")
3182 # the device exists now
3183 # call the primary node to add the mirror to md
3184 logger.Info("adding new mirror component to md")
3185 if not rpc.call_blockdev_addchildren(instance.primary_node,
3187 logger.Error("Can't add mirror compoment to md!")
3188 self.cfg.SetDiskID(new_drbd, remote_node)
3189 if not rpc.call_blockdev_remove(remote_node, new_drbd):
3190 logger.Error("Can't rollback on secondary")
3191 self.cfg.SetDiskID(new_drbd, instance.primary_node)
3192 if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
3193 logger.Error("Can't rollback on primary")
3194 raise errors.OpExecError("Can't add mirror component to md array")
3196 disk.children.append(new_drbd)
3198 self.cfg.AddInstance(instance)
3200 _WaitForSync(self.cfg, instance, self.proc)
3205 class LURemoveMDDRBDComponent(LogicalUnit):
3206 """Remove a component from a remote_raid1 disk.
3209 HPATH = "mirror-remove"
3210 HTYPE = constants.HTYPE_INSTANCE
3211 _OP_REQP = ["instance_name", "disk_name", "disk_id"]
3213 def BuildHooksEnv(self):
3216 This runs on the master, the primary and all the secondaries.
3220 "DISK_NAME": self.op.disk_name,
3221 "DISK_ID": self.op.disk_id,
3222 "OLD_SECONDARY": self.old_secondary,
3224 env.update(_BuildInstanceHookEnvByObject(self.instance))
3225 nl = [self.sstore.GetMasterNode(),
3226 self.instance.primary_node] + list(self.instance.secondary_nodes)
3229 def CheckPrereq(self):
3230 """Check prerequisites.
3232 This checks that the instance is in the cluster.
3235 instance = self.cfg.GetInstanceInfo(
3236 self.cfg.ExpandInstanceName(self.op.instance_name))
3237 if instance is None:
3238 raise errors.OpPrereqError("Instance '%s' not known" %
3239 self.op.instance_name)
3240 self.instance = instance
3242 if instance.disk_template != constants.DT_REMOTE_RAID1:
3243 raise errors.OpPrereqError("Instance's disk layout is not"
3245 for disk in instance.disks:
3246 if disk.iv_name == self.op.disk_name:
3249 raise errors.OpPrereqError("Can't find this device ('%s') in the"
3250 " instance." % self.op.disk_name)
3251 for child in disk.children:
3252 if (child.dev_type == constants.LD_DRBD7 and
3253 child.logical_id[2] == self.op.disk_id):
3256 raise errors.OpPrereqError("Can't find the device with this port.")
3258 if len(disk.children) < 2:
3259 raise errors.OpPrereqError("Cannot remove the last component from"
3263 if self.child.logical_id[0] == instance.primary_node:
3267 self.old_secondary = self.child.logical_id[oid]
3269 def Exec(self, feedback_fn):
3270 """Remove the mirror component
3273 instance = self.instance
3276 logger.Info("remove mirror component")
3277 self.cfg.SetDiskID(disk, instance.primary_node)
3278 if not rpc.call_blockdev_removechildren(instance.primary_node,
3280 raise errors.OpExecError("Can't remove child from mirror.")
3282 for node in child.logical_id[:2]:
3283 self.cfg.SetDiskID(child, node)
3284 if not rpc.call_blockdev_remove(node, child):
3285 logger.Error("Warning: failed to remove device from node %s,"
3286 " continuing operation." % node)
3288 disk.children.remove(child)
3289 self.cfg.AddInstance(instance)
3292 class LUReplaceDisks(LogicalUnit):
3293 """Replace the disks of an instance.
3296 HPATH = "mirrors-replace"
3297 HTYPE = constants.HTYPE_INSTANCE
3298 _OP_REQP = ["instance_name", "mode", "disks"]
3300 def BuildHooksEnv(self):
3303 This runs on the master, the primary and all the secondaries.
3307 "MODE": self.op.mode,
3308 "NEW_SECONDARY": self.op.remote_node,
3309 "OLD_SECONDARY": self.instance.secondary_nodes[0],
3311 env.update(_BuildInstanceHookEnvByObject(self.instance))
3313 self.sstore.GetMasterNode(),
3314 self.instance.primary_node,
3316 if self.op.remote_node is not None:
3317 nl.append(self.op.remote_node)
3320 def CheckPrereq(self):
3321 """Check prerequisites.
3323 This checks that the instance is in the cluster.
3326 instance = self.cfg.GetInstanceInfo(
3327 self.cfg.ExpandInstanceName(self.op.instance_name))
3328 if instance is None:
3329 raise errors.OpPrereqError("Instance '%s' not known" %
3330 self.op.instance_name)
3331 self.instance = instance
3332 self.op.instance_name = instance.name
3334 if instance.disk_template not in constants.DTS_NET_MIRROR:
3335 raise errors.OpPrereqError("Instance's disk layout is not"
3336 " network mirrored.")
3338 if len(instance.secondary_nodes) != 1:
3339 raise errors.OpPrereqError("The instance has a strange layout,"
3340 " expected one secondary but found %d" %
3341 len(instance.secondary_nodes))
3343 self.sec_node = instance.secondary_nodes[0]
3345 remote_node = getattr(self.op, "remote_node", None)
3346 if remote_node is not None:
3347 remote_node = self.cfg.ExpandNodeName(remote_node)
3348 if remote_node is None:
3349 raise errors.OpPrereqError("Node '%s' not known" %
3350 self.op.remote_node)
3351 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
3353 self.remote_node_info = None
3354 if remote_node == instance.primary_node:
3355 raise errors.OpPrereqError("The specified node is the primary node of"
3357 elif remote_node == self.sec_node:
3358 if self.op.mode == constants.REPLACE_DISK_SEC:
3359 # this is for DRBD8, where we can't execute the same mode of
3360 # replacement as for drbd7 (no different port allocated)
3361 raise errors.OpPrereqError("Same secondary given, cannot execute"
3363 # the user gave the current secondary, switch to
3364 # 'no-replace-secondary' mode for drbd7
3366 if (instance.disk_template == constants.DT_REMOTE_RAID1 and
3367 self.op.mode != constants.REPLACE_DISK_ALL):
3368 raise errors.OpPrereqError("Template 'remote_raid1' only allows all"
3369 " disks replacement, not individual ones")
3370 if instance.disk_template == constants.DT_DRBD8:
3371 if (self.op.mode == constants.REPLACE_DISK_ALL and
3372 remote_node is not None):
3373 # switch to replace secondary mode
3374 self.op.mode = constants.REPLACE_DISK_SEC
3376 if self.op.mode == constants.REPLACE_DISK_ALL:
3377 raise errors.OpPrereqError("Template 'drbd' only allows primary or"
3378 " secondary disk replacement, not"
3380 elif self.op.mode == constants.REPLACE_DISK_PRI:
3381 if remote_node is not None:
3382 raise errors.OpPrereqError("Template 'drbd' does not allow changing"
3383 " the secondary while doing a primary"
3384 " node disk replacement")
3385 self.tgt_node = instance.primary_node
3386 self.oth_node = instance.secondary_nodes[0]
3387 elif self.op.mode == constants.REPLACE_DISK_SEC:
3388 self.new_node = remote_node # this can be None, in which case
3389 # we don't change the secondary
3390 self.tgt_node = instance.secondary_nodes[0]
3391 self.oth_node = instance.primary_node
3393 raise errors.ProgrammerError("Unhandled disk replace mode")
3395 for name in self.op.disks:
3396 if instance.FindDisk(name) is None:
3397 raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
3398 (name, instance.name))
3399 self.op.remote_node = remote_node
3401 def _ExecRR1(self, feedback_fn):
3402 """Replace the disks of an instance.
3405 instance = self.instance
3408 if self.op.remote_node is None:
3409 remote_node = self.sec_node
3411 remote_node = self.op.remote_node
3413 for dev in instance.disks:
3415 lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
3416 names = _GenerateUniqueNames(cfg, lv_names)
3417 new_drbd = _GenerateMDDRBDBranch(cfg, instance.primary_node,
3418 remote_node, size, names)
3419 iv_names[dev.iv_name] = (dev, dev.children[0], new_drbd)
3420 logger.Info("adding new mirror component on secondary for %s" %
3423 if not _CreateBlockDevOnSecondary(cfg, remote_node, instance,
3425 _GetInstanceInfoText(instance)):
3426 raise errors.OpExecError("Failed to create new component on secondary"
3427 " node %s. Full abort, cleanup manually!" %
3430 logger.Info("adding new mirror component on primary")
3432 if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
3434 _GetInstanceInfoText(instance)):
3435 # remove secondary dev
3436 cfg.SetDiskID(new_drbd, remote_node)
3437 rpc.call_blockdev_remove(remote_node, new_drbd)
3438 raise errors.OpExecError("Failed to create volume on primary!"
3439 " Full abort, cleanup manually!!")
3441 # the device exists now
3442 # call the primary node to add the mirror to md
3443 logger.Info("adding new mirror component to md")
3444 if not rpc.call_blockdev_addchildren(instance.primary_node, dev,
3446 logger.Error("Can't add mirror compoment to md!")
3447 cfg.SetDiskID(new_drbd, remote_node)
3448 if not rpc.call_blockdev_remove(remote_node, new_drbd):
3449 logger.Error("Can't rollback on secondary")
3450 cfg.SetDiskID(new_drbd, instance.primary_node)
3451 if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
3452 logger.Error("Can't rollback on primary")
3453 raise errors.OpExecError("Full abort, cleanup manually!!")
3455 dev.children.append(new_drbd)
3456 cfg.AddInstance(instance)
3458 # this can fail as the old devices are degraded and _WaitForSync
3459 # does a combined result over all disks, so we don't check its
3461 _WaitForSync(cfg, instance, self.proc, unlock=True)
3463 # so check manually all the devices
3464 for name in iv_names:
3465 dev, child, new_drbd = iv_names[name]
3466 cfg.SetDiskID(dev, instance.primary_node)
3467 is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
3469 raise errors.OpExecError("MD device %s is degraded!" % name)
3470 cfg.SetDiskID(new_drbd, instance.primary_node)
3471 is_degr = rpc.call_blockdev_find(instance.primary_node, new_drbd)[5]
3473 raise errors.OpExecError("New drbd device %s is degraded!" % name)
3475 for name in iv_names:
3476 dev, child, new_drbd = iv_names[name]
3477 logger.Info("remove mirror %s component" % name)
3478 cfg.SetDiskID(dev, instance.primary_node)
3479 if not rpc.call_blockdev_removechildren(instance.primary_node,
3481 logger.Error("Can't remove child from mirror, aborting"
3482 " *this device cleanup*.\nYou need to cleanup manually!!")
3485 for node in child.logical_id[:2]:
3486 logger.Info("remove child device on %s" % node)
3487 cfg.SetDiskID(child, node)
3488 if not rpc.call_blockdev_remove(node, child):
3489 logger.Error("Warning: failed to remove device from node %s,"
3490 " continuing operation." % node)
3492 dev.children.remove(child)
3494 cfg.AddInstance(instance)
3496 def _ExecD8DiskOnly(self, feedback_fn):
3497 """Replace a disk on the primary or secondary for dbrd8.
3499 The algorithm for replace is quite complicated:
3500 - for each disk to be replaced:
3501 - create new LVs on the target node with unique names
3502 - detach old LVs from the drbd device
3503 - rename old LVs to name_replaced.<time_t>
3504 - rename new LVs to old LVs
3505 - attach the new LVs (with the old names now) to the drbd device
3506 - wait for sync across all devices
3507 - for each modified disk:
3508 - remove old LVs (which have the name name_replaces.<time_t>)
3510 Failures are not very well handled.
3514 warning, info = (self.proc.LogWarning, self.proc.LogInfo)
3515 instance = self.instance
3517 vgname = self.cfg.GetVGName()
3520 tgt_node = self.tgt_node
3521 oth_node = self.oth_node
3523 # Step: check device activation
3524 self.proc.LogStep(1, steps_total, "check device existence")
3525 info("checking volume groups")
3526 my_vg = cfg.GetVGName()
3527 results = rpc.call_vg_list([oth_node, tgt_node])
3529 raise errors.OpExecError("Can't list volume groups on the nodes")
3530 for node in oth_node, tgt_node:
3531 res = results.get(node, False)
3532 if not res or my_vg not in res:
3533 raise errors.OpExecError("Volume group '%s' not found on %s" %
3535 for dev in instance.disks:
3536 if not dev.iv_name in self.op.disks:
3538 for node in tgt_node, oth_node:
3539 info("checking %s on %s" % (dev.iv_name, node))
3540 cfg.SetDiskID(dev, node)
3541 if not rpc.call_blockdev_find(node, dev):
3542 raise errors.OpExecError("Can't find device %s on node %s" %
3543 (dev.iv_name, node))
3545 # Step: check other node consistency
3546 self.proc.LogStep(2, steps_total, "check peer consistency")
3547 for dev in instance.disks:
3548 if not dev.iv_name in self.op.disks:
3550 info("checking %s consistency on %s" % (dev.iv_name, oth_node))
3551 if not _CheckDiskConsistency(self.cfg, dev, oth_node,
3552 oth_node==instance.primary_node):
3553 raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
3554 " to replace disks on this node (%s)" %
3555 (oth_node, tgt_node))
3557 # Step: create new storage
3558 self.proc.LogStep(3, steps_total, "allocate new storage")
3559 for dev in instance.disks:
3560 if not dev.iv_name in self.op.disks:
3563 cfg.SetDiskID(dev, tgt_node)
3564 lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
3565 names = _GenerateUniqueNames(cfg, lv_names)
3566 lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
3567 logical_id=(vgname, names[0]))
3568 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
3569 logical_id=(vgname, names[1]))
3570 new_lvs = [lv_data, lv_meta]
3571 old_lvs = dev.children
3572 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
3573 info("creating new local storage on %s for %s" %
3574 (tgt_node, dev.iv_name))
3575 # since we *always* want to create this LV, we use the
3576 # _Create...OnPrimary (which forces the creation), even if we
3577 # are talking about the secondary node
3578 for new_lv in new_lvs:
3579 if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv,
3580 _GetInstanceInfoText(instance)):
3581 raise errors.OpExecError("Failed to create new LV named '%s' on"
3583 (new_lv.logical_id[1], tgt_node))
3585 # Step: for each lv, detach+rename*2+attach
3586 self.proc.LogStep(4, steps_total, "change drbd configuration")
3587 for dev, old_lvs, new_lvs in iv_names.itervalues():
3588 info("detaching %s drbd from local storage" % dev.iv_name)
3589 if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
3590 raise errors.OpExecError("Can't detach drbd from local storage on node"
3591 " %s for device %s" % (tgt_node, dev.iv_name))
3593 #cfg.Update(instance)
3595 # ok, we created the new LVs, so now we know we have the needed
3596 # storage; as such, we proceed on the target node to rename
3597 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
3598 # using the assumption than logical_id == physical_id (which in
3599 # turn is the unique_id on that node)
3601 # FIXME(iustin): use a better name for the replaced LVs
3602 temp_suffix = int(time.time())
3603 ren_fn = lambda d, suff: (d.physical_id[0],
3604 d.physical_id[1] + "_replaced-%s" % suff)
3605 # build the rename list based on what LVs exist on the node
3607 for to_ren in old_lvs:
3608 find_res = rpc.call_blockdev_find(tgt_node, to_ren)
3609 if find_res is not None: # device exists
3610 rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
3612 info("renaming the old LVs on the target node")
3613 if not rpc.call_blockdev_rename(tgt_node, rlist):
3614 raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
3615 # now we rename the new LVs to the old LVs
3616 info("renaming the new LVs on the target node")
3617 rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
3618 if not rpc.call_blockdev_rename(tgt_node, rlist):
3619 raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
3621 for old, new in zip(old_lvs, new_lvs):
3622 new.logical_id = old.logical_id
3623 cfg.SetDiskID(new, tgt_node)
3625 for disk in old_lvs:
3626 disk.logical_id = ren_fn(disk, temp_suffix)
3627 cfg.SetDiskID(disk, tgt_node)
3629 # now that the new lvs have the old name, we can add them to the device
3630 info("adding new mirror component on %s" % tgt_node)
3631 if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
3632 for new_lv in new_lvs:
3633 if not rpc.call_blockdev_remove(tgt_node, new_lv):
3634 warning("Can't rollback device %s", "manually cleanup unused"
3636 raise errors.OpExecError("Can't add local storage to drbd")
3638 dev.children = new_lvs
3639 cfg.Update(instance)
3641 # Step: wait for sync
3643 # this can fail as the old devices are degraded and _WaitForSync
3644 # does a combined result over all disks, so we don't check its
3646 self.proc.LogStep(5, steps_total, "sync devices")
3647 _WaitForSync(cfg, instance, self.proc, unlock=True)
3649 # so check manually all the devices
3650 for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
3651 cfg.SetDiskID(dev, instance.primary_node)
3652 is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
3654 raise errors.OpExecError("DRBD device %s is degraded!" % name)
3656 # Step: remove old storage
3657 self.proc.LogStep(6, steps_total, "removing old storage")
3658 for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
3659 info("remove logical volumes for %s" % name)
3661 cfg.SetDiskID(lv, tgt_node)
3662 if not rpc.call_blockdev_remove(tgt_node, lv):
3663 warning("Can't remove old LV", "manually remove unused LVs")
3666 def _ExecD8Secondary(self, feedback_fn):
3667 """Replace the secondary node for drbd8.
3669 The algorithm for replace is quite complicated:
3670 - for all disks of the instance:
3671 - create new LVs on the new node with same names
3672 - shutdown the drbd device on the old secondary
3673 - disconnect the drbd network on the primary
3674 - create the drbd device on the new secondary
3675 - network attach the drbd on the primary, using an artifice:
3676 the drbd code for Attach() will connect to the network if it
3677 finds a device which is connected to the good local disks but
3679 - wait for sync across all devices
3680 - remove all disks from the old secondary
3682 Failures are not very well handled.
3686 warning, info = (self.proc.LogWarning, self.proc.LogInfo)
3687 instance = self.instance
3689 vgname = self.cfg.GetVGName()
3692 old_node = self.tgt_node
3693 new_node = self.new_node
3694 pri_node = instance.primary_node
3696 # Step: check device activation
3697 self.proc.LogStep(1, steps_total, "check device existence")
3698 info("checking volume groups")
3699 my_vg = cfg.GetVGName()
3700 results = rpc.call_vg_list([pri_node, new_node])
3702 raise errors.OpExecError("Can't list volume groups on the nodes")
3703 for node in pri_node, new_node:
3704 res = results.get(node, False)
3705 if not res or my_vg not in res:
3706 raise errors.OpExecError("Volume group '%s' not found on %s" %
3708 for dev in instance.disks:
3709 if not dev.iv_name in self.op.disks:
3711 info("checking %s on %s" % (dev.iv_name, pri_node))
3712 cfg.SetDiskID(dev, pri_node)
3713 if not rpc.call_blockdev_find(pri_node, dev):
3714 raise errors.OpExecError("Can't find device %s on node %s" %
3715 (dev.iv_name, pri_node))
3717 # Step: check other node consistency
3718 self.proc.LogStep(2, steps_total, "check peer consistency")
3719 for dev in instance.disks:
3720 if not dev.iv_name in self.op.disks:
3722 info("checking %s consistency on %s" % (dev.iv_name, pri_node))
3723 if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True):
3724 raise errors.OpExecError("Primary node (%s) has degraded storage,"
3725 " unsafe to replace the secondary" %
3728 # Step: create new storage
3729 self.proc.LogStep(3, steps_total, "allocate new storage")
3730 for dev in instance.disks:
3732 info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
3733 # since we *always* want to create this LV, we use the
3734 # _Create...OnPrimary (which forces the creation), even if we
3735 # are talking about the secondary node
3736 for new_lv in dev.children:
3737 if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
3738 _GetInstanceInfoText(instance)):
3739 raise errors.OpExecError("Failed to create new LV named '%s' on"
3741 (new_lv.logical_id[1], new_node))
3743 iv_names[dev.iv_name] = (dev, dev.children)
3745 self.proc.LogStep(4, steps_total, "changing drbd configuration")
3746 for dev in instance.disks:
3748 info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
3749 # create new devices on new_node
3750 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
3751 logical_id=(pri_node, new_node,
3753 children=dev.children)
3754 if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
3756 _GetInstanceInfoText(instance)):
3757 raise errors.OpExecError("Failed to create new DRBD on"
3758 " node '%s'" % new_node)
3760 for dev in instance.disks:
3761 # we have new devices, shutdown the drbd on the old secondary
3762 info("shutting down drbd for %s on old node" % dev.iv_name)
3763 cfg.SetDiskID(dev, old_node)
3764 if not rpc.call_blockdev_shutdown(old_node, dev):
3765 warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
3766 "Please cleanup this device manually as soon as possible")
3768 info("detaching primary drbds from the network (=> standalone)")
3770 for dev in instance.disks:
3771 cfg.SetDiskID(dev, pri_node)
3772 # set the physical (unique in bdev terms) id to None, meaning
3773 # detach from network
3774 dev.physical_id = (None,) * len(dev.physical_id)
3775 # and 'find' the device, which will 'fix' it to match the
3777 if rpc.call_blockdev_find(pri_node, dev):
3780 warning("Failed to detach drbd %s from network, unusual case" %
3784 # no detaches succeeded (very unlikely)
3785 raise errors.OpExecError("Can't detach at least one DRBD from old node")
3787 # if we managed to detach at least one, we update all the disks of
3788 # the instance to point to the new secondary
3789 info("updating instance configuration")
3790 for dev in instance.disks:
3791 dev.logical_id = (pri_node, new_node) + dev.logical_id[2:]
3792 cfg.SetDiskID(dev, pri_node)
3793 cfg.Update(instance)
3795 # and now perform the drbd attach
3796 info("attaching primary drbds to new secondary (standalone => connected)")
3798 for dev in instance.disks:
3799 info("attaching primary drbd for %s to new secondary node" % dev.iv_name)
3800 # since the attach is smart, it's enough to 'find' the device,
3801 # it will automatically activate the network, if the physical_id
3803 cfg.SetDiskID(dev, pri_node)
3804 if not rpc.call_blockdev_find(pri_node, dev):
3805 warning("can't attach drbd %s to new secondary!" % dev.iv_name,
3806 "please do a gnt-instance info to see the status of disks")
3808 # this can fail as the old devices are degraded and _WaitForSync
3809 # does a combined result over all disks, so we don't check its
3811 self.proc.LogStep(5, steps_total, "sync devices")
3812 _WaitForSync(cfg, instance, self.proc, unlock=True)
3814 # so check manually all the devices
3815 for name, (dev, old_lvs) in iv_names.iteritems():
3816 cfg.SetDiskID(dev, pri_node)
3817 is_degr = rpc.call_blockdev_find(pri_node, dev)[5]
3819 raise errors.OpExecError("DRBD device %s is degraded!" % name)
3821 self.proc.LogStep(6, steps_total, "removing old storage")
3822 for name, (dev, old_lvs) in iv_names.iteritems():
3823 info("remove logical volumes for %s" % name)
3825 cfg.SetDiskID(lv, old_node)
3826 if not rpc.call_blockdev_remove(old_node, lv):
3827 warning("Can't remove LV on old secondary",
3828 "Cleanup stale volumes by hand")
3830 def Exec(self, feedback_fn):
3831 """Execute disk replacement.
3833 This dispatches the disk replacement to the appropriate handler.
3836 instance = self.instance
3837 if instance.disk_template == constants.DT_REMOTE_RAID1:
3839 elif instance.disk_template == constants.DT_DRBD8:
3840 if self.op.remote_node is None:
3841 fn = self._ExecD8DiskOnly
3843 fn = self._ExecD8Secondary
3845 raise errors.ProgrammerError("Unhandled disk replacement case")
3846 return fn(feedback_fn)
3849 class LUQueryInstanceData(NoHooksLU):
3850 """Query runtime instance data.
3853 _OP_REQP = ["instances"]
3855 def CheckPrereq(self):
3856 """Check prerequisites.
3858 This only checks the optional instance list against the existing names.
3861 if not isinstance(self.op.instances, list):
3862 raise errors.OpPrereqError("Invalid argument type 'instances'")
3863 if self.op.instances:
3864 self.wanted_instances = []
3865 names = self.op.instances
3867 instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name))
3868 if instance is None:
3869 raise errors.OpPrereqError("No such instance name '%s'" % name)
3870 self.wanted_instances.append(instance)
3872 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3873 in self.cfg.GetInstanceList()]
3877 def _ComputeDiskStatus(self, instance, snode, dev):
3878 """Compute block device status.
3881 self.cfg.SetDiskID(dev, instance.primary_node)
3882 dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
3883 if dev.dev_type in constants.LDS_DRBD:
3884 # we change the snode then (otherwise we use the one passed in)
3885 if dev.logical_id[0] == instance.primary_node:
3886 snode = dev.logical_id[1]
3888 snode = dev.logical_id[0]
3891 self.cfg.SetDiskID(dev, snode)
3892 dev_sstatus = rpc.call_blockdev_find(snode, dev)
3897 dev_children = [self._ComputeDiskStatus(instance, snode, child)
3898 for child in dev.children]
3903 "iv_name": dev.iv_name,
3904 "dev_type": dev.dev_type,
3905 "logical_id": dev.logical_id,
3906 "physical_id": dev.physical_id,
3907 "pstatus": dev_pstatus,
3908 "sstatus": dev_sstatus,
3909 "children": dev_children,
3914 def Exec(self, feedback_fn):
3915 """Gather and return data"""
3917 for instance in self.wanted_instances:
3918 remote_info = rpc.call_instance_info(instance.primary_node,
3920 if remote_info and "state" in remote_info:
3923 remote_state = "down"
3924 if instance.status == "down":
3925 config_state = "down"
3929 disks = [self._ComputeDiskStatus(instance, None, device)
3930 for device in instance.disks]
3933 "name": instance.name,
3934 "config_state": config_state,
3935 "run_state": remote_state,
3936 "pnode": instance.primary_node,
3937 "snodes": instance.secondary_nodes,
3939 "memory": instance.memory,
3940 "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
3942 "vcpus": instance.vcpus,
3945 result[instance.name] = idict
3950 class LUSetInstanceParms(LogicalUnit):
3951 """Modifies an instances's parameters.
3954 HPATH = "instance-modify"
3955 HTYPE = constants.HTYPE_INSTANCE
3956 _OP_REQP = ["instance_name"]
3958 def BuildHooksEnv(self):
3961 This runs on the master, primary and secondaries.
3966 args['memory'] = self.mem
3968 args['vcpus'] = self.vcpus
3969 if self.do_ip or self.do_bridge:
3973 ip = self.instance.nics[0].ip
3975 bridge = self.bridge
3977 bridge = self.instance.nics[0].bridge
3978 args['nics'] = [(ip, bridge)]
3979 env = _BuildInstanceHookEnvByObject(self.instance, override=args)
3980 nl = [self.sstore.GetMasterNode(),
3981 self.instance.primary_node] + list(self.instance.secondary_nodes)
3984 def CheckPrereq(self):
3985 """Check prerequisites.
3987 This only checks the instance list against the existing names.
3990 self.mem = getattr(self.op, "mem", None)
3991 self.vcpus = getattr(self.op, "vcpus", None)
3992 self.ip = getattr(self.op, "ip", None)
3993 self.bridge = getattr(self.op, "bridge", None)
3994 if [self.mem, self.vcpus, self.ip, self.bridge].count(None) == 4:
3995 raise errors.OpPrereqError("No changes submitted")
3996 if self.mem is not None:
3998 self.mem = int(self.mem)
3999 except ValueError, err:
4000 raise errors.OpPrereqError("Invalid memory size: %s" % str(err))
4001 if self.vcpus is not None:
4003 self.vcpus = int(self.vcpus)
4004 except ValueError, err:
4005 raise errors.OpPrereqError("Invalid vcpus number: %s" % str(err))
4006 if self.ip is not None:
4008 if self.ip.lower() == "none":
4011 if not utils.IsValidIP(self.ip):
4012 raise errors.OpPrereqError("Invalid IP address '%s'." % self.ip)
4015 self.do_bridge = (self.bridge is not None)
4017 instance = self.cfg.GetInstanceInfo(
4018 self.cfg.ExpandInstanceName(self.op.instance_name))
4019 if instance is None:
4020 raise errors.OpPrereqError("No such instance name '%s'" %
4021 self.op.instance_name)
4022 self.op.instance_name = instance.name
4023 self.instance = instance
4026 def Exec(self, feedback_fn):
4027 """Modifies an instance.
4029 All parameters take effect only at the next restart of the instance.
4032 instance = self.instance
4034 instance.memory = self.mem
4035 result.append(("mem", self.mem))
4037 instance.vcpus = self.vcpus
4038 result.append(("vcpus", self.vcpus))
4040 instance.nics[0].ip = self.ip
4041 result.append(("ip", self.ip))
4043 instance.nics[0].bridge = self.bridge
4044 result.append(("bridge", self.bridge))
4046 self.cfg.AddInstance(instance)
4051 class LUQueryExports(NoHooksLU):
4052 """Query the exports list
4057 def CheckPrereq(self):
4058 """Check that the nodelist contains only existing nodes.
4061 self.nodes = _GetWantedNodes(self, getattr(self.op, "nodes", None))
4063 def Exec(self, feedback_fn):
4064 """Compute the list of all the exported system images.
4067 a dictionary with the structure node->(export-list)
4068 where export-list is a list of the instances exported on
4072 return rpc.call_export_list(self.nodes)
4075 class LUExportInstance(LogicalUnit):
4076 """Export an instance to an image in the cluster.
4079 HPATH = "instance-export"
4080 HTYPE = constants.HTYPE_INSTANCE
4081 _OP_REQP = ["instance_name", "target_node", "shutdown"]
4083 def BuildHooksEnv(self):
4086 This will run on the master, primary node and target node.
4090 "EXPORT_NODE": self.op.target_node,
4091 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
4093 env.update(_BuildInstanceHookEnvByObject(self.instance))
4094 nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
4095 self.op.target_node]
4098 def CheckPrereq(self):
4099 """Check prerequisites.
4101 This checks that the instance name is a valid one.
4104 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
4105 self.instance = self.cfg.GetInstanceInfo(instance_name)
4106 if self.instance is None:
4107 raise errors.OpPrereqError("Instance '%s' not found" %
4108 self.op.instance_name)
4111 dst_node_short = self.cfg.ExpandNodeName(self.op.target_node)
4112 self.dst_node = self.cfg.GetNodeInfo(dst_node_short)
4114 if self.dst_node is None:
4115 raise errors.OpPrereqError("Destination node '%s' is unknown." %
4116 self.op.target_node)
4117 self.op.target_node = self.dst_node.name
4119 def Exec(self, feedback_fn):
4120 """Export an instance to an image in the cluster.
4123 instance = self.instance
4124 dst_node = self.dst_node
4125 src_node = instance.primary_node
4126 # shutdown the instance, unless requested not to do so
4127 if self.op.shutdown:
4128 op = opcodes.OpShutdownInstance(instance_name=instance.name)
4129 self.proc.ChainOpCode(op)
4131 vgname = self.cfg.GetVGName()
4136 for disk in instance.disks:
4137 if disk.iv_name == "sda":
4138 # new_dev_name will be a snapshot of an lvm leaf of the one we passed
4139 new_dev_name = rpc.call_blockdev_snapshot(src_node, disk)
4141 if not new_dev_name:
4142 logger.Error("could not snapshot block device %s on node %s" %
4143 (disk.logical_id[1], src_node))
4145 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
4146 logical_id=(vgname, new_dev_name),
4147 physical_id=(vgname, new_dev_name),
4148 iv_name=disk.iv_name)
4149 snap_disks.append(new_dev)
4152 if self.op.shutdown:
4153 op = opcodes.OpStartupInstance(instance_name=instance.name,
4155 self.proc.ChainOpCode(op)
4157 # TODO: check for size
4159 for dev in snap_disks:
4160 if not rpc.call_snapshot_export(src_node, dev, dst_node.name,
4162 logger.Error("could not export block device %s from node"
4164 (dev.logical_id[1], src_node, dst_node.name))
4165 if not rpc.call_blockdev_remove(src_node, dev):
4166 logger.Error("could not remove snapshot block device %s from"
4167 " node %s" % (dev.logical_id[1], src_node))
4169 if not rpc.call_finalize_export(dst_node.name, instance, snap_disks):
4170 logger.Error("could not finalize export for instance %s on node %s" %
4171 (instance.name, dst_node.name))
4173 nodelist = self.cfg.GetNodeList()
4174 nodelist.remove(dst_node.name)
4176 # on one-node clusters nodelist will be empty after the removal
4177 # if we proceed the backup would be removed because OpQueryExports
4178 # substitutes an empty list with the full cluster node list.
4180 op = opcodes.OpQueryExports(nodes=nodelist)
4181 exportlist = self.proc.ChainOpCode(op)
4182 for node in exportlist:
4183 if instance.name in exportlist[node]:
4184 if not rpc.call_export_remove(node, instance.name):
4185 logger.Error("could not remove older export for instance %s"
4186 " on node %s" % (instance.name, node))
4189 class TagsLU(NoHooksLU):
4192 This is an abstract class which is the parent of all the other tags LUs.
4195 def CheckPrereq(self):
4196 """Check prerequisites.
4199 if self.op.kind == constants.TAG_CLUSTER:
4200 self.target = self.cfg.GetClusterInfo()
4201 elif self.op.kind == constants.TAG_NODE:
4202 name = self.cfg.ExpandNodeName(self.op.name)
4204 raise errors.OpPrereqError("Invalid node name (%s)" %
4207 self.target = self.cfg.GetNodeInfo(name)
4208 elif self.op.kind == constants.TAG_INSTANCE:
4209 name = self.cfg.ExpandInstanceName(self.op.name)
4211 raise errors.OpPrereqError("Invalid instance name (%s)" %
4214 self.target = self.cfg.GetInstanceInfo(name)
4216 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
4220 class LUGetTags(TagsLU):
4221 """Returns the tags of a given object.
4224 _OP_REQP = ["kind", "name"]
4226 def Exec(self, feedback_fn):
4227 """Returns the tag list.
4230 return self.target.GetTags()
4233 class LUSearchTags(NoHooksLU):
4234 """Searches the tags for a given pattern.
4237 _OP_REQP = ["pattern"]
4239 def CheckPrereq(self):
4240 """Check prerequisites.
4242 This checks the pattern passed for validity by compiling it.
4246 self.re = re.compile(self.op.pattern)
4247 except re.error, err:
4248 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
4249 (self.op.pattern, err))
4251 def Exec(self, feedback_fn):
4252 """Returns the tag list.
4256 tgts = [("/cluster", cfg.GetClusterInfo())]
4257 ilist = [cfg.GetInstanceInfo(name) for name in cfg.GetInstanceList()]
4258 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
4259 nlist = [cfg.GetNodeInfo(name) for name in cfg.GetNodeList()]
4260 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
4262 for path, target in tgts:
4263 for tag in target.GetTags():
4264 if self.re.search(tag):
4265 results.append((path, tag))
4269 class LUAddTags(TagsLU):
4270 """Sets a tag on a given object.
4273 _OP_REQP = ["kind", "name", "tags"]
4275 def CheckPrereq(self):
4276 """Check prerequisites.
4278 This checks the type and length of the tag name and value.
4281 TagsLU.CheckPrereq(self)
4282 for tag in self.op.tags:
4283 objects.TaggableObject.ValidateTag(tag)
4285 def Exec(self, feedback_fn):
4290 for tag in self.op.tags:
4291 self.target.AddTag(tag)
4292 except errors.TagError, err:
4293 raise errors.OpExecError("Error while setting tag: %s" % str(err))
4295 self.cfg.Update(self.target)
4296 except errors.ConfigurationError:
4297 raise errors.OpRetryError("There has been a modification to the"
4298 " config file and the operation has been"
4299 " aborted. Please retry.")
4302 class LUDelTags(TagsLU):
4303 """Delete a list of tags from a given object.
4306 _OP_REQP = ["kind", "name", "tags"]
4308 def CheckPrereq(self):
4309 """Check prerequisites.
4311 This checks that we have the given tag.
4314 TagsLU.CheckPrereq(self)
4315 for tag in self.op.tags:
4316 objects.TaggableObject.ValidateTag(tag)
4317 del_tags = frozenset(self.op.tags)
4318 cur_tags = self.target.GetTags()
4319 if not del_tags <= cur_tags:
4320 diff_tags = del_tags - cur_tags
4321 diff_names = ["'%s'" % tag for tag in diff_tags]
4323 raise errors.OpPrereqError("Tag(s) %s not found" %
4324 (",".join(diff_names)))
4326 def Exec(self, feedback_fn):
4327 """Remove the tag from the object.
4330 for tag in self.op.tags:
4331 self.target.RemoveTag(tag)
4333 self.cfg.Update(self.target)
4334 except errors.ConfigurationError:
4335 raise errors.OpRetryError("There has been a modification to the"
4336 " config file and the operation has been"
4337 " aborted. Please retry.")