- implement Exec
- implement BuildHooksEnv
- redefine HPATH and HTYPE
- - optionally redefine their run requirements (REQ_CLUSTER,
- REQ_MASTER); note that all commands require root permissions
+ - optionally redefine their run requirements:
+ REQ_MASTER: the LU needs to run on the master node
+ REQ_WSSTORE: the LU needs a writable SimpleStore
+
+ Note that all commands require root permissions.
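+
+  For example, a hypothetical LU that must run on the master node and
+  needs a writable SimpleStore would redefine (illustrative only):
+
+      REQ_MASTER = True
+      REQ_WSSTORE = True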
"""
HPATH = None
HTYPE = None
_OP_REQP = []
- REQ_CLUSTER = True
REQ_MASTER = True
+ REQ_WSSTORE = False
def __init__(self, processor, op, cfg, sstore):
"""Constructor for LogicalUnit.
if attr_val is None:
raise errors.OpPrereqError("Required parameter '%s' missing" %
attr_name)
- if self.REQ_CLUSTER:
- if not cfg.IsCluster():
- raise errors.OpPrereqError("Cluster not initialized yet,"
- " use 'gnt-cluster init' first.")
- if self.REQ_MASTER:
- master = sstore.GetMasterNode()
- if master != utils.HostInfo().name:
- raise errors.OpPrereqError("Commands must be run on the master"
- " node %s" % master)
+
+ if not cfg.IsCluster():
+ raise errors.OpPrereqError("Cluster not initialized yet,"
+ " use 'gnt-cluster init' first.")
+ if self.REQ_MASTER:
+ master = sstore.GetMasterNode()
+ if master != utils.HostInfo().name:
+ raise errors.OpPrereqError("Commands must be run on the master"
+ " node %s" % master)
def __GetSSH(self):
"""Returns the SshRunner object
added by the hooks runner. If the LU doesn't define any
environment, an empty dict (and not None) should be returned.
- As for the node lists, the master should not be included in the
- them, as it will be added by the hooks runner in case this LU
- requires a cluster to run on (otherwise we don't have a node
- list). No nodes should be returned as an empty list (and not
- None).
+    If the LU has no nodes to report, an empty list (and not None)
+    should be returned.
Note that if the HPATH for a LU class is None, this function will
not be called.
"""
raise NotImplementedError
+  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
+ """Notify the LU about the results of its hooks.
+
+ This method is called every time a hooks phase is executed, and notifies
+ the Logical Unit about the hooks' result. The LU can then use it to alter
+    its result based on the hooks. By default the method does nothing and
+    the previous result is passed back unchanged, but any LU can override
+    it if it wants to react to the results of the cluster hook scripts.
+
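+    A minimal override could look like this (an illustrative sketch,
+    not an existing LU):
+
+      def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
+        if phase == constants.HOOKS_PHASE_POST and not hooks_results:
+          feedback_fn("no hooks results received")
+        return lu_result
+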
+ Args:
+ phase: the hooks phase that has just been run
+ hooks_results: the results of the multi-node hooks rpc call
+ feedback_fn: function to send feedback back to the caller
+ lu_result: the previous result this LU had, or None in the PRE phase.
+
+ """
+ return lu_result
+
class NoHooksLU(LogicalUnit):
"""Simple LU which runs no hooks.
HPATH = None
HTYPE = None
- def BuildHooksEnv(self):
- """Build hooks env.
-
- This is a no-op, since we don't run hooks.
-
- """
- return {}, [], []
-
-
-def _AddHostToEtcHosts(hostname):
- """Wrapper around utils.SetEtcHostsEntry.
-
- """
- hi = utils.HostInfo(name=hostname)
- utils.SetEtcHostsEntry(constants.ETC_HOSTS, hi.ip, hi.name, [hi.ShortName()])
-
-
-def _RemoveHostFromEtcHosts(hostname):
- """Wrapper around utils.RemoveEtcHostsEntry.
-
- """
- hi = utils.HostInfo(name=hostname)
- utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.name)
- utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName())
-
def _GetWantedNodes(lu, nodes):
"""Returns list of checked and expanded node names.
return _BuildInstanceHookEnv(**args)
-def _HasValidVG(vglist, vgname):
- """Checks if the volume group list is valid.
-
- A non-None return value means there's an error, and the return value
- is the error message.
-
- """
- vgsize = vglist.get(vgname, None)
- if vgsize is None:
- return "volume group '%s' missing" % vgname
- elif vgsize < 20480:
- return ("volume group '%s' too small (20480MiB required, %dMib found)" %
- (vgname, vgsize))
- return None
-
-
-def _InitSSHSetup(node):
- """Setup the SSH configuration for the cluster.
-
-
- This generates a dsa keypair for root, adds the pub key to the
- permitted hosts and adds the hostkey to its own known hosts.
-
- Args:
- node: the name of this host as a fqdn
-
- """
- priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
-
- for name in priv_key, pub_key:
- if os.path.exists(name):
- utils.CreateBackup(name)
- utils.RemoveFile(name)
-
- result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
- "-f", priv_key,
- "-q", "-N", ""])
- if result.failed:
- raise errors.OpExecError("Could not generate ssh keypair, error %s" %
- result.output)
-
- f = open(pub_key, 'r')
- try:
- utils.AddAuthorizedKey(auth_keys, f.read(8192))
- finally:
- f.close()
-
-
-def _InitGanetiServerSetup(ss):
- """Setup the necessary configuration for the initial node daemon.
-
- This creates the nodepass file containing the shared password for
- the cluster and also generates the SSL certificate.
-
- """
- # Create pseudo random password
- randpass = sha.new(os.urandom(64)).hexdigest()
- # and write it into sstore
- ss.SetKey(ss.SS_NODED_PASS, randpass)
-
- result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
- "-days", str(365*5), "-nodes", "-x509",
- "-keyout", constants.SSL_CERT_FILE,
- "-out", constants.SSL_CERT_FILE, "-batch"])
- if result.failed:
- raise errors.OpExecError("could not generate server ssl cert, command"
- " %s had exitcode %s and error message %s" %
- (result.cmd, result.exit_code, result.output))
-
- os.chmod(constants.SSL_CERT_FILE, 0400)
-
- result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])
-
- if result.failed:
- raise errors.OpExecError("Could not start the node daemon, command %s"
- " had exitcode %s and error %s" %
- (result.cmd, result.exit_code, result.output))
-
-
def _CheckInstanceBridgesExist(instance):
"""Check that the brigdes needed by an instance exist.
(brlist, instance.primary_node))
-class LUInitCluster(LogicalUnit):
- """Initialise the cluster.
-
- """
- HPATH = "cluster-init"
- HTYPE = constants.HTYPE_CLUSTER
- _OP_REQP = ["cluster_name", "hypervisor_type", "mac_prefix",
- "def_bridge", "master_netdev", "file_storage_dir"]
- REQ_CLUSTER = False
-
- def BuildHooksEnv(self):
- """Build hooks env.
-
- Notes: Since we don't require a cluster, we must manually add
- ourselves in the post-run node list.
-
- """
- env = {"OP_TARGET": self.op.cluster_name}
- return env, [], [self.hostname.name]
-
- def CheckPrereq(self):
- """Verify that the passed name is a valid one.
-
- """
- if config.ConfigWriter.IsCluster():
- raise errors.OpPrereqError("Cluster is already initialised")
-
- if self.op.hypervisor_type == constants.HT_XEN_HVM31:
- if not os.path.exists(constants.VNC_PASSWORD_FILE):
- raise errors.OpPrereqError("Please prepare the cluster VNC"
- "password file %s" %
- constants.VNC_PASSWORD_FILE)
-
- self.hostname = hostname = utils.HostInfo()
-
- if hostname.ip.startswith("127."):
- raise errors.OpPrereqError("This host's IP resolves to the private"
- " range (%s). Please fix DNS or %s." %
- (hostname.ip, constants.ETC_HOSTS))
-
- if not utils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT,
- source=constants.LOCALHOST_IP_ADDRESS):
- raise errors.OpPrereqError("Inconsistency: this host's name resolves"
- " to %s,\nbut this ip address does not"
- " belong to this host."
- " Aborting." % hostname.ip)
-
- self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
-
- if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
- timeout=5):
- raise errors.OpPrereqError("Cluster IP already active. Aborting.")
-
- secondary_ip = getattr(self.op, "secondary_ip", None)
- if secondary_ip and not utils.IsValidIP(secondary_ip):
- raise errors.OpPrereqError("Invalid secondary ip given")
- if (secondary_ip and
- secondary_ip != hostname.ip and
- (not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
- source=constants.LOCALHOST_IP_ADDRESS))):
- raise errors.OpPrereqError("You gave %s as secondary IP,"
- " but it does not belong to this host." %
- secondary_ip)
- self.secondary_ip = secondary_ip
-
- if not hasattr(self.op, "vg_name"):
- self.op.vg_name = None
- # if vg_name not None, checks if volume group is valid
- if self.op.vg_name:
- vgstatus = _HasValidVG(utils.ListVolumeGroups(), self.op.vg_name)
- if vgstatus:
- raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
- " you are not using lvm" % vgstatus)
-
- self.op.file_storage_dir = os.path.normpath(self.op.file_storage_dir)
-
- if not os.path.isabs(self.op.file_storage_dir):
- raise errors.OpPrereqError("The file storage directory you have is"
- " not an absolute path.")
-
- if not os.path.exists(self.op.file_storage_dir):
- try:
- os.makedirs(self.op.file_storage_dir, 0750)
- except OSError, err:
- raise errors.OpPrereqError("Cannot create file storage directory"
- " '%s': %s" %
- (self.op.file_storage_dir, err))
-
- if not os.path.isdir(self.op.file_storage_dir):
- raise errors.OpPrereqError("The file storage directory '%s' is not"
- " a directory." % self.op.file_storage_dir)
-
- if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$",
- self.op.mac_prefix):
- raise errors.OpPrereqError("Invalid mac prefix given '%s'" %
- self.op.mac_prefix)
-
- if self.op.hypervisor_type not in constants.HYPER_TYPES:
- raise errors.OpPrereqError("Invalid hypervisor type given '%s'" %
- self.op.hypervisor_type)
-
- result = utils.RunCmd(["ip", "link", "show", "dev", self.op.master_netdev])
- if result.failed:
- raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
- (self.op.master_netdev,
- result.output.strip()))
-
- if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
- os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
- raise errors.OpPrereqError("Init.d script '%s' missing or not"
- " executable." % constants.NODE_INITD_SCRIPT)
-
- def Exec(self, feedback_fn):
- """Initialize the cluster.
-
- """
- clustername = self.clustername
- hostname = self.hostname
-
- # set up the simple store
- self.sstore = ss = ssconf.SimpleStore()
- ss.SetKey(ss.SS_HYPERVISOR, self.op.hypervisor_type)
- ss.SetKey(ss.SS_MASTER_NODE, hostname.name)
- ss.SetKey(ss.SS_MASTER_IP, clustername.ip)
- ss.SetKey(ss.SS_MASTER_NETDEV, self.op.master_netdev)
- ss.SetKey(ss.SS_CLUSTER_NAME, clustername.name)
- ss.SetKey(ss.SS_FILE_STORAGE_DIR, self.op.file_storage_dir)
-
- # set up the inter-node password and certificate
- _InitGanetiServerSetup(ss)
-
- # start the master ip
- rpc.call_node_start_master(hostname.name)
-
- # set up ssh config and /etc/hosts
- f = open(constants.SSH_HOST_RSA_PUB, 'r')
- try:
- sshline = f.read()
- finally:
- f.close()
- sshkey = sshline.split(" ")[1]
-
- _AddHostToEtcHosts(hostname.name)
- _InitSSHSetup(hostname.name)
-
- # init of cluster config file
- self.cfg = cfgw = config.ConfigWriter()
- cfgw.InitConfig(hostname.name, hostname.ip, self.secondary_ip,
- sshkey, self.op.mac_prefix,
- self.op.vg_name, self.op.def_bridge)
-
- ssh.WriteKnownHostsFile(cfgw, ss, constants.SSH_KNOWN_HOSTS_FILE)
-
-
class LUDestroyCluster(NoHooksLU):
"""Logical unit for destroying the cluster.
rpc.call_node_leave_cluster(master)
-class LUVerifyCluster(NoHooksLU):
+class LUVerifyCluster(LogicalUnit):
"""Verifies the cluster status.
"""
+ HPATH = "cluster-verify"
+ HTYPE = constants.HTYPE_CLUSTER
_OP_REQP = ["skip_checks"]
def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
(node,))
bad = True
else:
- vgstatus = _HasValidVG(vglist, self.cfg.GetVGName())
+ vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
+ constants.MIN_VG_SIZE)
if vgstatus:
feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
bad = True
if 'nodelist' not in node_result:
bad = True
- feedback_fn(" - ERROR: node hasn't returned node connectivity data")
+ feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
else:
if node_result['nodelist']:
bad = True
for node in node_result['nodelist']:
- feedback_fn(" - ERROR: communication with node '%s': %s" %
+ feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
(node, node_result['nodelist'][node]))
+ if 'node-net-test' not in node_result:
+ bad = True
+ feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
+ else:
+ if node_result['node-net-test']:
+ bad = True
+ nlist = utils.NiceSort(node_result['node-net-test'].keys())
+ for node in nlist:
+ feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
+ (node, node_result['node-net-test'][node]))
+
hyp_result = node_result.get('hypervisor', None)
if hyp_result is not None:
feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
raise errors.OpPrereqError("Invalid checks to be skipped specified")
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+    Cluster-Verify hooks run only in the post phase; their failure is
+    logged in the verify output and makes the verification fail.
+
+ """
+ all_nodes = self.cfg.GetNodeList()
+ # TODO: populate the environment with useful information for verify hooks
+ env = {}
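+    # the tuple is (env, node list for the pre phase, node list for the
+    # post phase); cluster-verify hooks only run in the post phase, on
+    # all nodes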
+ return env, [], all_nodes
+
def Exec(self, feedback_fn):
"""Verify integrity of cluster, performing various test on nodes.
vg_name = self.cfg.GetVGName()
nodelist = utils.NiceSort(self.cfg.GetNodeList())
+ nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
instancelist = utils.NiceSort(self.cfg.GetInstanceList())
i_non_redundant = [] # Non redundant instances
node_volume = {}
'filelist': file_names,
'nodelist': nodelist,
'hypervisor': None,
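+      # (node name, primary ip, secondary ip) tuples, used for the
+      # per-node tcp connectivity checks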
+ 'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
+ for node in nodeinfo]
}
all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
all_rversion = rpc.call_version(nodelist)
return int(bad)
+ def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
+    """Analyze the post-hooks' result, handle it, and send some
+ nicely-formatted feedback back to the user.
+
+ Args:
+ phase: the hooks phase that has just been run
+ hooks_results: the results of the multi-node hooks rpc call
+ feedback_fn: function to send feedback back to the caller
+ lu_result: previous Exec result
+
+ """
+    # We only really run POST phase hooks, and are only interested in
+    # their results
+ if phase == constants.HOOKS_PHASE_POST:
+ # Used to change hooks' output to proper indentation
+ indent_re = re.compile('^', re.M)
+ feedback_fn("* Hooks Results")
+ if not hooks_results:
+ feedback_fn(" - ERROR: general communication failure")
+ lu_result = 1
+ else:
+ for node_name in hooks_results:
+ show_node_header = True
+ res = hooks_results[node_name]
+ if res is False or not isinstance(res, list):
+ feedback_fn(" Communication failure")
+ lu_result = 1
+ continue
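+          # each result entry is a (script, status, output) tuple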
+ for script, hkr, output in res:
+ if hkr == constants.HKR_FAIL:
+ # The node header is only shown once, if there are
+ # failing hooks on that node
+ if show_node_header:
+ feedback_fn(" Node %s:" % node_name)
+ show_node_header = False
+ feedback_fn(" ERROR: Script %s failed, output:" % script)
+ output = indent_re.sub(' ', output)
+ feedback_fn("%s" % output)
+ lu_result = 1
+
+ return lu_result
+
class LUVerifyDisks(NoHooksLU):
"""Verifies the cluster disks status.
HPATH = "cluster-rename"
HTYPE = constants.HTYPE_CLUSTER
_OP_REQP = ["name"]
+ REQ_WSSTORE = True
def BuildHooksEnv(self):
"""Build hooks env.
node_list = self.cfg.GetNodeList()
vglist = rpc.call_vg_list(node_list)
for node in node_list:
- vgstatus = _HasValidVG(vglist[node], self.op.vg_name)
+ vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
+ constants.MIN_VG_SIZE)
if vgstatus:
raise errors.OpPrereqError("Error on node '%s': %s" %
(node, vgstatus))
break
if unlock:
- utils.Unlock('cmd')
+ #utils.Unlock('cmd')
+ pass
try:
time.sleep(min(60, max_time))
finally:
if unlock:
- utils.Lock('cmd')
+ #utils.Lock('cmd')
+ pass
if done:
proc.LogInfo("Instance %s's disks are in sync." % instance.name)
for node_name, nr in rlist.iteritems():
if not nr:
continue
- for os in nr:
- if os.name not in all_os:
+ for os_obj in nr:
+ if os_obj.name not in all_os:
# build a list of nodes for this os containing empty lists
# for each node in node_list
- all_os[os.name] = {}
+ all_os[os_obj.name] = {}
for nname in node_list:
- all_os[os.name][nname] = []
- all_os[os.name][node_name].append(os)
+ all_os[os_obj.name][nname] = []
+ all_os[os_obj.name][node_name].append(os_obj)
return all_os
def Exec(self, feedback_fn):
self.cfg.RemoveNode(node.name)
- _RemoveHostFromEtcHosts(node.name)
+ utils.RemoveHostFromEtcHosts(node.name)
class LUQueryNodes(NoHooksLU):
This checks that the fields required are valid output fields.
"""
- self.dynamic_fields = frozenset(["dtotal", "dfree",
- "mtotal", "mnode", "mfree",
- "bootid"])
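+    # dynamic fields are queried from the nodes at runtime, as opposed
+    # to the static fields computed from the configuration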
+ self.dynamic_fields = frozenset([
+ "dtotal", "dfree",
+ "mtotal", "mnode", "mfree",
+ "bootid",
+ "ctotal",
+ ])
_CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
"pinst_list", "sinst_list",
"mfree": utils.TryConvert(int, nodeinfo['memory_free']),
"dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
"dfree": utils.TryConvert(int, nodeinfo['vg_free']),
+ "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
"bootid": nodeinfo['bootid'],
}
else:
raise errors.OpExecError("Cannot transfer ssh keys to the new node")
# Add node to our /etc/hosts, and add key to known_hosts
- _AddHostToEtcHosts(new_node.name)
+ utils.AddHostToEtcHosts(new_node.name)
if new_node.secondary_ip != new_node.primary_ip:
if not rpc.call_node_tcp_ping(new_node.name,
# Distribute updated /etc/hosts and known_hosts to all nodes,
# including the node just added
myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
- dist_nodes = self.cfg.GetNodeList() + [node]
+ dist_nodes = self.cfg.GetNodeList()
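+    # on a fresh add the new node is not yet in the configuration's
+    # node list; on a re-add it already is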
+ if not self.op.readd:
+ dist_nodes.append(node)
if myself.name in dist_nodes:
dist_nodes.remove(myself.name)
HPATH = "master-failover"
HTYPE = constants.HTYPE_CLUSTER
REQ_MASTER = False
+ REQ_WSSTORE = True
_OP_REQP = []
def BuildHooksEnv(self):
"export_version": constants.EXPORT_VERSION,
"master": self.sstore.GetMasterNode(),
"architecture": (platform.architecture()[0], platform.machine()),
+ "hypervisor_type": self.sstore.GetHypervisorType(),
}
return result
"""
HPATH = "instance-remove"
HTYPE = constants.HTYPE_INSTANCE
- _OP_REQP = ["instance_name"]
+ _OP_REQP = ["instance_name", "ignore_failures"]
def BuildHooksEnv(self):
"""Build hooks env.
secondary_nodes = instance.secondary_nodes
if not secondary_nodes:
raise errors.ProgrammerError("no secondary node but using "
- "DT_REMOTE_RAID1 template")
+ "a mirrored disk template")
target_node = secondary_nodes[0]
# check memory requirements on the secondary node
feedback_fn("* checking disk consistency between source and target")
for dev in instance.disks:
- # for remote_raid1, these are md over drbd
+ # for drbd, these are drbd over lvm
if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
if instance.status == "up" and not self.op.ignore_consistency:
raise errors.OpExecError("Disk %s is degraded on target node,"
instance.primary_node = target_node
# distribute new instance config to the other nodes
- self.cfg.AddInstance(instance)
+ self.cfg.Update(instance)
# Only start the instance if it's marked as up
if instance.status == "up":
"""
# set optional parameters to none if they don't exist
for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
- "iallocator"]:
+ "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path",
+ "vnc_bind_address"]:
if not hasattr(self.op, attr):
setattr(self.op, attr, None)
self.op.file_driver)
if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
- raise errors.OpPrereqError("File storage directory not a relative"
- " path")
+ raise errors.OpPrereqError("File storage directory not a relative"
+ " path")
#### allocator run
if [self.op.iallocator, self.op.pnode].count(None) != 1:
info = nodeinfo.get(node, None)
if not info:
raise errors.OpPrereqError("Cannot get current information"
- " from node '%s'" % nodeinfo)
+ " from node '%s'" % node)
vg_free = info.get('vg_free', None)
if not isinstance(vg_free, int):
raise errors.OpPrereqError("Can't compute free disk space on"
" destination node '%s'" %
(self.op.bridge, pnode.name))
+ # memory check on primary node
+ if self.op.start:
+ _CheckNodeFreeMemory(self.cfg, self.pnode.name,
+ "creating instance %s" % self.op.instance_name,
+ self.op.mem_size)
+
+ # hvm_cdrom_image_path verification
+ if self.op.hvm_cdrom_image_path is not None:
+ if not os.path.isabs(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The path to the HVM CDROM image must"
+ " be an absolute path or None, not %s" %
+ self.op.hvm_cdrom_image_path)
+ if not os.path.isfile(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The HVM CDROM image must either be a"
+ " regular file or a symlink pointing to"
+ " an existing regular file, not %s" %
+ self.op.hvm_cdrom_image_path)
+
+ # vnc_bind_address verification
+ if self.op.vnc_bind_address is not None:
+ if not utils.IsValidIP(self.op.vnc_bind_address):
+ raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
+ " like a valid IP address" %
+ self.op.vnc_bind_address)
+
if self.op.start:
self.instance_status = 'up'
else:
else:
network_port = None
+ if self.op.vnc_bind_address is None:
+ self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
+
# this is needed because os.path.join does not accept None arguments
if self.op.file_storage_dir is None:
string_file_storage_dir = ""
kernel_path=self.op.kernel_path,
initrd_path=self.op.initrd_path,
hvm_boot_order=self.op.hvm_boot_order,
+ hvm_acpi=self.op.hvm_acpi,
+ hvm_pae=self.op.hvm_pae,
+ hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
+ vnc_bind_address=self.op.vnc_bind_address,
)
feedback_fn("* creating instance disks...")
# replacement as for drbd7 (no different port allocated)
raise errors.OpPrereqError("Same secondary given, cannot execute"
" replacement")
- # the user gave the current secondary, switch to
- # 'no-replace-secondary' mode for drbd7
- remote_node = None
- if (instance.disk_template == constants.DT_REMOTE_RAID1 and
- self.op.mode != constants.REPLACE_DISK_ALL):
- raise errors.OpPrereqError("Template 'remote_raid1' only allows all"
- " disks replacement, not individual ones")
if instance.disk_template == constants.DT_DRBD8:
if (self.op.mode == constants.REPLACE_DISK_ALL and
remote_node is not None):
(name, instance.name))
self.op.remote_node = remote_node
- def _ExecRR1(self, feedback_fn):
- """Replace the disks of an instance.
-
- """
- instance = self.instance
- iv_names = {}
- # start of work
- if self.op.remote_node is None:
- remote_node = self.sec_node
- else:
- remote_node = self.op.remote_node
- cfg = self.cfg
- for dev in instance.disks:
- size = dev.size
- lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
- names = _GenerateUniqueNames(cfg, lv_names)
- new_drbd = _GenerateMDDRBDBranch(cfg, instance.primary_node,
- remote_node, size, names)
- iv_names[dev.iv_name] = (dev, dev.children[0], new_drbd)
- logger.Info("adding new mirror component on secondary for %s" %
- dev.iv_name)
- #HARDCODE
- if not _CreateBlockDevOnSecondary(cfg, remote_node, instance,
- new_drbd, False,
- _GetInstanceInfoText(instance)):
- raise errors.OpExecError("Failed to create new component on secondary"
- " node %s. Full abort, cleanup manually!" %
- remote_node)
-
- logger.Info("adding new mirror component on primary")
- #HARDCODE
- if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
- instance, new_drbd,
- _GetInstanceInfoText(instance)):
- # remove secondary dev
- cfg.SetDiskID(new_drbd, remote_node)
- rpc.call_blockdev_remove(remote_node, new_drbd)
- raise errors.OpExecError("Failed to create volume on primary!"
- " Full abort, cleanup manually!!")
-
- # the device exists now
- # call the primary node to add the mirror to md
- logger.Info("adding new mirror component to md")
- if not rpc.call_blockdev_addchildren(instance.primary_node, dev,
- [new_drbd]):
- logger.Error("Can't add mirror compoment to md!")
- cfg.SetDiskID(new_drbd, remote_node)
- if not rpc.call_blockdev_remove(remote_node, new_drbd):
- logger.Error("Can't rollback on secondary")
- cfg.SetDiskID(new_drbd, instance.primary_node)
- if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
- logger.Error("Can't rollback on primary")
- raise errors.OpExecError("Full abort, cleanup manually!!")
-
- dev.children.append(new_drbd)
- cfg.AddInstance(instance)
-
- # this can fail as the old devices are degraded and _WaitForSync
- # does a combined result over all disks, so we don't check its
- # return value
- _WaitForSync(cfg, instance, self.proc, unlock=True)
-
- # so check manually all the devices
- for name in iv_names:
- dev, child, new_drbd = iv_names[name]
- cfg.SetDiskID(dev, instance.primary_node)
- is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
- if is_degr:
- raise errors.OpExecError("MD device %s is degraded!" % name)
- cfg.SetDiskID(new_drbd, instance.primary_node)
- is_degr = rpc.call_blockdev_find(instance.primary_node, new_drbd)[5]
- if is_degr:
- raise errors.OpExecError("New drbd device %s is degraded!" % name)
-
- for name in iv_names:
- dev, child, new_drbd = iv_names[name]
- logger.Info("remove mirror %s component" % name)
- cfg.SetDiskID(dev, instance.primary_node)
- if not rpc.call_blockdev_removechildren(instance.primary_node,
- dev, [child]):
- logger.Error("Can't remove child from mirror, aborting"
- " *this device cleanup*.\nYou need to cleanup manually!!")
- continue
-
- for node in child.logical_id[:2]:
- logger.Info("remove child device on %s" % node)
- cfg.SetDiskID(child, node)
- if not rpc.call_blockdev_remove(node, child):
- logger.Error("Warning: failed to remove device from node %s,"
- " continuing operation." % node)
-
- dev.children.remove(child)
-
- cfg.AddInstance(instance)
-
def _ExecD8DiskOnly(self, feedback_fn):
"""Replace a disk on the primary or secondary for dbrd8.
"""
instance = self.instance
- if instance.disk_template == constants.DT_REMOTE_RAID1:
- fn = self._ExecRR1
- elif instance.disk_template == constants.DT_DRBD8:
+
+ # Activate the instance disks if we're replacing them on a down instance
+ if instance.status == "down":
+ op = opcodes.OpActivateInstanceDisks(instance_name=instance.name)
+ self.proc.ChainOpCode(op)
+
+ if instance.disk_template == constants.DT_DRBD8:
if self.op.remote_node is None:
fn = self._ExecD8DiskOnly
else:
fn = self._ExecD8Secondary
else:
raise errors.ProgrammerError("Unhandled disk replacement case")
- return fn(feedback_fn)
+
+ ret = fn(feedback_fn)
+
+ # Deactivate the instance disks if we're replacing them on a down instance
+ if instance.status == "down":
+ op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name)
+ self.proc.ChainOpCode(op)
+
+ return ret
class LUQueryInstanceData(NoHooksLU):
"memory": instance.memory,
"nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
"disks": disks,
- "network_port": instance.network_port,
"vcpus": instance.vcpus,
- "kernel_path": instance.kernel_path,
- "initrd_path": instance.initrd_path,
- "hvm_boot_order": instance.hvm_boot_order,
}
+ htkind = self.sstore.GetHypervisorType()
+ if htkind == constants.HT_XEN_PVM30:
+ idict["kernel_path"] = instance.kernel_path
+ idict["initrd_path"] = instance.initrd_path
+
+ if htkind == constants.HT_XEN_HVM31:
+ idict["hvm_boot_order"] = instance.hvm_boot_order
+ idict["hvm_acpi"] = instance.hvm_acpi
+ idict["hvm_pae"] = instance.hvm_pae
+ idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path
+
+ if htkind in constants.HTS_REQ_PORT:
+ idict["vnc_bind_address"] = instance.vnc_bind_address
+ idict["network_port"] = instance.network_port
+
result[instance.name] = idict
return result
self.kernel_path = getattr(self.op, "kernel_path", None)
self.initrd_path = getattr(self.op, "initrd_path", None)
self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
- all_params = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
- self.kernel_path, self.initrd_path, self.hvm_boot_order]
- if all_params.count(None) == len(all_params):
+ self.hvm_acpi = getattr(self.op, "hvm_acpi", None)
+ self.hvm_pae = getattr(self.op, "hvm_pae", None)
+ self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None)
+ self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None)
+    all_params = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
+                  self.kernel_path, self.initrd_path, self.hvm_boot_order,
+                  self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
+                  self.vnc_bind_address]
+    if all_params.count(None) == len(all_params):
raise errors.OpPrereqError("No changes submitted")
if self.mem is not None:
try:
" must be one or more of [acdn]"
" or 'default'")
+ # hvm_cdrom_image_path verification
+ if self.op.hvm_cdrom_image_path is not None:
+ if not os.path.isabs(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The path to the HVM CDROM image must"
+ " be an absolute path or None, not %s" %
+ self.op.hvm_cdrom_image_path)
+ if not os.path.isfile(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The HVM CDROM image must either be a"
+ " regular file or a symlink pointing to"
+ " an existing regular file, not %s" %
+ self.op.hvm_cdrom_image_path)
+
+ # vnc_bind_address verification
+ if self.op.vnc_bind_address is not None:
+ if not utils.IsValidIP(self.op.vnc_bind_address):
+ raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
+ " like a valid IP address" %
+ self.op.vnc_bind_address)
+
instance = self.cfg.GetInstanceInfo(
self.cfg.ExpandInstanceName(self.op.instance_name))
if instance is None:
else:
instance.hvm_boot_order = self.hvm_boot_order
result.append(("hvm_boot_order", self.hvm_boot_order))
+    if self.hvm_acpi is not None:
+      instance.hvm_acpi = self.hvm_acpi
+      result.append(("hvm_acpi", self.hvm_acpi))
+    if self.hvm_pae is not None:
+      instance.hvm_pae = self.hvm_pae
+      result.append(("hvm_pae", self.hvm_pae))
+ if self.hvm_cdrom_image_path:
+ instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
+ result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path))
+ if self.vnc_bind_address:
+ instance.vnc_bind_address = self.vnc_bind_address
+ result.append(("vnc_bind_address", self.vnc_bind_address))
self.cfg.AddInstance(instance)
self.op.target_node)
self.op.target_node = self.dst_node.name
+ # instance disk type verification
+ for disk in self.instance.disks:
+ if disk.dev_type == constants.LD_FILE:
+ raise errors.OpPrereqError("Export not supported for instances with"
+ " file-based disks")
+
def Exec(self, feedback_fn):
"""Export an instance to an image in the cluster.
# shutdown the instance, but not the disks
if not rpc.call_instance_shutdown(src_node, instance):
raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, src_node))
+ (instance.name, src_node))
vgname = self.cfg.GetVGName()
"version": 1,
"cluster_name": self.sstore.GetClusterName(),
"cluster_tags": list(cfg.GetClusterInfo().GetTags()),
+ "hypervisor_type": self.sstore.GetHypervisorType(),
# we don't have job IDs
}
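+    # the instance objects are used both for the per-node memory
+    # accounting and for the instance data itself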
+ i_list = [cfg.GetInstanceInfo(iname) for iname in cfg.GetInstanceList()]
+
# node data
node_results = {}
node_list = cfg.GetNodeList()
if nname not in node_data or not isinstance(node_data[nname], dict):
raise errors.OpExecError("Can't get data for node %s" % nname)
remote_info = node_data[nname]
- for attr in ['memory_total', 'memory_free',
- 'vg_size', 'vg_free']:
+ for attr in ['memory_total', 'memory_free', 'memory_dom0',
+ 'vg_size', 'vg_free', 'cpu_total']:
if attr not in remote_info:
raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
(nname, attr))
try:
- int(remote_info[attr])
+ remote_info[attr] = int(remote_info[attr])
except ValueError, err:
raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
" %s" % (nname, attr, str(err)))
+ # compute memory used by primary instances
+ i_p_mem = i_p_up_mem = 0
+ for iinfo in i_list:
+ if iinfo.primary_node == nname:
+ i_p_mem += iinfo.memory
+ if iinfo.status == "up":
+ i_p_up_mem += iinfo.memory
+
+      # build the result dict for this node
pnr = {
"tags": list(ninfo.GetTags()),
- "total_memory": utils.TryConvert(int, remote_info['memory_total']),
- "free_memory": utils.TryConvert(int, remote_info['memory_free']),
- "total_disk": utils.TryConvert(int, remote_info['vg_size']),
- "free_disk": utils.TryConvert(int, remote_info['vg_free']),
+ "total_memory": remote_info['memory_total'],
+ "reserved_memory": remote_info['memory_dom0'],
+ "free_memory": remote_info['memory_free'],
+ "i_pri_memory": i_p_mem,
+ "i_pri_up_memory": i_p_up_mem,
+ "total_disk": remote_info['vg_size'],
+ "free_disk": remote_info['vg_free'],
"primary_ip": ninfo.primary_ip,
"secondary_ip": ninfo.secondary_ip,
+ "total_cpus": remote_info['cpu_total'],
}
node_results[nname] = pnr
data["nodes"] = node_results
# instance data
instance_data = {}
- i_list = cfg.GetInstanceList()
- for iname in i_list:
- iinfo = cfg.GetInstanceInfo(iname)
+ for iinfo in i_list:
nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
for n in iinfo.nics]
pir = {
"disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
"disk_template": iinfo.disk_template,
}
- instance_data[iname] = pir
+ instance_data[iinfo.name] = pir
data["instances"] = instance_data
self.in_text = serializer.Dump(self.in_data)
- def Run(self, name, validate=True):
+ def Run(self, name, validate=True, call_fn=rpc.call_iallocator_runner):
"""Run an instance allocator and return the results.
"""
data = self.in_text
- alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
- os.path.isfile)
- if alloc_script is None:
- raise errors.OpExecError("Can't find allocator '%s'" % name)
+ result = call_fn(self.sstore.GetMasterNode(), name, self.in_text)
- fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
- try:
- os.write(fd, data)
- os.close(fd)
- result = utils.RunCmd([alloc_script, fin_name])
- if result.failed:
+ if not isinstance(result, tuple) or len(result) != 4:
+ raise errors.OpExecError("Invalid result from master iallocator runner")
+
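+    # the runner returns a (status, stdout, stderr, fail reason) tuple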
+ rcode, stdout, stderr, fail = result
+
+ if rcode == constants.IARUN_NOTFOUND:
+ raise errors.OpExecError("Can't find allocator '%s'" % name)
+ elif rcode == constants.IARUN_FAILURE:
raise errors.OpExecError("Instance allocator call failed: %s,"
" output: %s" %
- (result.fail_reason, result.output))
- finally:
- os.unlink(fin_name)
- self.out_text = result.stdout
+ (fail, stdout+stderr))
+ self.out_text = stdout
if validate:
self._ValidateResult()