if 'nodelist' not in node_result:
bad = True
- feedback_fn(" - ERROR: node hasn't returned node connectivity data")
+ feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
else:
if node_result['nodelist']:
bad = True
for node in node_result['nodelist']:
- feedback_fn(" - ERROR: communication with node '%s': %s" %
+ feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
(node, node_result['nodelist'][node]))
+ if 'node-net-test' not in node_result:
+ bad = True
+ feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
+ else:
+ if node_result['node-net-test']:
+ bad = True
+ nlist = utils.NiceSort(node_result['node-net-test'].keys())
+ for node in nlist:
+ feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
+ (node, node_result['node-net-test'][node]))
+
hyp_result = node_result.get('hypervisor', None)
if hyp_result is not None:
feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
needed_mem = 0
for instance in instances:
- needed_mem += instance_cfg[instance].memory
+ if instance_cfg[instance].auto_balance:
+ needed_mem += instance_cfg[instance].memory
if nodeinfo['mfree'] < needed_mem:
feedback_fn(" - ERROR: not enough memory on node %s to accomodate"
" failovers should node %s fail" % (node, prinode))
"""
all_nodes = self.cfg.GetNodeList()
+ tags = self.cfg.GetClusterInfo().GetTags()
# TODO: populate the environment with useful information for verify hooks
- env = {}
+ env = {
+ "CLUSTER_TAGS": " ".join(tags),
+ }
return env, [], all_nodes
def Exec(self, feedback_fn):
vg_name = self.cfg.GetVGName()
nodelist = utils.NiceSort(self.cfg.GetNodeList())
+ nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
instancelist = utils.NiceSort(self.cfg.GetInstanceList())
i_non_redundant = [] # Non redundant instances
+ i_non_a_balanced = [] # Non auto-balanced instances
node_volume = {}
node_instance = {}
node_info = {}
'filelist': file_names,
'nodelist': nodelist,
'hypervisor': None,
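+ # each node will attempt a TCP connection to every listed peer (name,
+ # primary IP, secondary IP) and report the ones it cannot reach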
+ 'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
+ for node in nodeinfo]
}
all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
all_rversion = rpc.call_version(nodelist)
all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
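+ # set whenever a node fails to report its data, so that checks which need
+ # complete information (e.g. N+1 memory) can be skipped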
+ incomplete_nodeinfo = False
+
for node in nodelist:
feedback_fn("* Verifying node %s" % node)
result = self._VerifyNode(node, file_names, local_checksums,
elif not isinstance(volumeinfo, dict):
feedback_fn(" - ERROR: connection to %s failed" % (node,))
bad = True
+ incomplete_nodeinfo = True
continue
else:
node_volume[node] = volumeinfo
if type(nodeinstance) != list:
feedback_fn(" - ERROR: connection to %s failed" % (node,))
bad = True
+ incomplete_nodeinfo = True
continue
node_instance[node] = nodeinstance
if not isinstance(nodeinfo, dict):
feedback_fn(" - ERROR: connection to %s failed" % (node,))
bad = True
+ incomplete_nodeinfo = True
continue
try:
# secondary.
"sinst-by-pnode": {},
}
- except ValueError:
+ except (ValueError, TypeError):
feedback_fn(" - ERROR: invalid value returned from node %s" % (node,))
bad = True
+ incomplete_nodeinfo = True
continue
node_vol_should = {}
feedback_fn(" - WARNING: multiple secondaries for instance %s"
% instance)
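+ # instances without auto_balance are not reserved for in the N+1 memory
+ # check; they are only listed in a notice at the end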
+ if not inst_config.auto_balance:
+ i_non_a_balanced.append(instance)
+
for snode in inst_config.secondary_nodes:
if snode in node_info:
node_info[snode]['sinst'].append(instance)
feedback_fn)
bad = bad or result
- if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
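+ # the N+1 memory check needs data from every node, so skip it if any
+ # node's report was incomplete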
+ if (constants.VERIFY_NPLUSONE_MEM not in self.skip_set and
+ not incomplete_nodeinfo):
feedback_fn("* Verifying N+1 Memory redundancy")
result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
bad = bad or result
feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
% len(i_non_redundant))
+ if i_non_a_balanced:
+ feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
+ % len(i_non_a_balanced))
+
return int(bad)
def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
raise errors.OpPrereqError("Neither the name nor the IP address of the"
" cluster has changed")
if new_ip != old_ip:
- result = utils.RunCmd(["fping", "-q", new_ip])
- if not result.failed:
+ if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("The given cluster IP address (%s) is"
" reachable on the network. Aborting." %
new_ip)
"dtotal", "dfree",
"mtotal", "mnode", "mfree",
"bootid",
- "ctotal",
+ "ctotal", "cnodes", "csockets",
])
_CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
"pinst_list", "sinst_list",
- "pip", "sip"],
+ "pip", "sip", "tags"],
dynamic=self.dynamic_fields,
selected=self.op.output_fields)
for name in nodenames:
nodeinfo = node_data.get(name, None)
if nodeinfo:
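+ # use .get() with a None default so a partial or old-style reply from a
+ # node does not raise KeyError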
+ fn = utils.TryConvert
live_data[name] = {
- "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
- "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
- "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
- "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
- "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
- "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
- "bootid": nodeinfo['bootid'],
+ "mtotal": fn(int, nodeinfo.get('memory_total', None)),
+ "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
+ "mfree": fn(int, nodeinfo.get('memory_free', None)),
+ "dtotal": fn(int, nodeinfo.get('vg_size', None)),
+ "dfree": fn(int, nodeinfo.get('vg_free', None)),
+ "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
+ "bootid": nodeinfo.get('bootid', None),
+ "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
+ "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
}
else:
live_data[name] = {}
val = node.primary_ip
elif field == "sip":
val = node.secondary_ip
+ elif field == "tags":
+ val = list(node.GetTags())
elif field in self.dynamic_fields:
val = live_data[node.name].get(field, None)
else:
"""
nodeinfo = rpc.call_node_info([node], cfg.GetVGName())
- if not nodeinfo or not isinstance(nodeinfo, dict):
+ if not (nodeinfo and isinstance(nodeinfo, dict) and
+ node in nodeinfo and isinstance(nodeinfo[node], dict)):
raise errors.OpPrereqError("Could not contact node %s for resource"
" information" % (node,))
new_name)
if not getattr(self.op, "ignore_ip", False):
- command = ["fping", "-q", name_info.ip]
- result = utils.RunCmd(command)
- if not result.failed:
+ if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("IP %s of instance %s already in use" %
(name_info.ip, new_name))
try:
if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
"sda", "sdb"):
- msg = ("Could run OS rename script for instance %s on node %s (but the"
- " instance has been renamed in Ganeti)" %
+ msg = ("Could not run OS rename script for instance %s on node %s"
+ " (but the instance has been renamed in Ganeti)" %
(inst.name, inst.primary_node))
logger.Error(msg)
finally:
"""
HPATH = "instance-remove"
HTYPE = constants.HTYPE_INSTANCE
- _OP_REQP = ["instance_name"]
+ _OP_REQP = ["instance_name", "ignore_failures"]
def BuildHooksEnv(self):
"""Build hooks env.
_CheckOutputFields(static=["name", "os", "pnode", "snodes",
"admin_state", "admin_ram",
"disk_template", "ip", "mac", "bridge",
- "sda_size", "sdb_size", "vcpus"],
+ "sda_size", "sdb_size", "vcpus", "tags",
+ "auto_balance",
+ "network_port", "kernel_path", "initrd_path",
+ "hvm_boot_order", "hvm_acpi", "hvm_pae",
+ "hvm_cdrom_image_path", "hvm_nic_type",
+ "hvm_disk_type", "vnc_bind_address"],
dynamic=self.dynamic_fields,
selected=self.op.output_fields)
val = disk.size
elif field == "vcpus":
val = instance.vcpus
+ elif field == "tags":
+ val = list(instance.GetTags())
+ elif field == "auto_balance":
+ val = instance.auto_balance
+ elif field in ("network_port", "kernel_path", "initrd_path",
+ "hvm_boot_order", "hvm_acpi", "hvm_pae",
+ "hvm_cdrom_image_path", "hvm_nic_type",
+ "hvm_disk_type", "vnc_bind_address"):
+ val = getattr(instance, field, None)
+ if val is None:
+ if field in ("hvm_nic_type", "hvm_disk_type",
+ "kernel_path", "initrd_path"):
+ val = "default"
+ else:
+ val = "-"
else:
raise errors.ParameterError(field)
iout.append(val)
instance.primary_node = target_node
# distribute new instance config to the other nodes
- self.cfg.AddInstance(instance)
+ self.cfg.Update(instance)
# Only start the instance if it's marked as up
if instance.status == "up":
(instance.name, target_node))
+class LUMigrateInstance(LogicalUnit):
+ """Migrate an instance.
+
+ This is migration without shutting down, compared to the failover,
+ which is done with shutdown.
+
+ """
+ HPATH = "instance-migrate"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "live", "cleanup"]
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = _BuildInstanceHookEnvByObject(self.instance)
+ nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ instance = self.cfg.GetInstanceInfo(
+ self.cfg.ExpandInstanceName(self.op.instance_name))
+ if instance is None:
+ raise errors.OpPrereqError("Instance '%s' not known" %
+ self.op.instance_name)
+
+ if instance.disk_template != constants.DT_DRBD8:
+ raise errors.OpPrereqError("Instance's disk layout is not"
+ " drbd8, cannot migrate.")
+
+ secondary_nodes = instance.secondary_nodes
+ if not secondary_nodes:
+ raise errors.ProgrammerError("no secondary node but using "
+ "drbd8 disk template")
+
+ target_node = secondary_nodes[0]
+ # check memory requirements on the secondary node
+ _CheckNodeFreeMemory(self.cfg, target_node, "migrating instance %s" %
+ instance.name, instance.memory)
+
+ # check bridge existance
+ brlist = [nic.bridge for nic in instance.nics]
+ if not rpc.call_bridges_exist(target_node, brlist):
+ raise errors.OpPrereqError("One or more target bridges %s does not"
+ " exist on destination node '%s'" %
+ (brlist, target_node))
+
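+ # unless this is only a cleanup of a failed migration, ask the current
+ # primary node whether the instance can actually be live-migrated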
+ if not self.op.cleanup:
+ migratable = rpc.call_instance_migratable(instance.primary_node,
+ instance)
+ if not migratable:
+ raise errors.OpPrereqError("Can't contact node '%s'" %
+ instance.primary_node)
+ if not migratable[0]:
+ raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
+ migratable[1])
+
+ self.instance = instance
+
+ def _WaitUntilSync(self):
+ """Poll with custom rpc for disk sync.
+
+ This uses our own step-based rpc call.
+
+ """
+ self.feedback_fn("* wait until resync is done")
+ all_done = False
+ while not all_done:
+ all_done = True
+ result = rpc.call_drbd_reconfig_net(self.all_nodes, self.instance.name,
+ self.instance.disks,
+ self.nodes_ip, False,
+ constants.DRBD_RECONF_RPC_WFSYNC)
+ min_percent = 100
+ for node in self.all_nodes:
+ if not result[node] or not result[node][0]:
+ raise errors.OpExecError("Cannot resync disks on node %s" % (node,))
+ node_done, node_percent = result[node][1]
+ all_done = all_done and node_done
+ if node_percent is not None:
+ min_percent = min(min_percent, node_percent)
+ if not all_done:
+ if min_percent < 100:
+ self.feedback_fn(" - progress: %.1f%%" % min_percent)
+ time.sleep(2)
+
+ def _EnsureSecondary(self, node):
+ """Demote a node to secondary.
+
+ """
+ self.feedback_fn("* switching node %s to secondary mode" % node)
+ result = rpc.call_drbd_reconfig_net([node], self.instance.name,
+ self.instance.disks,
+ self.nodes_ip, False,
+ constants.DRBD_RECONF_RPC_SECONDARY)
+ if not result[node] or not result[node][0]:
+ raise errors.OpExecError("Cannot change disk to secondary on node %s,"
+ " error %s" %
+ (node, result[node][1]))
+
+ def _GoStandalone(self):
+ """Disconnect from the network.
+
+ """
+ self.feedback_fn("* changing into standalone mode")
+ result = rpc.call_drbd_reconfig_net(self.all_nodes, self.instance.name,
+ self.instance.disks,
+ self.nodes_ip, True,
+ constants.DRBD_RECONF_RPC_DISCONNECT)
+ for node in self.all_nodes:
+ if not result[node] or not result[node][0]:
+ raise errors.OpExecError("Cannot disconnect disks node %s,"
+ " error %s" % (node, result[node][1]))
+
+ def _GoReconnect(self, multimaster):
+ """Reconnect to the network.
+
+ """
+ if multimaster:
+ msg = "dual-master"
+ else:
+ msg = "single-master"
+ self.feedback_fn("* changing disks into %s mode" % msg)
+ result = rpc.call_drbd_reconfig_net(self.all_nodes, self.instance.name,
+ self.instance.disks,
+ self.nodes_ip,
+ multimaster,
+ constants.DRBD_RECONF_RPC_RECONNECT)
+ for node in self.all_nodes:
+ if not result[node] or not result[node][0]:
+ raise errors.OpExecError("Cannot change disks config on node %s,"
+ " error %s" % (node, result[node][1]))
+
+ def _IdentifyDisks(self):
+ """Start the migration RPC sequence.
+
+ """
+ self.feedback_fn("* identifying disks")
+ result = rpc.call_drbd_reconfig_net(self.all_nodes,
+ self.instance.name,
+ self.instance.disks,
+ self.nodes_ip, True,
+ constants.DRBD_RECONF_RPC_INIT)
+ for node in self.all_nodes:
+ if not result[node] or not result[node][0]:
+ raise errors.OpExecError("Cannot identify disks node %s,"
+ " error %s" % (node, result[node][1]))
+
+ def _ExecCleanup(self):
+ """Try to cleanup after a failed migration.
+
+ The cleanup is done by:
+ - check that the instance is running only on one node
+ (and update the config if needed)
+ - change disks on its secondary node to secondary
+ - wait until disks are fully synchronized
+ - disconnect from the network
+ - change disks into single-master mode
+ - wait again until disks are fully synchronized
+
+ """
+ instance = self.instance
+ target_node = self.target_node
+ source_node = self.source_node
+
+ # check running on only one node
+ self.feedback_fn("* checking where the instance actually runs"
+ " (if this hangs, the hypervisor might be in"
+ " a bad state)")
+ ins_l = rpc.call_instance_list(self.all_nodes)
+ for node in self.all_nodes:
+ if not isinstance(ins_l[node], list):
+ raise errors.OpExecError("Can't contact node '%s'" % node)
+
+ runningon_source = instance.name in ins_l[source_node]
+ runningon_target = instance.name in ins_l[target_node]
+
+ if runningon_source and runningon_target:
+ raise errors.OpExecError("Instance seems to be running on two nodes,"
+ " or the hypervisor is confused. You will have"
+ " to ensure manually that it runs only on one"
+ " and restart this operation.")
+
+ if not (runningon_source or runningon_target):
+ raise errors.OpExecError("Instance does not seem to be running at all."
+ " In this case, it's safer to repair by"
+ " running 'gnt-instance stop' to ensure disk"
+ " shutdown, and then restarting it.")
+
+ if runningon_target:
+ # the migration has actually succeeded, we need to update the config
+ self.feedback_fn("* instance running on secondary node (%s),"
+ " updating config" % target_node)
+ instance.primary_node = target_node
+ self.cfg.Update(instance)
+ demoted_node = source_node
+ else:
+ self.feedback_fn("* instance confirmed to be running on its"
+ " primary node (%s)" % source_node)
+ demoted_node = target_node
+
+ self._IdentifyDisks()
+
+ self._EnsureSecondary(demoted_node)
+ self._WaitUntilSync()
+ self._GoStandalone()
+ self._GoReconnect(False)
+ self._WaitUntilSync()
+
+ self.feedback_fn("* done")
+
+ def _ExecMigration(self):
+ """Migrate an instance.
+
+ The migrate is done by:
+ - change the disks into dual-master mode
+ - wait until disks are fully synchronized again
+ - migrate the instance
+ - change disks on the new secondary node (the old primary) to secondary
+ - wait until disks are fully synchronized
+ - change disks into single-master mode
+
+ """
+ instance = self.instance
+ target_node = self.target_node
+ source_node = self.source_node
+
+ self.feedback_fn("* checking disk consistency between source and target")
+ for dev in instance.disks:
+ if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
+ raise errors.OpExecError("Disk %s is degraded or not fully"
+ " synchronized on target node,"
+ " aborting migrate." % dev.iv_name)
+
+ self._IdentifyDisks()
+
+ self._EnsureSecondary(target_node)
+ self._GoStandalone()
+ self._GoReconnect(True)
+ self._WaitUntilSync()
+
+ self.feedback_fn("* migrating instance to %s" % target_node)
+ time.sleep(10)
+ result = rpc.call_instance_migrate(source_node, instance,
+ self.nodes_ip[target_node],
+ self.op.live)
+ if not result or not result[0]:
+ logger.Error("Instance migration failed, trying to revert disk status")
+ try:
+ self._EnsureSecondary(target_node)
+ self._GoStandalone()
+ self._GoReconnect(False)
+ self._WaitUntilSync()
+ except errors.OpExecError, err:
+ logger.Error("Can't reconnect the drives: error '%s'\n"
+ "Please look and recover the instance status" % str(err))
+
+ raise errors.OpExecError("Could not migrate instance %s: %s" %
+ (instance.name, result[1]))
+ time.sleep(10)
+
+ instance.primary_node = target_node
+ # distribute new instance config to the other nodes
+ self.cfg.Update(instance)
+
+ self._EnsureSecondary(source_node)
+ self._WaitUntilSync()
+ self._GoStandalone()
+ self._GoReconnect(False)
+ self._WaitUntilSync()
+
+ self.feedback_fn("* done")
+
+ def Exec(self, feedback_fn):
+ """Perform the migration.
+
+ """
+ self.feedback_fn = feedback_fn
+
+ self.source_node = self.instance.primary_node
+ self.target_node = self.instance.secondary_nodes[0]
+ self.all_nodes = [self.source_node, self.target_node]
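+ # DRBD replication traffic runs over the secondary (replication) network,
+ # so map each node to its secondary IP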
+ self.nodes_ip = {
+ self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
+ self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
+ }
+ if self.op.cleanup:
+ return self._ExecCleanup()
+ else:
+ return self._ExecMigration()
+
+
def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
"""Create a tree of block devices on the primary node.
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "mem_size", "disk_size",
"disk_template", "swap_size", "mode", "start", "vcpus",
- "wait_for_sync", "ip_check", "mac"]
+ "wait_for_sync", "ip_check", "mac", "auto_balance"]
def _RunAllocator(self):
"""Run the allocator based on input opcode.
if len(ial.nodes) != ial.required_nodes:
raise errors.OpPrereqError("iallocator '%s' returned invalid number"
" of nodes (%s), required %s" %
- (len(ial.nodes), ial.required_nodes))
+ (self.op.iallocator, len(ial.nodes),
+ ial.required_nodes))
self.op.pnode = ial.nodes[0]
logger.ToStdout("Selected nodes for the instance: %s" %
(", ".join(ial.nodes),))
# set optional parameters to none if they don't exist
for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
"iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path",
- "vnc_bind_address"]:
+ "hvm_nic_type", "hvm_disk_type", "vnc_bind_address"]:
if not hasattr(self.op, attr):
setattr(self.op, attr, None)
info = nodeinfo.get(node, None)
if not info:
raise errors.OpPrereqError("Cannot get current information"
- " from node '%s'" % nodeinfo)
+ " from node '%s'" % node)
vg_free = info.get('vg_free', None)
if not isinstance(vg_free, int):
raise errors.OpPrereqError("Can't compute free disk space on"
" destination node '%s'" %
(self.op.bridge, pnode.name))
+ # memory check on primary node
+ if self.op.start:
+ _CheckNodeFreeMemory(self.cfg, self.pnode.name,
+ "creating instance %s" % self.op.instance_name,
+ self.op.mem_size)
+
# hvm_cdrom_image_path verification
if self.op.hvm_cdrom_image_path is not None:
if not os.path.isabs(self.op.hvm_cdrom_image_path):
" like a valid IP address" %
self.op.vnc_bind_address)
+ # Xen HVM device type checks
+ if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+ if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
+ raise errors.OpPrereqError("Invalid NIC type %s specified for Xen HVM"
+ " hypervisor" % self.op.hvm_nic_type)
+ if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES:
+ raise errors.OpPrereqError("Invalid disk type %s specified for Xen HVM"
+ " hypervisor" % self.op.hvm_disk_type)
+
if self.op.start:
self.instance_status = 'up'
else:
hvm_pae=self.op.hvm_pae,
hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
vnc_bind_address=self.op.vnc_bind_address,
+ hvm_nic_type=self.op.hvm_nic_type,
+ hvm_disk_type=self.op.hvm_disk_type,
+ auto_balance=bool(self.op.auto_balance),
)
feedback_fn("* creating instance disks...")
if self.op.remote_node is not None:
raise errors.OpPrereqError("Give either the iallocator or the new"
" secondary, not both")
- self.op.remote_node = self._RunAllocator()
+ self._RunAllocator()
remote_node = self.op.remote_node
if remote_node is not None:
"""
instance = self.instance
+
+ # Activate the instance disks if we're replacing them on a down instance
+ if instance.status == "down":
+ op = opcodes.OpActivateInstanceDisks(instance_name=instance.name)
+ self.proc.ChainOpCode(op)
+
if instance.disk_template == constants.DT_REMOTE_RAID1:
fn = self._ExecRR1
elif instance.disk_template == constants.DT_DRBD8:
fn = self._ExecD8Secondary
else:
raise errors.ProgrammerError("Unhandled disk replacement case")
- return fn(feedback_fn)
+
+ ret = fn(feedback_fn)
+
+ # Deactivate the instance disks if we're replacing them on a down instance
+ if instance.status == "down":
+ op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name)
+ self.proc.ChainOpCode(op)
+
+ return ret
+
+
+class LUGrowDisk(LogicalUnit):
+ """Grow a disk of an instance.
+
+ """
+ HPATH = "disk-grow"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master, the primary and all the secondaries.
+
+ """
+ env = {
+ "DISK": self.op.disk,
+ "AMOUNT": self.op.amount,
+ }
+ env.update(_BuildInstanceHookEnvByObject(self.instance))
+ nl = [
+ self.sstore.GetMasterNode(),
+ self.instance.primary_node,
+ ]
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ instance = self.cfg.GetInstanceInfo(
+ self.cfg.ExpandInstanceName(self.op.instance_name))
+ if instance is None:
+ raise errors.OpPrereqError("Instance '%s' not known" %
+ self.op.instance_name)
+
+ if self.op.amount <= 0:
+ raise errors.OpPrereqError("Invalid grow-by amount: %s" % self.op.amount)
+
+ self.instance = instance
+ self.op.instance_name = instance.name
+
+ if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
+ raise errors.OpPrereqError("Instance's disk layout does not support"
+ " growing.")
+
+ self.disk = instance.FindDisk(self.op.disk)
+ if self.disk is None:
+ raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
+ (self.op.disk, instance.name))
+
+ nodenames = [instance.primary_node] + list(instance.secondary_nodes)
+ nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
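+ # the grow must fit into the free VG space and the disk must be fully
+ # synchronized on every node that holds a copy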
+ for node in nodenames:
+ info = nodeinfo.get(node, None)
+ if not info:
+ raise errors.OpPrereqError("Cannot get current information"
+ " from node '%s'" % node)
+ vg_free = info.get('vg_free', None)
+ if not isinstance(vg_free, int):
+ raise errors.OpPrereqError("Can't compute free disk space on"
+ " node %s" % node)
+ if self.op.amount > info['vg_free']:
+ raise errors.OpPrereqError("Not enough disk space on target node %s:"
+ " %d MiB available, %d MiB required" %
+ (node, info['vg_free'], self.op.amount))
+ is_primary = (node == instance.primary_node)
+ if not _CheckDiskConsistency(self.cfg, self.disk, node, is_primary):
+ raise errors.OpPrereqError("Disk %s is degraded or not fully"
+ " synchronized on node %s,"
+ " aborting grow." % (self.op.disk, node))
+
+ def Exec(self, feedback_fn):
+ """Execute disk grow.
+
+ """
+ instance = self.instance
+ disk = self.disk
+ for node in (instance.secondary_nodes + (instance.primary_node,)):
+ self.cfg.SetDiskID(disk, node)
+ result = rpc.call_blockdev_grow(node, disk, self.op.amount)
+ if not result or not isinstance(result, tuple) or len(result) != 2:
+ raise errors.OpExecError("grow request failed to node %s" % node)
+ elif not result[0]:
+ raise errors.OpExecError("grow request failed to node %s: %s" %
+ (node, result[1]))
+ disk.RecordGrow(self.op.amount)
+ self.cfg.Update(instance)
+ if self.op.wait_for_sync:
+ disk_abort = not _WaitForSync(self.cfg, instance, self.proc)
+ if disk_abort:
+ logger.Error("Warning: disk sync-ing has not returned a good status.\n"
+ " Please check the instance.")
class LUQueryInstanceData(NoHooksLU):
"""Query runtime instance data.
"""
- _OP_REQP = ["instances"]
+ _OP_REQP = ["instances", "static"]
def CheckPrereq(self):
"""Check prerequisites.
"""Compute block device status.
"""
- self.cfg.SetDiskID(dev, instance.primary_node)
- dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
+ static = self.op.static
+ if not static:
+ self.cfg.SetDiskID(dev, instance.primary_node)
+ dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
+ else:
+ dev_pstatus = None
+
if dev.dev_type in constants.LDS_DRBD:
# we change the snode then (otherwise we use the one passed in)
if dev.logical_id[0] == instance.primary_node:
else:
snode = dev.logical_id[0]
- if snode:
+ if snode and not static:
self.cfg.SetDiskID(dev, snode)
dev_sstatus = rpc.call_blockdev_find(snode, dev)
else:
"""Gather and return data"""
result = {}
for instance in self.wanted_instances:
- remote_info = rpc.call_instance_info(instance.primary_node,
- instance.name)
- if remote_info and "state" in remote_info:
- remote_state = "up"
+ if not self.op.static:
+ remote_info = rpc.call_instance_info(instance.primary_node,
+ instance.name)
+ if remote_info and "state" in remote_info:
+ remote_state = "up"
+ else:
+ remote_state = "down"
else:
- remote_state = "down"
+ remote_state = None
if instance.status == "down":
config_state = "down"
else:
"nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
"disks": disks,
"vcpus": instance.vcpus,
+ "auto_balance": instance.auto_balance,
}
htkind = self.sstore.GetHypervisorType()
idict["hvm_acpi"] = instance.hvm_acpi
idict["hvm_pae"] = instance.hvm_pae
idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path
+ idict["hvm_nic_type"] = instance.hvm_nic_type
+ idict["hvm_disk_type"] = instance.hvm_disk_type
if htkind in constants.HTS_REQ_PORT:
- idict["vnc_bind_address"] = instance.vnc_bind_address
+ if instance.vnc_bind_address is None:
+ vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
+ else:
+ vnc_bind_address = instance.vnc_bind_address
+ if instance.network_port is None:
+ vnc_console_port = None
+ elif vnc_bind_address == constants.BIND_ADDRESS_GLOBAL:
+ vnc_console_port = "%s:%s" % (instance.primary_node,
+ instance.network_port)
+ elif vnc_bind_address == constants.LOCALHOST_IP_ADDRESS:
+ vnc_console_port = "%s:%s on node %s" % (vnc_bind_address,
+ instance.network_port,
+ instance.primary_node)
+ else:
+ vnc_console_port = "%s:%s" % (instance.vnc_bind_address,
+ instance.network_port)
+ idict["vnc_console_port"] = vnc_console_port
+ idict["vnc_bind_address"] = vnc_bind_address
idict["network_port"] = instance.network_port
result[instance.name] = idict
self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
self.hvm_acpi = getattr(self.op, "hvm_acpi", None)
self.hvm_pae = getattr(self.op, "hvm_pae", None)
+ self.hvm_nic_type = getattr(self.op, "hvm_nic_type", None)
+ self.hvm_disk_type = getattr(self.op, "hvm_disk_type", None)
self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None)
self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None)
- all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
- self.kernel_path, self.initrd_path, self.hvm_boot_order,
- self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
- self.vnc_bind_address]
+ self.force = getattr(self.op, "force", None)
+ self.auto_balance = getattr(self.op, "auto_balance", None)
+ all_parms = [
+ self.mem, self.vcpus, self.ip, self.bridge, self.mac,
+ self.kernel_path, self.initrd_path, self.hvm_boot_order,
+ self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
+ self.vnc_bind_address, self.hvm_nic_type, self.hvm_disk_type,
+ self.auto_balance,
+ ]
if all_parms.count(None) == len(all_parms):
raise errors.OpPrereqError("No changes submitted")
if self.mem is not None:
# hvm_cdrom_image_path verification
if self.op.hvm_cdrom_image_path is not None:
- if not os.path.isabs(self.op.hvm_cdrom_image_path):
+ if not (os.path.isabs(self.op.hvm_cdrom_image_path) or
+ self.op.hvm_cdrom_image_path.lower() == "none"):
raise errors.OpPrereqError("The path to the HVM CDROM image must"
" be an absolute path or None, not %s" %
self.op.hvm_cdrom_image_path)
- if not os.path.isfile(self.op.hvm_cdrom_image_path):
+ if not (os.path.isfile(self.op.hvm_cdrom_image_path) or
+ self.op.hvm_cdrom_image_path.lower() == "none"):
raise errors.OpPrereqError("The HVM CDROM image must either be a"
" regular file or a symlink pointing to"
" an existing regular file, not %s" %
" like a valid IP address" %
self.op.vnc_bind_address)
+ # Xen HVM device type checks
+ if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+ if self.op.hvm_nic_type is not None:
+ if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
+ raise errors.OpPrereqError("Invalid NIC type %s specified for Xen"
+ " HVM hypervisor" % self.op.hvm_nic_type)
+ if self.op.hvm_disk_type is not None:
+ if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES:
+ raise errors.OpPrereqError("Invalid disk type %s specified for Xen"
+ " HVM hypervisor" % self.op.hvm_disk_type)
+
+ # auto balance setting
+ if self.auto_balance is not None:
+ # convert the value to a proper bool value, if it's not
+ self.auto_balance = bool(self.auto_balance)
+
instance = self.cfg.GetInstanceInfo(
self.cfg.ExpandInstanceName(self.op.instance_name))
if instance is None:
self.op.instance_name)
self.op.instance_name = instance.name
self.instance = instance
+ self.warn = []
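+ # when the memory size is being changed without the force flag, verify
+ # that the new size still fits on the primary node (and on the
+ # secondaries for auto-balanced instances)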
+ if self.mem is not None and not self.force:
+ pnode = self.instance.primary_node
+ nodelist = [pnode]
+ if instance.auto_balance:
+ nodelist.extend(instance.secondary_nodes)
+ instance_info = rpc.call_instance_info(pnode, instance.name)
+ nodeinfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
+
+ if pnode not in nodeinfo or not isinstance(nodeinfo[pnode], dict):
+ # Assume the primary node is unreachable and go ahead
+ self.warn.append("Can't get info from primary node %s" % pnode)
+ else:
+ if instance_info:
+ current_mem = instance_info['memory']
+ else:
+ # Assume instance not running
+ # (there is a slight race condition here, but it's not very probable,
+ # and we have no other way to check)
+ current_mem = 0
+ miss_mem = self.mem - current_mem - nodeinfo[pnode]['memory_free']
+ if miss_mem > 0:
+ raise errors.OpPrereqError("This change will prevent the instance"
+ " from starting, due to %d MB of memory"
+ " missing on its primary node" % miss_mem)
+
+ if instance.auto_balance:
+ for node in instance.secondary_nodes:
+ if node not in nodeinfo or not isinstance(nodeinfo[node], dict):
+ self.warn.append("Can't get info from secondary node %s" % node)
+ elif self.mem > nodeinfo[node]['memory_free']:
+ self.warn.append("Not enough memory to failover instance to"
+ " secondary node %s" % node)
return
def Exec(self, feedback_fn):
All parameters take effect only at the next restart of the instance.
"""
+ # Process here the warnings from CheckPrereq, as we don't have a
+ # feedback_fn there.
+ for warn in self.warn:
+ feedback_fn("WARNING: %s" % warn)
+
result = []
instance = self.instance
if self.mem:
else:
instance.hvm_boot_order = self.hvm_boot_order
result.append(("hvm_boot_order", self.hvm_boot_order))
- if self.hvm_acpi:
+ if self.hvm_acpi is not None:
instance.hvm_acpi = self.hvm_acpi
result.append(("hvm_acpi", self.hvm_acpi))
- if self.hvm_pae:
+ if self.hvm_pae is not None:
instance.hvm_pae = self.hvm_pae
result.append(("hvm_pae", self.hvm_pae))
+ if self.hvm_nic_type is not None:
+ instance.hvm_nic_type = self.hvm_nic_type
+ result.append(("hvm_nic_type", self.hvm_nic_type))
+ if self.hvm_disk_type is not None:
+ instance.hvm_disk_type = self.hvm_disk_type
+ result.append(("hvm_disk_type", self.hvm_disk_type))
if self.hvm_cdrom_image_path:
- instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
+ if self.hvm_cdrom_image_path == constants.VALUE_NONE:
+ instance.hvm_cdrom_image_path = None
+ else:
+ instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path))
if self.vnc_bind_address:
instance.vnc_bind_address = self.vnc_bind_address
result.append(("vnc_bind_address", self.vnc_bind_address))
+ if self.auto_balance is not None:
+ instance.auto_balance = self.auto_balance
+ result.append(("auto_balance", self.auto_balance))
self.cfg.AddInstance(instance)
"""
TagsLU.CheckPrereq(self)
for tag in self.op.tags:
- objects.TaggableObject.ValidateTag(tag)
+ objects.TaggableObject.ValidateTag(tag, removal=True)
del_tags = frozenset(self.op.tags)
cur_tags = self.target.GetTags()
if not del_tags <= cur_tags: