X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/428958aa0c73e82d55998e5e112dd97c017f80fb..78f66a17b2b32270e3dd705295e652a95724b8e3:/lib/cmdlib.py

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 4140623..0cf2955 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -455,8 +455,8 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @param secondary_nodes: list of secondary nodes as strings
   @type os_type: string
   @param os_type: the name of the instance's OS
-  @type status: string
-  @param status: the desired status of the instances
+  @type status: boolean
+  @param status: the should_run status of the instance
   @type memory: string
   @param memory: the memory size of the instance
   @type vcpus: string
@@ -468,13 +468,17 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @return: the hook environment for this instance

   """
+  if status:
+    str_status = "up"
+  else:
+    str_status = "down"
   env = {
     "OP_TARGET": name,
     "INSTANCE_NAME": name,
     "INSTANCE_PRIMARY": primary_node,
     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
     "INSTANCE_OS_TYPE": os_type,
-    "INSTANCE_STATUS": status,
+    "INSTANCE_STATUS": str_status,
     "INSTANCE_MEMORY": memory,
     "INSTANCE_VCPUS": vcpus,
   }
@@ -516,7 +520,7 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
     'primary_node': instance.primary_node,
     'secondary_nodes': instance.secondary_nodes,
     'os_type': instance.os,
-    'status': instance.os,
+    'status': instance.admin_up,
     'memory': bep[constants.BE_MEMORY],
     'vcpus': bep[constants.BE_VCPUS],
     'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
@@ -613,7 +617,8 @@ class LUVerifyCluster(LogicalUnit):
     self.share_locks = dict(((i, 1) for i in locking.LEVELS))

   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
-                  node_result, feedback_fn, master_files):
+                  node_result, feedback_fn, master_files,
+                  drbd_map):
     """Run multiple tests against a node.
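Editor's note: the two hunks above turn the status argument into a boolean and translate it to "up"/"down" only when the hook environment is built. A minimal standalone sketch of the resulting behaviour (not the Ganeti function itself; the argument list is abbreviated):

def build_instance_hook_env(name, primary_node, secondary_nodes, os_type,
                            admin_up, memory, vcpus):
  # admin_up is a boolean ("should the instance be running?"); the hooks
  # still receive the historical string form via INSTANCE_STATUS
  if admin_up:
    str_status = "up"
  else:
    str_status = "down"
  return {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
  }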
Test list: @@ -630,6 +635,9 @@ class LUVerifyCluster(LogicalUnit): @param node_result: the results from the node @param feedback_fn: function used to accumulate results @param master_files: list of files that only masters should have + @param drbd_map: the useddrbd minors for this node, in + form of minor: (instance, must_exist) which correspond to instances + and their running status """ node = nodeinfo.name @@ -724,6 +732,19 @@ class LUVerifyCluster(LogicalUnit): if hv_result is not None: feedback_fn(" - ERROR: hypervisor %s verify failure: '%s'" % (hv_name, hv_result)) + + # check used drbd list + used_minors = node_result.get(constants.NV_DRBDLIST, []) + for minor, (iname, must_exist) in drbd_map.items(): + if minor not in used_minors and must_exist: + feedback_fn(" - ERROR: drbd minor %d of instance %s is not active" % + (minor, iname)) + bad = True + for minor in used_minors: + if minor not in drbd_map: + feedback_fn(" - ERROR: unallocated drbd minor %d is in use" % minor) + bad = True + return bad def _VerifyInstance(self, instance, instanceconfig, node_vol_is, @@ -751,7 +772,7 @@ class LUVerifyCluster(LogicalUnit): (volume, node)) bad = True - if not instanceconfig.status == 'down': + if instanceconfig.admin_up: if ((node_current not in node_instance or not instance in node_instance[node_current]) and node_current not in n_offline): @@ -867,6 +888,8 @@ class LUVerifyCluster(LogicalUnit): nodelist = utils.NiceSort(self.cfg.GetNodeList()) nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist] instancelist = utils.NiceSort(self.cfg.GetInstanceList()) + instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname)) + for iname in instancelist) i_non_redundant = [] # Non redundant instances i_non_a_balanced = [] # Non auto-balanced instances n_offline = [] # List of offline nodes @@ -900,12 +923,15 @@ class LUVerifyCluster(LogicalUnit): constants.NV_VGLIST: None, constants.NV_VERSION: None, constants.NV_HVINFO: self.cfg.GetHypervisorType(), + constants.NV_DRBDLIST: None, } all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param, self.cfg.GetClusterName()) cluster = self.cfg.GetClusterInfo() master_node = self.cfg.GetMasterNode() + all_drbd_map = self.cfg.ComputeDRBDMap() + for node_i in nodeinfo: node = node_i.name nresult = all_nvinfo[node].data @@ -928,8 +954,13 @@ class LUVerifyCluster(LogicalUnit): bad = True continue + node_drbd = {} + for minor, instance in all_drbd_map[node].items(): + instance = instanceinfo[instance] + node_drbd[minor] = (instance.name, instance.admin_up) result = self._VerifyNode(node_i, file_names, local_checksums, - nresult, feedback_fn, master_files) + nresult, feedback_fn, master_files, + node_drbd) bad = bad or result lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") @@ -985,7 +1016,7 @@ class LUVerifyCluster(LogicalUnit): for instance in instancelist: feedback_fn("* Verifying instance %s" % instance) - inst_config = self.cfg.GetInstanceInfo(instance) + inst_config = instanceinfo[instance] result = self._VerifyInstance(instance, inst_config, node_volume, node_instance, feedback_fn, n_offline) bad = bad or result @@ -1154,7 +1185,7 @@ class LUVerifyDisks(NoHooksLU): nv_dict = {} for inst in instances: inst_lvs = {} - if (inst.status != "up" or + if (not inst.admin_up or inst.disk_template not in constants.DTS_NET_MIRROR): continue inst.MapLVsByNode(inst_lvs) @@ -1495,8 +1526,7 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False): continue rstats = rstats.data retries = 0 - for i in range(len(rstats)): - mstat = 
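Editor's note: the new NV_DRBDLIST check above cross-checks the minors a node reports as in use against the expected per-node DRBD map. A self-contained sketch of that comparison, using plain data structures instead of the config objects:

def check_drbd_minors(drbd_map, used_minors):
  # drbd_map: {minor: (instance_name, must_exist)} as computed for one node
  # used_minors: minors the node actually reports as in use
  problems = []
  used = set(used_minors)
  for minor, (iname, must_exist) in drbd_map.items():
    if must_exist and minor not in used:
      problems.append("drbd minor %d of instance %s is not active" %
                      (minor, iname))
  for minor in used:
    if minor not in drbd_map:
      problems.append("unallocated drbd minor %d is in use" % minor)
  return problems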
rstats[i] + for i, mstat in enumerate(rstats): if mstat is None: lu.LogWarning("Can't compute data for node %s/%s", node, instance.disks[i].iv_name) @@ -1687,11 +1717,8 @@ class LURemoveNode(LogicalUnit): for instance_name in instance_list: instance = self.cfg.GetInstanceInfo(instance_name) - if node.name == instance.primary_node: - raise errors.OpPrereqError("Instance %s still running on the node," - " please remove first." % instance_name) - if node.name in instance.secondary_nodes: - raise errors.OpPrereqError("Instance %s has node as a secondary," + if node.name in instance.all_nodes: + raise errors.OpPrereqError("Instance %s is still running on the node," " please remove first." % instance_name) self.op.node_name = node.name self.node = node @@ -2592,8 +2619,7 @@ class LUStartupInstance(LogicalUnit): "FORCE": self.op.force, } env.update(_BuildInstanceHookEnvByObject(self, self.instance)) - nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + - list(self.instance.secondary_nodes)) + nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return env, nl, nl def CheckPrereq(self): @@ -2631,9 +2657,10 @@ class LUStartupInstance(LogicalUnit): _StartInstanceDisks(self, instance, force) result = self.rpc.call_instance_start(node_current, instance, extra_args) - if result.failed or not result.data: + msg = result.RemoteFailMsg() + if msg: _ShutdownInstanceDisks(self, instance) - raise errors.OpExecError("Could not start instance") + raise errors.OpExecError("Could not start instance: %s" % msg) class LURebootInstance(LogicalUnit): @@ -2665,8 +2692,7 @@ class LURebootInstance(LogicalUnit): "IGNORE_SECONDARIES": self.op.ignore_secondaries, } env.update(_BuildInstanceHookEnvByObject(self, self.instance)) - nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + - list(self.instance.secondary_nodes)) + nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return env, nl, nl def CheckPrereq(self): @@ -2707,9 +2733,11 @@ class LURebootInstance(LogicalUnit): _ShutdownInstanceDisks(self, instance) _StartInstanceDisks(self, instance, ignore_secondaries) result = self.rpc.call_instance_start(node_current, instance, extra_args) - if result.failed or not result.data: + msg = result.RemoteFailMsg() + if msg: _ShutdownInstanceDisks(self, instance) - raise errors.OpExecError("Could not start instance for full reboot") + raise errors.OpExecError("Could not start instance for" + " full reboot: %s" % msg) self.cfg.MarkInstanceUp(instance.name) @@ -2733,8 +2761,7 @@ class LUShutdownInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self, self.instance) - nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + - list(self.instance.secondary_nodes)) + nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return env, nl, nl def CheckPrereq(self): @@ -2781,8 +2808,7 @@ class LUReinstallInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self, self.instance) - nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + - list(self.instance.secondary_nodes)) + nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return env, nl, nl def CheckPrereq(self): @@ -2799,7 +2825,7 @@ class LUReinstallInstance(LogicalUnit): if instance.disk_template == constants.DT_DISKLESS: raise errors.OpPrereqError("Instance '%s' has no disks" % self.op.instance_name) - if instance.status != "down": + if instance.admin_up: raise errors.OpPrereqError("Instance '%s' is marked to be up" % self.op.instance_name) remote_info = 
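Editor's note: several hunks above replace the old "result.failed or not result.data" test with result.RemoteFailMsg(), which yields an error string on failure and a falsy value on success. A rough illustration of the calling pattern, with a dummy object standing in for the real RPC result class and RuntimeError standing in for errors.OpExecError:

class DummyRpcResult(object):
  # stand-in for the real RPC result; only the part used here is modelled
  def __init__(self, fail_msg=None):
    self._fail_msg = fail_msg

  def RemoteFailMsg(self):
    # non-empty string on failure, falsy value on success
    return self._fail_msg

def start_instance(result, shutdown_disks):
  msg = result.RemoteFailMsg()
  if msg:
    shutdown_disks()  # clean up the disks before reporting the error
    raise RuntimeError("Could not start instance: %s" % msg)

# usage: start_instance(DummyRpcResult(), lambda: None) succeeds, while
# start_instance(DummyRpcResult("no such device"), lambda: None) raises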
self.rpc.call_instance_info(instance.primary_node, @@ -2841,11 +2867,11 @@ class LUReinstallInstance(LogicalUnit): try: feedback_fn("Running the instance OS create scripts...") result = self.rpc.call_instance_os_add(inst.primary_node, inst) - result.Raise() - if not result.data: + msg = result.RemoteFailMsg() + if msg: raise errors.OpExecError("Could not install OS for instance %s" - " on node %s" % - (inst.name, inst.primary_node)) + " on node %s: %s" % + (inst.name, inst.primary_node, msg)) finally: _ShutdownInstanceDisks(self, inst) @@ -2866,8 +2892,7 @@ class LURenameInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self, self.instance) env["INSTANCE_NEW_NAME"] = self.op.new_name - nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + - list(self.instance.secondary_nodes)) + nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return env, nl, nl def CheckPrereq(self): @@ -2883,7 +2908,7 @@ class LURenameInstance(LogicalUnit): self.op.instance_name) _CheckNodeOnline(self, instance.primary_node) - if instance.status != "down": + if instance.admin_up: raise errors.OpPrereqError("Instance '%s' is marked to be up" % self.op.instance_name) remote_info = self.rpc.call_instance_info(instance.primary_node, @@ -2952,10 +2977,11 @@ class LURenameInstance(LogicalUnit): try: result = self.rpc.call_instance_run_rename(inst.primary_node, inst, old_name) - if result.failed or not result.data: + msg = result.RemoteFailMsg() + if msg: msg = ("Could not run OS rename script for instance %s on node %s" - " (but the instance has been renamed in Ganeti)" % - (inst.name, inst.primary_node)) + " (but the instance has been renamed in Ganeti): %s" % + (inst.name, inst.primary_node, msg)) self.proc.LogWarning(msg) finally: _ShutdownInstanceDisks(self, inst) @@ -3147,7 +3173,7 @@ class LUQueryInstances(NoHooksLU): elif field == "snodes": val = list(instance.secondary_nodes) elif field == "admin_state": - val = (instance.status != "down") + val = instance.admin_up elif field == "oper_state": if instance.primary_node in bad_nodes: val = None @@ -3161,12 +3187,12 @@ class LUQueryInstances(NoHooksLU): else: running = bool(live_data.get(instance.name)) if running: - if instance.status != "down": + if instance.admin_up: val = "running" else: val = "ERROR_up" else: - if instance.status != "down": + if instance.admin_up: val = "ERROR_down" else: val = "ADMIN_down" @@ -3340,7 +3366,7 @@ class LUFailoverInstance(LogicalUnit): for dev in instance.disks: # for drbd, these are drbd over lvm if not _CheckDiskConsistency(self, dev, target_node, False): - if instance.status == "up" and not self.op.ignore_consistency: + if instance.admin_up and not self.op.ignore_consistency: raise errors.OpExecError("Disk %s is degraded on target node," " aborting failover." % dev.iv_name) @@ -3368,7 +3394,7 @@ class LUFailoverInstance(LogicalUnit): self.cfg.Update(instance) # Only start the instance if it's marked as up - if instance.status == "up": + if instance.admin_up: feedback_fn("* activating the instance's disks on target node") logging.info("Starting instance %s on node %s", instance.name, target_node) @@ -3381,10 +3407,11 @@ class LUFailoverInstance(LogicalUnit): feedback_fn("* starting the instance on the target node") result = self.rpc.call_instance_start(target_node, instance, None) - if result.failed or not result.data: + msg = result.RemoteFailMsg() + if msg: _ShutdownInstanceDisks(self, instance) - raise errors.OpExecError("Could not start instance %s on node %s." 
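Editor's note: the status field above now combines the configured state (admin_up) with the observed state (running) into one of four values. The same decision table as a small function:

def instance_status(admin_up, running):
  # configured-up and actually running is the only healthy combination;
  # the others distinguish operator intent from error conditions
  if running:
    if admin_up:
      return "running"
    return "ERROR_up"
  if admin_up:
    return "ERROR_down"
  return "ADMIN_down"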
% - (instance.name, target_node)) + raise errors.OpExecError("Could not start instance %s on node %s: %s" % + (instance.name, target_node, msg)) class LUMigrateInstance(LogicalUnit): @@ -3608,6 +3635,41 @@ class LUMigrateInstance(LogicalUnit): self.feedback_fn("* done") + def _RevertDiskStatus(self): + """Try to revert the disk status after a failed migration. + + """ + target_node = self.target_node + try: + self._EnsureSecondary(target_node) + self._GoStandalone() + self._GoReconnect(False) + self._WaitUntilSync() + except errors.OpExecError, err: + self.LogWarning("Migration failed and I can't reconnect the" + " drives: error '%s'\n" + "Please look and recover the instance status" % + str(err)) + + def _AbortMigration(self): + """Call the hypervisor code to abort a started migration. + + """ + instance = self.instance + target_node = self.target_node + migration_info = self.migration_info + + abort_result = self.rpc.call_finalize_migration(target_node, + instance, + migration_info, + False) + abort_msg = abort_result.RemoteFailMsg() + if abort_msg: + logging.error("Aborting migration failed on target node %s: %s" % + (target_node, abort_msg)) + # Don't raise an exception here, as we stil have to try to revert the + # disk status, even if this step failed. + def _ExecMigration(self): """Migrate an instance. @@ -3631,11 +3693,38 @@ class LUMigrateInstance(LogicalUnit): " synchronized on target node," " aborting migrate." % dev.iv_name) + # First get the migration information from the remote node + result = self.rpc.call_migration_info(source_node, instance) + msg = result.RemoteFailMsg() + if msg: + log_err = ("Failed fetching source migration information from %s: %s" % + (source_node, msg)) + logging.error(log_err) + raise errors.OpExecError(log_err) + + self.migration_info = migration_info = result.data[1] + + # Then switch the disks to master/master mode self._EnsureSecondary(target_node) self._GoStandalone() self._GoReconnect(True) self._WaitUntilSync() + self.feedback_fn("* preparing %s to accept the instance" % target_node) + result = self.rpc.call_accept_instance(target_node, + instance, + migration_info, + self.nodes_ip[target_node]) + + msg = result.RemoteFailMsg() + if msg: + logging.error("Instance pre-migration failed, trying to revert" + " disk status: %s", msg) + self._AbortMigration() + self._RevertDiskStatus() + raise errors.OpExecError("Could not pre-migrate instance %s: %s" % + (instance.name, msg)) + self.feedback_fn("* migrating instance to %s" % target_node) time.sleep(10) result = self.rpc.call_instance_migrate(source_node, instance, @@ -3645,17 +3734,8 @@ class LUMigrateInstance(LogicalUnit): if msg: logging.error("Instance migration failed, trying to revert" " disk status: %s", msg) - try: - self._EnsureSecondary(target_node) - self._GoStandalone() - self._GoReconnect(False) - self._WaitUntilSync() - except errors.OpExecError, err: - self.LogWarning("Migration failed and I can't reconnect the" - " drives: error '%s'\n" - "Please look and recover the instance status" % - str(err)) - + self._AbortMigration() + self._RevertDiskStatus() raise errors.OpExecError("Could not migrate instance %s: %s" % (instance.name, msg)) time.sleep(10) @@ -3664,6 +3744,17 @@ class LUMigrateInstance(LogicalUnit): # distribute new instance config to the other nodes self.cfg.Update(instance) + result = self.rpc.call_finalize_migration(target_node, + instance, + migration_info, + True) + msg = result.RemoteFailMsg() + if msg: + logging.error("Instance migration succeeded, but finalization 
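Editor's note: the migration path above now fetches migration_info first, asks the target node to accept the instance, and on any failure calls _AbortMigration() followed by _RevertDiskStatus() before raising; only a finalization failure skips the rollback. A condensed sketch of that control flow, with each step passed in as a callable returning an error message or None (this shows only the shape of the logic, not the real LU):

def run_migration(get_info, accept, migrate, finalize, abort, revert):
  info, msg = get_info()
  if msg:
    raise RuntimeError("Failed fetching source migration information: %s" %
                       msg)
  msg = accept(info)
  if msg:
    abort(info)    # tell the hypervisor to abort the started migration
    revert()       # then try to put the disks back into their old state
    raise RuntimeError("Could not pre-migrate instance: %s" % msg)
  msg = migrate(info)
  if msg:
    abort(info)
    revert()
    raise RuntimeError("Could not migrate instance: %s" % msg)
  msg = finalize(info)
  if msg:
    raise RuntimeError("Could not finalize instance migration: %s" % msg)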
failed:" + " %s" % msg) + raise errors.OpExecError("Could not finalize instance migration: %s" % + msg) + self._EnsureSecondary(source_node) self._WaitUntilSync() self._GoStandalone() @@ -3730,14 +3821,40 @@ def _CreateBlockDev(lu, node, instance, device, force_create, if not force_create: return + _CreateSingleBlockDev(lu, node, instance, device, info, force_open) + + +def _CreateSingleBlockDev(lu, node, instance, device, info, force_open): + """Create a single block device on a given node. + + This will not recurse over children of the device, so they must be + created in advance. + + @param lu: the lu on whose behalf we execute + @param node: the node on which to create the device + @type instance: L{objects.Instance} + @param instance: the instance which owns the device + @type device: L{objects.Disk} + @param device: the device to create + @param info: the extra 'metadata' we should attach to the device + (this will be represented as a LVM tag) + @type force_open: boolean + @param force_open: this parameter will be passes to the + L{backend.CreateBlockDevice} function where it specifies + whether we run on primary or not, and it affects both + the child assembly and the device own Open() execution + + """ lu.cfg.SetDiskID(device, node) - new_id = lu.rpc.call_blockdev_create(node, device, device.size, + result = lu.rpc.call_blockdev_create(node, device, device.size, instance.name, force_open, info) - if new_id.failed or not new_id.data: + msg = result.RemoteFailMsg() + if msg: raise errors.OpExecError("Can't create block device %s on" - " node %s" % (device, node)) + " node %s for instance %s: %s" % + (device, node, instance.name, msg)) if device.physical_id is None: - device.physical_id = new_id + device.physical_id = result.data[1] def _GenerateUniqueNames(lu, exts): @@ -3799,7 +3916,8 @@ def _GenerateDiskTemplate(lu, template_name, disk_index = idx + base_index disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"], logical_id=(vgname, names[idx]), - iv_name="disk/%d" % disk_index) + iv_name="disk/%d" % disk_index, + mode=disk["mode"]) disks.append(disk_dev) elif template_name == constants.DT_DRBD8: if len(secondary_nodes) != 1: @@ -3819,6 +3937,7 @@ def _GenerateDiskTemplate(lu, template_name, disk["size"], names[idx*2:idx*2+2], "disk/%d" % disk_index, minors[idx*2], minors[idx*2+1]) + disk_dev.mode = disk["mode"] disks.append(disk_dev) elif template_name == constants.DT_FILE: if len(secondary_nodes) != 0: @@ -3830,7 +3949,8 @@ def _GenerateDiskTemplate(lu, template_name, iv_name="disk/%d" % disk_index, logical_id=(file_driver, "%s/disk%d" % (file_storage_dir, - idx))) + idx)), + mode=disk["mode"]) disks.append(disk_dev) else: raise errors.ProgrammerError("Invalid disk template '%s'" % template_name) @@ -4076,7 +4196,9 @@ class LUCreateInstance(LogicalUnit): raise errors.OpPrereqError("Invalid MAC address specified: %s" % mac) # bridge verification - bridge = nic.get("bridge", self.cfg.GetDefBridge()) + bridge = nic.get("bridge", None) + if bridge is None: + bridge = self.cfg.GetDefBridge() self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge)) # disk checks/pre-build @@ -4378,10 +4500,7 @@ class LUCreateInstance(LogicalUnit): self.be_full[constants.BE_MEMORY], self.op.hypervisor) - if self.op.start: - self.instance_status = 'up' - else: - self.instance_status = 'down' + self.instance_status = self.op.start def Exec(self, feedback_fn): """Create and add the instance to the cluster. 
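Editor's note: the bridge lookup above switches from nic.get("bridge", default) to an explicit None check, presumably so that a nic dict carrying an explicit None still falls back to the cluster default; dict.get only applies its default when the key is missing. Illustration (DEFAULT_BRIDGE is a placeholder value, not the real default):

DEFAULT_BRIDGE = "xen-br0"      # placeholder for cfg.GetDefBridge()

nic = {"bridge": None}          # key present, but no bridge chosen

# old style: the default is only used when the key is absent
old_bridge = nic.get("bridge", DEFAULT_BRIDGE)   # -> None

# new style: an explicitly stored None also falls back to the default
new_bridge = nic.get("bridge", None)
if new_bridge is None:
  new_bridge = DEFAULT_BRIDGE                    # -> "xen-br0"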
@@ -4428,7 +4547,7 @@ class LUCreateInstance(LogicalUnit): primary_node=pnode_name, nics=self.nics, disks=disks, disk_template=self.op.disk_template, - status=self.instance_status, + admin_up=self.instance_status, network_port=network_port, beparams=self.op.beparams, hvparams=self.op.hvparams, @@ -4452,8 +4571,6 @@ class LUCreateInstance(LogicalUnit): # Declare that we don't want to remove the instance lock anymore, as we've # added the instance to the config del self.remove_locks[locking.LEVEL_INSTANCE] - # Remove the temp. assignements for the instance's drbds - self.cfg.ReleaseDRBDMinors(instance) # Unlock all the nodes if self.op.mode == constants.INSTANCE_IMPORT: nodes_keep = [self.op.src_node] @@ -4490,11 +4607,11 @@ class LUCreateInstance(LogicalUnit): if self.op.mode == constants.INSTANCE_CREATE: feedback_fn("* running the instance OS create scripts...") result = self.rpc.call_instance_os_add(pnode_name, iobj) - result.Raise() - if not result.data: + msg = result.RemoteFailMsg() + if msg: raise errors.OpExecError("Could not add os for instance %s" - " on node %s" % - (instance, pnode_name)) + " on node %s: %s" % + (instance, pnode_name, msg)) elif self.op.mode == constants.INSTANCE_IMPORT: feedback_fn("* running the instance OS import scripts...") @@ -4519,9 +4636,9 @@ class LUCreateInstance(LogicalUnit): logging.info("Starting instance %s on node %s", instance, pnode_name) feedback_fn("* starting instance...") result = self.rpc.call_instance_start(pnode_name, iobj, None) - result.Raise() - if not result.data: - raise errors.OpExecError("Could not start instance") + msg = result.RemoteFailMsg() + if msg: + raise errors.OpExecError("Could not start instance: %s" % msg) class LUConnectConsole(NoHooksLU): @@ -4614,6 +4731,10 @@ class LUReplaceDisks(LogicalUnit): raise errors.OpPrereqError("Node '%s' not known" % self.op.remote_node) self.op.remote_node = remote_node + # Warning: do not remove the locking of the new secondary here + # unless DRBD8.AddChildren is changed to work in parallel; + # currently it doesn't since parallel invocations of + # FindUnusedMinor will conflict self.needed_locks[locking.LEVEL_NODE] = [remote_node] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND else: @@ -5017,9 +5138,9 @@ class LUReplaceDisks(LogicalUnit): logical_id=new_alone_id, children=dev.children) try: - _CreateBlockDev(self, new_node, instance, new_drbd, False, - _GetInstanceInfoText(instance), False) - except error.BlockDeviceError: + _CreateSingleBlockDev(self, new_node, instance, new_drbd, + _GetInstanceInfoText(instance), False) + except errors.BlockDeviceError: self.cfg.ReleaseDRBDMinors(instance.name) raise @@ -5050,9 +5171,6 @@ class LUReplaceDisks(LogicalUnit): dev.logical_id = new_logical_id cfg.SetDiskID(dev, pri_node) cfg.Update(instance) - # we can remove now the temp minors as now the new values are - # written to the config file (and therefore stable) - self.cfg.ReleaseDRBDMinors(instance.name) # and now perform the drbd attach info("attaching primary drbds to new secondary (standalone => connected)") @@ -5099,7 +5217,7 @@ class LUReplaceDisks(LogicalUnit): instance = self.instance # Activate the instance disks if we're replacing them on a down instance - if instance.status == "down": + if not instance.admin_up: _StartInstanceDisks(self, instance, True) if self.op.mode == constants.REPLACE_DISK_CHG: @@ -5110,7 +5228,7 @@ class LUReplaceDisks(LogicalUnit): ret = fn(feedback_fn) # Deactivate the instance disks if we're replacing them on a down instance - if 
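Editor's note: many call sites above drop the hand-built "[primary] + list(secondaries)" concatenations in favour of instance.all_nodes. That property is defined on the instance object outside this diff; judging from its use it amounts to something like the following stand-in:

class InstanceStub(object):
  # illustrative stand-in for objects.Instance, only the relevant bits
  def __init__(self, primary_node, secondary_nodes):
    self.primary_node = primary_node
    self.secondary_nodes = tuple(secondary_nodes)

  @property
  def all_nodes(self):
    # primary node first, then the secondaries
    return (self.primary_node,) + self.secondary_nodes

inst = InstanceStub("node1.example.com", ["node2.example.com"])
nl = ["master.example.com"] + list(inst.all_nodes)
# nl == ['master.example.com', 'node1.example.com', 'node2.example.com']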
instance.status == "down": + if not instance.admin_up: _SafeShutdownInstanceDisks(self, instance) return ret @@ -5160,8 +5278,8 @@ class LUGrowDisk(LogicalUnit): instance = self.cfg.GetInstanceInfo(self.op.instance_name) assert instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name - _CheckNodeOnline(self, instance.primary_node) - for node in instance.secondary_nodes: + nodenames = list(instance.all_nodes) + for node in nodenames: _CheckNodeOnline(self, node) @@ -5173,7 +5291,6 @@ class LUGrowDisk(LogicalUnit): self.disk = instance.FindDisk(self.op.disk) - nodenames = [instance.primary_node] + list(instance.secondary_nodes) nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(), instance.hypervisor) for node in nodenames: @@ -5196,7 +5313,7 @@ class LUGrowDisk(LogicalUnit): """ instance = self.instance disk = self.disk - for node in (instance.secondary_nodes + (instance.primary_node,)): + for node in instance.all_nodes: self.cfg.SetDiskID(disk, node) result = self.rpc.call_blockdev_grow(node, disk, self.op.amount) result.Raise() @@ -5327,10 +5444,10 @@ class LUQueryInstanceData(NoHooksLU): remote_state = "down" else: remote_state = None - if instance.status == "down": - config_state = "down" - else: + if instance.admin_up: config_state = "up" + else: + config_state = "down" disks = [self._ComputeDiskStatus(instance, None, device) for device in instance.disks] @@ -5395,7 +5512,7 @@ class LUSetInstanceParams(LogicalUnit): raise errors.OpPrereqError("Invalid disk index") if disk_op == constants.DDM_ADD: mode = disk_dict.setdefault('mode', constants.DISK_RDWR) - if mode not in (constants.DISK_RDONLY, constants.DISK_RDWR): + if mode not in constants.DISK_ACCESS_SET: raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode) size = disk_dict.get('size', None) if size is None: @@ -5475,8 +5592,7 @@ class LUSetInstanceParams(LogicalUnit): args['vcpus'] = self.be_new[constants.BE_VCPUS] # FIXME: readd disk/nic changes env = _BuildInstanceHookEnvByObject(self, self.instance, override=args) - nl = [self.cfg.GetMasterNode(), - self.instance.primary_node] + list(self.instance.secondary_nodes) + nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return env, nl, nl def CheckPrereq(self): @@ -5492,9 +5608,8 @@ class LUSetInstanceParams(LogicalUnit): instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name - pnode = self.instance.primary_node - nodelist = [pnode] - nodelist.extend(instance.secondary_nodes) + pnode = instance.primary_node + nodelist = list(instance.all_nodes) # hvparams processing if self.op.hvparams: @@ -5667,13 +5782,12 @@ class LUSetInstanceParams(LogicalUnit): disk_idx_base = len(instance.disks) new_disk = _GenerateDiskTemplate(self, instance.disk_template, - instance, instance.primary_node, + instance.name, instance.primary_node, instance.secondary_nodes, [disk_dict], file_path, file_driver, disk_idx_base)[0] - new_disk.mode = disk_dict['mode'] instance.disks.append(new_disk) info = _GetInstanceInfoText(instance) @@ -5686,7 +5800,7 @@ class LUSetInstanceParams(LogicalUnit): try: _CreateBlockDev(self, node, instance, new_disk, f_create, info, f_create) - except error.OpExecError, err: + except errors.OpExecError, err: self.LogWarning("Failed to create volume %s (%s) on" " node %s: %s", new_disk.iv_name, new_disk, node, err) @@ -5888,11 +6002,12 @@ class LUExportInstance(LogicalUnit): snap_disks.append(new_dev) 
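Editor's note: the disk-add validation above now checks the mode against constants.DISK_ACCESS_SET instead of an inline tuple. A standalone version of that check; the constant values and the size handling are assumptions for the sketch, not necessarily Ganeti's exact ones:

DISK_RDONLY = "ro"
DISK_RDWR = "rw"
DISK_ACCESS_SET = frozenset([DISK_RDONLY, DISK_RDWR])

def check_disk_add(disk_dict):
  # default to read-write, then refuse anything outside the known set
  mode = disk_dict.setdefault("mode", DISK_RDWR)
  if mode not in DISK_ACCESS_SET:
    raise ValueError("Invalid disk access mode '%s'" % mode)
  if disk_dict.get("size") is None:
    raise ValueError("Required disk parameter size missing")
  return disk_dict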
finally: - if self.op.shutdown and instance.status == "up": + if self.op.shutdown and instance.admin_up: result = self.rpc.call_instance_start(src_node, instance, None) - if result.failed or not result.data: + msg = result.RemoteFailMsg() + if msg: _ShutdownInstanceDisks(self, instance) - raise errors.OpExecError("Could not start instance") + raise errors.OpExecError("Could not start instance: %s" % msg) # TODO: check for size @@ -6263,7 +6378,7 @@ class IAllocator(object): "version": 1, "cluster_name": cfg.GetClusterName(), "cluster_tags": list(cluster_info.GetTags()), - "enable_hypervisors": list(cluster_info.enabled_hypervisors), + "enabled_hypervisors": list(cluster_info.enabled_hypervisors), # we don't have job IDs } iinfo = cfg.GetAllInstancesInfo().values() @@ -6282,52 +6397,60 @@ class IAllocator(object): hypervisor_name) node_iinfo = self.lu.rpc.call_all_instances_info(node_list, cluster_info.enabled_hypervisors) - for nname in node_list: + for nname, nresult in node_data.items(): + # first fill in static (config-based) values ninfo = cfg.GetNodeInfo(nname) - node_data[nname].Raise() - if not isinstance(node_data[nname].data, dict): - raise errors.OpExecError("Can't get data for node %s" % nname) - remote_info = node_data[nname].data - for attr in ['memory_total', 'memory_free', 'memory_dom0', - 'vg_size', 'vg_free', 'cpu_total']: - if attr not in remote_info: - raise errors.OpExecError("Node '%s' didn't return attribute '%s'" % - (nname, attr)) - try: - remote_info[attr] = int(remote_info[attr]) - except ValueError, err: - raise errors.OpExecError("Node '%s' returned invalid value for '%s':" - " %s" % (nname, attr, str(err))) - # compute memory used by primary instances - i_p_mem = i_p_up_mem = 0 - for iinfo, beinfo in i_list: - if iinfo.primary_node == nname: - i_p_mem += beinfo[constants.BE_MEMORY] - if iinfo.name not in node_iinfo[nname]: - i_used_mem = 0 - else: - i_used_mem = int(node_iinfo[nname][iinfo.name]['memory']) - i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem - remote_info['memory_free'] -= max(0, i_mem_diff) - - if iinfo.status == "up": - i_p_up_mem += beinfo[constants.BE_MEMORY] - - # compute memory used by instances pnr = { "tags": list(ninfo.GetTags()), - "total_memory": remote_info['memory_total'], - "reserved_memory": remote_info['memory_dom0'], - "free_memory": remote_info['memory_free'], - "i_pri_memory": i_p_mem, - "i_pri_up_memory": i_p_up_mem, - "total_disk": remote_info['vg_size'], - "free_disk": remote_info['vg_free'], "primary_ip": ninfo.primary_ip, "secondary_ip": ninfo.secondary_ip, - "total_cpus": remote_info['cpu_total'], "offline": ninfo.offline, + "master_candidate": ninfo.master_candidate, } + + if not ninfo.offline: + nresult.Raise() + if not isinstance(nresult.data, dict): + raise errors.OpExecError("Can't get data for node %s" % nname) + remote_info = nresult.data + for attr in ['memory_total', 'memory_free', 'memory_dom0', + 'vg_size', 'vg_free', 'cpu_total']: + if attr not in remote_info: + raise errors.OpExecError("Node '%s' didn't return attribute" + " '%s'" % (nname, attr)) + try: + remote_info[attr] = int(remote_info[attr]) + except ValueError, err: + raise errors.OpExecError("Node '%s' returned invalid value" + " for '%s': %s" % (nname, attr, err)) + # compute memory used by primary instances + i_p_mem = i_p_up_mem = 0 + for iinfo, beinfo in i_list: + if iinfo.primary_node == nname: + i_p_mem += beinfo[constants.BE_MEMORY] + if iinfo.name not in node_iinfo[nname].data: + i_used_mem = 0 + else: + i_used_mem = 
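Editor's note: the IAllocator changes above split the per-node dictionary into static fields (always present, taken from the configuration) and dynamic fields (added only for online nodes, from the verified RPC data). A compact sketch of that structure using plain dicts in place of the node objects:

def build_node_entry(ninfo, remote_info):
  # static, configuration-derived part: always filled in
  pnr = {
    "tags": list(ninfo.get("tags", [])),
    "primary_ip": ninfo["primary_ip"],
    "secondary_ip": ninfo["secondary_ip"],
    "offline": ninfo["offline"],
    "master_candidate": ninfo["master_candidate"],
  }
  if not ninfo["offline"]:
    # dynamic, runtime-derived part: only meaningful for online nodes
    pnr_dyn = {
      "total_memory": int(remote_info["memory_total"]),
      "reserved_memory": int(remote_info["memory_dom0"]),
      "free_memory": int(remote_info["memory_free"]),
      "total_disk": int(remote_info["vg_size"]),
      "free_disk": int(remote_info["vg_free"]),
      "total_cpus": int(remote_info["cpu_total"]),
    }
    pnr.update(pnr_dyn)
  return pnr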
int(node_iinfo[nname].data[iinfo.name]['memory']) + i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem + remote_info['memory_free'] -= max(0, i_mem_diff) + + if iinfo.admin_up: + i_p_up_mem += beinfo[constants.BE_MEMORY] + + # compute memory used by instances + pnr_dyn = { + "total_memory": remote_info['memory_total'], + "reserved_memory": remote_info['memory_dom0'], + "free_memory": remote_info['memory_free'], + "total_disk": remote_info['vg_size'], + "free_disk": remote_info['vg_free'], + "total_cpus": remote_info['cpu_total'], + "i_pri_memory": i_p_mem, + "i_pri_up_memory": i_p_up_mem, + } + pnr.update(pnr_dyn) + node_results[nname] = pnr data["nodes"] = node_results @@ -6338,13 +6461,13 @@ class IAllocator(object): for n in iinfo.nics] pir = { "tags": list(iinfo.GetTags()), - "should_run": iinfo.status == "up", + "admin_up": iinfo.admin_up, "vcpus": beinfo[constants.BE_VCPUS], "memory": beinfo[constants.BE_MEMORY], "os": iinfo.os, "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes), "nics": nic_data, - "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks], + "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks], "disk_template": iinfo.disk_template, "hypervisor": iinfo.hypervisor, } @@ -6365,8 +6488,6 @@ class IAllocator(object): """ data = self.in_data - if len(self.disks) != 2: - raise errors.OpExecError("Only two-disk configurations supported") disk_space = _ComputeDiskSize(self.disk_template, self.disks) @@ -6523,8 +6644,6 @@ class LUTestAllocator(NoHooksLU): " 'nics' parameter") if not isinstance(self.op.disks, list): raise errors.OpPrereqError("Invalid parameter 'disks'") - if len(self.op.disks) != 2: - raise errors.OpPrereqError("Only two-disk configurations supported") for row in self.op.disks: if (not isinstance(row, dict) or "size" not in row or @@ -6533,7 +6652,7 @@ class LUTestAllocator(NoHooksLU): row["mode"] not in ['r', 'w']): raise errors.OpPrereqError("Invalid contents of the" " 'disks' parameter") - if self.op.hypervisor is None: + if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None: self.op.hypervisor = self.cfg.GetHypervisorType() elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: if not hasattr(self.op, "name"):
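Editor's note: the reworked loop above charges each primary instance's configured memory to its node, reserves the gap between configured and currently used memory, and counts admin_up instances separately. The same accounting as a standalone helper, with plain tuples instead of instance objects:

def primary_memory_accounting(node_name, instances, live_memory, free_memory):
  # instances: iterable of (name, primary_node, admin_up, configured_memory)
  # live_memory: {instance_name: memory the running instance uses right now}
  i_pri_memory = i_pri_up_memory = 0
  for name, pnode, admin_up, conf_mem in instances:
    if pnode != node_name:
      continue
    i_pri_memory += conf_mem
    used_mem = live_memory.get(name, 0)
    # memory the instance is entitled to but not currently using is
    # subtracted from the node's free memory, never added back
    free_memory -= max(0, conf_mem - used_mem)
    if admin_up:
      i_pri_up_memory += conf_mem
  return i_pri_memory, i_pri_up_memory, free_memory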