X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/45bc463555d1bc3440b0faa72d742f47a515b48e..ca83454f36da2226fe84b32f2cce81610f938568:/lib/backend.py diff --git a/lib/backend.py b/lib/backend.py index cf28d4f..bc833e9 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -60,11 +60,11 @@ from ganeti import ssconf from ganeti import serializer from ganeti import netutils from ganeti import runtime -from ganeti import mcpu from ganeti import compat from ganeti import pathutils from ganeti import vcluster from ganeti import ht +from ganeti import hooksmaster _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id" @@ -110,6 +110,34 @@ class RPCFail(Exception): """ +def _GetInstReasonFilename(instance_name): + """Path of the file containing the reason of the instance status change. + + @type instance_name: string + @param instance_name: The name of the instance + @rtype: string + @return: The path of the file + + """ + return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name) + + +def _StoreInstReasonTrail(instance_name, trail): + """Serialize a reason trail related to an instance change of state to file. + + The exact location of the file depends on the name of the instance and on + the configuration of the Ganeti cluster defined at deploy time. + + @type instance_name: string + @param instance_name: The name of the instance + @rtype: None + + """ + json = serializer.DumpJson(trail) + filename = _GetInstReasonFilename(instance_name) + utils.WriteFile(filename, data=json) + + def _Fail(msg, *args, **kwargs): """Log an error and the raise an RPCFail exception. @@ -297,10 +325,10 @@ def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn): cfg = _GetConfig() hr = HooksRunner() - hm = mcpu.HooksMaster(hook_opcode, hooks_path, nodes, hr.RunLocalHooks, - None, env_fn, logging.warning, cfg.GetClusterName(), - cfg.GetMasterNode()) - + hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes, + hr.RunLocalHooks, None, env_fn, + logging.warning, cfg.GetClusterName(), + cfg.GetMasterNode()) hm.RunPhase(constants.HOOKS_PHASE_PRE) result = fn(*args, **kwargs) hm.RunPhase(constants.HOOKS_PHASE_POST) @@ -1300,13 +1328,17 @@ def _GatherAndLinkBlockDevs(instance): return block_devices -def StartInstance(instance, startup_paused): +def StartInstance(instance, startup_paused, reason, store_reason=True): """Start an instance. @type instance: L{objects.Instance} @param instance: the instance object @type startup_paused: bool @param instance: pause instance at startup? + @type reason: list of reasons + @param reason: the reason trail for this startup + @type store_reason: boolean + @param store_reason: whether to store the shutdown reason trail on file @rtype: None """ @@ -1320,6 +1352,8 @@ def StartInstance(instance, startup_paused): block_devices = _GatherAndLinkBlockDevs(instance) hyper = hypervisor.GetHypervisor(instance.hypervisor) hyper.StartInstance(instance, block_devices, startup_paused) + if store_reason: + _StoreInstReasonTrail(instance.name, reason) except errors.BlockDeviceError, err: _Fail("Block device error: %s", err, exc=True) except errors.HypervisorError, err: @@ -1327,7 +1361,7 @@ def StartInstance(instance, startup_paused): _Fail("Hypervisor error: %s", err, exc=True) -def InstanceShutdown(instance, timeout): +def InstanceShutdown(instance, timeout, reason, store_reason=True): """Shut an instance down. @note: this functions uses polling with a hardcoded timeout. @@ -1336,6 +1370,10 @@ def InstanceShutdown(instance, timeout): @param instance: the instance object @type timeout: integer @param timeout: maximum timeout for soft shutdown + @type reason: list of reasons + @param reason: the reason trail for this shutdown + @type store_reason: boolean + @param store_reason: whether to store the shutdown reason trail on file @rtype: None """ @@ -1357,6 +1395,8 @@ def InstanceShutdown(instance, timeout): try: hyper.StopInstance(instance, retry=self.tried_once) + if store_reason: + _StoreInstReasonTrail(instance.name, reason) except errors.HypervisorError, err: if iname not in hyper.ListInstances(): # if the instance is no longer existing, consider this a @@ -1396,7 +1436,7 @@ def InstanceShutdown(instance, timeout): _RemoveBlockDevLinks(iname, instance.disks) -def InstanceReboot(instance, reboot_type, shutdown_timeout): +def InstanceReboot(instance, reboot_type, shutdown_timeout, reason): """Reboot an instance. @type instance: L{objects.Instance} @@ -1414,6 +1454,8 @@ def InstanceReboot(instance, reboot_type, shutdown_timeout): instance (instead of a call_instance_reboot RPC) @type shutdown_timeout: integer @param shutdown_timeout: maximum timeout for soft shutdown + @type reason: list of reasons + @param reason: the reason trail for this reboot @rtype: None """ @@ -1430,8 +1472,10 @@ def InstanceReboot(instance, reboot_type, shutdown_timeout): _Fail("Failed to soft reboot instance %s: %s", instance.name, err) elif reboot_type == constants.INSTANCE_REBOOT_HARD: try: - InstanceShutdown(instance, shutdown_timeout) - return StartInstance(instance, False) + InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) + result = StartInstance(instance, False, reason, store_reason=False) + _StoreInstReasonTrail(instance.name, reason) + return result except errors.HypervisorError, err: _Fail("Failed to hard reboot instance %s: %s", instance.name, err) else: @@ -2501,11 +2545,6 @@ def OSEnvironment(instance, inst_os, debug=0): if nic.netinfo: nobj = objects.Network.FromDict(nic.netinfo) result.update(nobj.HooksDict("NIC_%d_" % idx)) - elif nic.network: - # FIXME: broken network reference: the instance NIC specifies a network, - # but the relevant network entry was not in the config. This should be - # made impossible. - result["INSTANCE_NIC%d_NETWORK" % idx] = nic.network if constants.HV_NIC_TYPE in instance.hvparams: result["NIC_%d_FRONTEND_TYPE" % idx] = \ instance.hvparams[constants.HV_NIC_TYPE] @@ -2967,7 +3006,7 @@ def JobQueueUpdate(file_name, content): # Write and replace the file atomically utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, - gid=getents.masterd_gid) + gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS) def JobQueueRename(old, new): @@ -2991,8 +3030,8 @@ def JobQueueRename(old, new): getents = runtime.GetEnts() - utils.RenameFile(old, new, mkdir=True, mkdir_mode=0700, - dir_uid=getents.masterd_uid, dir_gid=getents.masterd_gid) + utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, + dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid) def BlockdevClose(instance_name, disks):