self._ErrorIf(test, self.ENODEHOOKS, node_name,
"Communication failure in hooks execution: %s", msg)
if res.offline or msg:
- # No need to investigate payload if node is offline or gave an error.
- # override manually lu_result here as _ErrorIf only
- # overrides self.bad
- lu_result = 1
+ # No need to investigate payload if node is offline or gave
+ # an error.
continue
for script, hkr, output in res.payload:
test = hkr == constants.HKR_FAIL
if test:
output = self._HOOKS_INDENT_RE.sub(" ", output)
feedback_fn("%s" % output)
- lu_result = 0
+ lu_result = False
return lu_result
if not redist:
files_all.update(constants.ALL_CERT_FILES)
files_all.update(ssconf.SimpleStore().GetFileList())
+ else:
+ # we need to ship at least the RAPI certificate
+ files_all.add(constants.RAPI_CERT_FILE)
if cluster.modify_etc_hosts:
files_all.add(constants.ETC_HOSTS)
@return: Opcode object
"""
- CheckType(body, dict, "Body contents")
+ if body is None:
+ params = {}
+ else:
+ CheckType(body, dict, "Body contents")
- # Make copy to be modified
- params = body.copy()
+ # Make copy to be modified
+ params = body.copy()
if rename:
for old, new in rename.items():
@return: a job id
"""
- baserlib.CheckType(self.request_body, dict, "Body contents")
-
no_remember = bool(self._checkIntVariable("no_remember"))
op = _ParseShutdownInstanceRequest(self.items[0], self.request_body,
bool(self.dryRun()), no_remember)
result = utils.RunCmd(command)
if result.failed:
- logging.error("Copy to node %s failed (%s) error %s,"
- " command was %s",
+ logging.error("Copy to node %s failed (%s) error '%s',"
+ " command was '%s'",
node, result.fail_reason, result.output, result.cmd)
return not result.failed
if debug:
stderr_handler.setLevel(logging.NOTSET)
else:
- stderr_handler.setLevel(logging.CRITICAL)
+ stderr_handler.setLevel(logging.ERROR)
root_logger.addHandler(stderr_handler)
if syslog in (constants.SYSLOG_YES, constants.SYSLOG_ONLY):
jobset will be executed in parallel. The jobsets themselves are
executed serially.
+ The execution of the job series can be interrupted, see below for
+ signal handling.
+
-l *N*, --max-length=*N*
Restrict the solution to this length. This can be used for example
to automate the execution of the balancing.
-V, --version
Just show the program version and exit.
+SIGNAL HANDLING
+---------------
+
+When executing jobs via LUXI (using the ``-X`` option), normally hbal
+will execute all jobs until either one errors out or all the jobs finish
+successfully.
+
+Since balancing can take a long time, it is possible to stop hbal early
+in two ways:
+
+- by sending a ``SIGINT`` (``^C``), hbal will register the termination
+ request, and will wait until the currently submitted jobs finish, at
+ which point it will exit (with exit code 1)
+- by sending a ``SIGTERM``, hbal will immediately exit (with exit code
+ 2); it is the responsibility of the user to follow up with Ganeti on
+ the result of the currently-executing jobs
+
+Note that in any situation, it's perfectly safe to kill hbal, either via
+the above signals or via any other signal (e.g. ``SIGQUIT``,
+``SIGKILL``), since the jobs themselves are processed by Ganeti whereas
+hbal (after submission) only watches their progression. In this case,
+the user will again have to query Ganeti for job results.
+
EXIT STATUS
-----------
-The exit status of the command will be zero, unless for some reason
-the algorithm fatally failed (e.g. wrong node or instance data), or
-(in case of job execution) any job has failed.
+The exit status of the command will be zero, unless for some reason the
+algorithm fatally failed (e.g. wrong node or instance data), or (in case
+of job execution) either one of the jobs has failed or the balancing was
+interrupted early.
BUGS
----
-The program does not check its input data for consistency, and aborts
-with cryptic errors messages in this case.
+The program does not check all its input data for consistency, and
+sometimes aborts with cryptic error messages when given invalid data.
The algorithm is not perfect.
-The output format is not easily scriptable, and the program should
-feed moves directly into Ganeti (either via RAPI or via a gnt-debug
-input file).
-
EXAMPLE
-------
qa_rapi.TestRapiStoppedInstanceConsole, instance)
RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)
+ # Test shutdown/start via RAPI
+ RunTestIf(["instance-shutdown", "rapi"],
+ qa_rapi.TestRapiInstanceShutdown, instance)
+ RunTestIf(["instance-shutdown", "rapi"],
+ qa_rapi.TestRapiInstanceStartup, instance)
+
RunTestIf("instance-list", qa_instance.TestInstanceList)
RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)
_WaitForRapiJob(_rapi_client.FailoverInstance(instance["name"]))
+def TestRapiInstanceShutdown(instance):
+ """Test stopping an instance via RAPI"""
+ _WaitForRapiJob(_rapi_client.ShutdownInstance(instance["name"]))
+
+
+def TestRapiInstanceStartup(instance):
+ """Test starting an instance via RAPI"""
+ _WaitForRapiJob(_rapi_client.StartupInstance(instance["name"]))
+
+
def TestRapiInstanceRename(rename_source, rename_target):
"""Test renaming instance via RAPI"""
_WaitForRapiJob(_rapi_client.RenameInstance(rename_source, rename_target))
print " %s: uploading files" % hostname
upload_dir = UploadFiles(connection, executable,
filelist, logfile)
- command = ("cd %s && ./%s %s" %
- (upload_dir, os.path.basename(executable), exec_args))
+ command = ("cd %s && ./%s" %
+ (upload_dir, os.path.basename(executable)))
+ if exec_args:
+ command += " %s" % exec_args
print " %s: executing remote command" % hostname
cmd_result = RunRemoteCommand(connection, command, logfile)
if cmd_result is True: