import sys
import textwrap
import os.path
-import copy
import time
import logging
from cStringIO import StringIO
from ganeti import luxi
from ganeti import ssconf
from ganeti import rpc
+from ganeti import ssh
+from ganeti import compat
from optparse import (OptionParser, TitledHelpFormatter,
Option, OptionValueError)
__all__ = [
# Command line options
+ "ADD_UIDS_OPT",
+ "ALLOCATABLE_OPT",
"ALL_OPT",
+ "AUTO_PROMOTE_OPT",
"AUTO_REPLACE_OPT",
"BACKEND_OPT",
"CLEANUP_OPT",
"CONFIRM_OPT",
+ "CP_SIZE_OPT",
"DEBUG_OPT",
"DEBUG_SIMERR_OPT",
"DISKIDX_OPT",
"DISK_OPT",
"DISK_TEMPLATE_OPT",
"DRAINED_OPT",
+ "EARLY_RELEASE_OPT",
+ "ENABLED_HV_OPT",
+ "ERROR_CODES_OPT",
"FIELDS_OPT",
"FILESTORE_DIR_OPT",
"FILESTORE_DRIVER_OPT",
+ "FORCE_OPT",
+ "FORCE_VARIANT_OPT",
+ "GLOBAL_FILEDIR_OPT",
"HVLIST_OPT",
"HVOPTS_OPT",
"HYPERVISOR_OPT",
"IALLOCATOR_OPT",
+ "IDENTIFY_DEFAULTS_OPT",
"IGNORE_CONSIST_OPT",
"IGNORE_FAILURES_OPT",
+ "IGNORE_SECONDARIES_OPT",
"IGNORE_SIZE_OPT",
- "FORCE_OPT",
+ "MAC_PREFIX_OPT",
+ "MAINTAIN_NODE_HEALTH_OPT",
+ "MASTER_NETDEV_OPT",
"MC_OPT",
"NET_OPT",
+ "NEW_CLUSTER_CERT_OPT",
+ "NEW_CONFD_HMAC_KEY_OPT",
+ "NEW_RAPI_CERT_OPT",
"NEW_SECONDARY_OPT",
+ "NIC_PARAMS_OPT",
"NODE_LIST_OPT",
"NODE_PLACEMENT_OPT",
"NOHDR_OPT",
"NOIPCHECK_OPT",
+ "NO_INSTALL_OPT",
+ "NONAMECHECK_OPT",
+ "NOLVM_STORAGE_OPT",
+ "NOMODIFY_ETCHOSTS_OPT",
+ "NOMODIFY_SSH_SETUP_OPT",
"NONICS_OPT",
"NONLIVE_OPT",
+ "NONPLUS1_OPT",
+ "NOSHUTDOWN_OPT",
"NOSTART_OPT",
"NOSSH_KEYCHECK_OPT",
+ "NOVOTING_OPT",
"NWSYNC_OPT",
"ON_PRIMARY_OPT",
"ON_SECONDARY_OPT",
"OFFLINE_OPT",
"OS_OPT",
"OS_SIZE_OPT",
+ "RAPI_CERT_OPT",
"READD_OPT",
+ "REBOOT_TYPE_OPT",
+ "REMOVE_UIDS_OPT",
+ "ROMAN_OPT",
"SECONDARY_IP_OPT",
"SELECT_OS_OPT",
"SEP_OPT",
"SHOWCMD_OPT",
+ "SHUTDOWN_TIMEOUT_OPT",
"SINGLE_NODE_OPT",
"SRC_DIR_OPT",
"SRC_NODE_OPT",
"STATIC_OPT",
"SYNC_OPT",
"TAG_SRC_OPT",
+ "TIMEOUT_OPT",
+ "UIDPOOL_OPT",
"USEUNITS_OPT",
+ "USE_REPL_NET_OPT",
"VERBOSE_OPT",
+ "VG_NAME_OPT",
+ "YES_DOIT_OPT",
# Generic functions for CLI programs
"GenericMain",
+ "GenericInstanceCreate",
"GetClient",
"GetOnlineNodes",
"JobExecutor",
"JobSubmittedException",
"ParseTimespec",
+ "RunWhileClusterStopped",
"SubmitOpCode",
"SubmitOrSend",
"UsesRPC",
"ARGS_NONE",
"ARGS_ONE_INSTANCE",
"ARGS_ONE_NODE",
+ "ARGS_ONE_OS",
"ArgChoice",
"ArgCommand",
"ArgFile",
"ArgInstance",
"ArgJobId",
"ArgNode",
+ "ArgOs",
"ArgSuggest",
"ArgUnknown",
"OPT_COMPL_INST_ADD_NODES",
"OPT_COMPL_ONE_OS",
"cli_option",
"SplitNodeOption",
+ "CalculateOSNames",
]
NO_PREFIX = "no_"
class _Argument:
- def __init__(self, min=0, max=None):
+ def __init__(self, min=0, max=None): # pylint: disable-msg=W0622
self.min = min
self.max = max
Value can be any of the ones passed to the constructor.
"""
+ # pylint: disable-msg=W0622
def __init__(self, min=0, max=None, choices=None):
_Argument.__init__(self, min=min, max=max)
self.choices = choices
"""
+class ArgOs(_Argument):
+  """OS argument.
+
+  Marker subclass of L{_Argument}: it defines no attributes or methods of
+  its own and therefore only differs from the base class by its type.
+
+  """
+
+
ARGS_NONE = []
ARGS_MANY_INSTANCES = [ArgInstance()]
ARGS_MANY_NODES = [ArgNode()]
ARGS_ONE_INSTANCE = [ArgInstance(min=1, max=1)]
ARGS_ONE_NODE = [ArgNode(min=1, max=1)]
-
+ARGS_ONE_OS = [ArgOs(min=1, max=1)]
def _ExtractTagsObject(opts, args):
"""
kind, name = _ExtractTagsObject(opts, args)
- op = opcodes.OpGetTags(kind=kind, name=name)
- result = SubmitOpCode(op)
+ cl = GetClient()
+ result = cl.QueryTags(kind, name)
result = list(result)
result.sort()
for tag in result:
SubmitOpCode(op)
-def check_unit(option, opt, value):
+def check_unit(option, opt, value): # pylint: disable-msg=W0613
"""OptParsers custom converter for units.
"""
"""
kv_dict = {}
if data:
- for elem in data.split(","):
+ for elem in utils.UnescapeAndSplit(data, sep=","):
if "=" in elem:
key, val = elem.split("=", 1)
else:
return kv_dict
-def check_ident_key_val(option, opt, value):
+def check_ident_key_val(option, opt, value): # pylint: disable-msg=W0613
"""Custom parser for ident:key=val,key=val options.
This will store the parsed values as a tuple (ident, {key: val}). As such,
return retval
-def check_key_val(option, opt, value):
+def check_key_val(option, opt, value): # pylint: disable-msg=W0613
"""Custom parser class for key=val,key=val options.
This will store the parsed values as a dict {key: val}.
return _SplitKeyVal(opt, value)
+def check_bool(option, opt, value): # pylint: disable-msg=W0613
+  """Custom optparse type checker for yes/no options.
+
+  The text is compared in lower case. C{constants.VALUE_TRUE} and "yes" are
+  turned into True, C{constants.VALUE_FALSE} and "no" into False.
+
+  @param option: option object, not used
+  @param opt: option string, not used
+  @type value: string
+  @param value: text passed on the command line
+  @rtype: boolean
+  @raise errors.ParameterError: if the text is none of the accepted values
+
+  """
+  lowered = value.lower()
+
+  if lowered in (constants.VALUE_FALSE, "no"):
+    return False
+
+  if lowered in (constants.VALUE_TRUE, "yes"):
+    return True
+
+  # The message shows the lower-cased text, not the raw input
+  raise errors.ParameterError("Invalid boolean value '%s'" % lowered)
+
+
# completion_suggestion is normally a list. Using numeric values not evaluating
# to False for dynamic completion.
(OPT_COMPL_MANY_NODES,
"identkeyval",
"keyval",
"unit",
+ "bool",
)
TYPE_CHECKER = Option.TYPE_CHECKER.copy()
TYPE_CHECKER["identkeyval"] = check_ident_key_val
TYPE_CHECKER["keyval"] = check_key_val
TYPE_CHECKER["unit"] = check_unit
+ TYPE_CHECKER["bool"] = check_bool
# optparse.py sets make_option, so we do it for our own option class, too
cli_option = CliOption
-_YESNO = ("yes", "no")
_YORNO = "yes|no"
-DEBUG_OPT = cli_option("-d", "--debug", default=False,
- action="store_true",
- help="Turn debugging on")
+# Counting option: the value starts at 0 and the "count" action increments it
+# for every occurrence of -d/--debug on the command line
+DEBUG_OPT = cli_option("-d", "--debug", default=0, action="count",
+                       help="Increase debugging level")
NOHDR_OPT = cli_option("--no-headers", default=False,
action="store_true", dest="no_headers",
metavar="<os>",
completion_suggest=OPT_COMPL_ONE_OS)
+# Plain flag, stored in opts.force_variant (False unless given)
+FORCE_VARIANT_OPT = cli_option("--force-variant", dest="force_variant",
+                               action="store_true", default=False,
+                               help="Force an unknown variant")
+
+# Plain flag, stored in opts.no_install (False unless given)
+NO_INSTALL_OPT = cli_option("--no-install", dest="no_install",
+                            action="store_true", default=False,
+                            help="Do not install the OS (will"
+                            " enable no-start)")
+
BACKEND_OPT = cli_option("-B", "--backend-parameters", dest="beparams",
type="keyval", default={},
help="Backend parameters")
help="Don't check that the instance's IP"
" is alive")
+# Negative flag: opts.name_check defaults to True and is set to False when
+# --no-name-check is passed
+NONAMECHECK_OPT = cli_option("--no-name-check", dest="name_check",
+                             default=True, action="store_false",
+                             help="Don't check that the instance's name"
+                             " is resolvable")
+
NET_OPT = cli_option("--net",
help="NIC parameters", default=[],
dest="nics", action="append", type="identkeyval")
help="Replace the disk(s) on the secondary"
" node (only for the drbd template)")
+# Plain flag, stored in opts.auto_promote (False unless given)
+AUTO_PROMOTE_OPT = cli_option("--auto-promote", dest="auto_promote",
+                              default=False, action="store_true",
+                              help="Lock all nodes and auto-promote as needed"
+                              " to MC status")
+
AUTO_REPLACE_OPT = cli_option("-a", "--auto", dest="auto",
default=False, action="store_true",
help="Automatically replace faulty disks"
MC_OPT = cli_option("-C", "--master-candidate", dest="master_candidate",
- choices=_YESNO, default=None, metavar=_YORNO,
+ type="bool", default=None, metavar=_YORNO,
help="Set the master_candidate flag on the node")
OFFLINE_OPT = cli_option("-O", "--offline", dest="offline", metavar=_YORNO,
- choices=_YESNO, default=None,
+ type="bool", default=None,
help="Set the offline flag on the node")
DRAINED_OPT = cli_option("-D", "--drained", dest="drained", metavar=_YORNO,
- choices=_YESNO, default=None,
+ type="bool", default=None,
help="Set the drained flag on the node")
+# Value of type "bool" (converted by check_bool); stays None when the option
+# is not given
+ALLOCATABLE_OPT = cli_option("--allocatable", dest="allocatable",
+                             type="bool", default=None, metavar=_YORNO,
+                             help="Set the allocatable flag on a volume")
+
+# Negative flag: opts.lvm_storage is True unless --no-lvm-storage is passed
+NOLVM_STORAGE_OPT = cli_option("--no-lvm-storage", dest="lvm_storage",
+                               help="Disable support for lvm based instances"
+                               " (cluster-wide)",
+                               action="store_false", default=True)
+
+# Stored as the raw string; splitting the list is left to the caller
+ENABLED_HV_OPT = cli_option("--enabled-hypervisors",
+                            dest="enabled_hypervisors",
+                            help="Comma-separated list of hypervisors",
+                            type="string", default=None)
+
+# Value of type "keyval" (converted by check_key_val into a dict)
+NIC_PARAMS_OPT = cli_option("-N", "--nic-parameters", dest="nicparams",
+                            type="keyval", default={},
+                            help="NIC parameters")
+
+# NOTE(review): shares the short form "-C" with MC_OPT; presumably the two are
+# never offered by the same command -- confirm
+CP_SIZE_OPT = cli_option("-C", "--candidate-pool-size", default=None,
+                         dest="candidate_pool_size", type="int",
+                         help="Set the candidate pool size")
+
+VG_NAME_OPT = cli_option("-g", "--vg-name", dest="vg_name",
+                         help="Enables LVM and specifies the volume group"
+                         " name (cluster-wide) for disk allocation [xenvg]",
+                         metavar="VG", default=None)
+
+# No explicit default is passed here; presumably opts.yes_do_it is then None
+# when the flag is absent (optparse behaviour) -- confirm against CliOption
+YES_DOIT_OPT = cli_option("--yes-do-it", dest="yes_do_it",
+                          help="Destroy cluster", action="store_true")
+
+# Plain flag, stored in opts.no_voting (False unless given)
+NOVOTING_OPT = cli_option("--no-voting", dest="no_voting",
+                          help="Skip node agreement check (dangerous)",
+                          action="store_true", default=False)
+
+MAC_PREFIX_OPT = cli_option("-m", "--mac-prefix", dest="mac_prefix",
+                            help="Specify the mac prefix for the instance IP"
+                            " addresses, in the format XX:XX:XX",
+                            metavar="PREFIX",
+                            default=None)
+
+# The default and the value shown in the help text both come from
+# constants.DEFAULT_BRIDGE
+MASTER_NETDEV_OPT = cli_option("--master-netdev", dest="master_netdev",
+                               help="Specify the node interface (cluster-wide)"
+                               " on which the master IP address will be added "
+                               " [%s]" % constants.DEFAULT_BRIDGE,
+                               metavar="NETDEV",
+                               default=constants.DEFAULT_BRIDGE)
+
+
+# The default and the value shown in the help text both come from
+# constants.DEFAULT_FILE_STORAGE_DIR
+GLOBAL_FILEDIR_OPT = cli_option("--file-storage-dir", dest="file_storage_dir",
+                                help="Specify the default directory (cluster-"
+                                "wide) for storing the file-based disks [%s]" %
+                                constants.DEFAULT_FILE_STORAGE_DIR,
+                                metavar="DIR",
+                                default=constants.DEFAULT_FILE_STORAGE_DIR)
+
+# Negative flag: opts.modify_etc_hosts is True unless --no-etc-hosts is passed
+NOMODIFY_ETCHOSTS_OPT = cli_option("--no-etc-hosts", dest="modify_etc_hosts",
+                                   help="Don't modify /etc/hosts",
+                                   action="store_false", default=True)
+
+# Negative flag: opts.modify_ssh_setup is True unless --no-ssh-init is passed
+NOMODIFY_SSH_SETUP_OPT = cli_option("--no-ssh-init", dest="modify_ssh_setup",
+                                    help="Don't initialize SSH keys",
+                                    action="store_false", default=True)
+
+# Plain flag, stored in opts.error_codes (False unless given)
+ERROR_CODES_OPT = cli_option("--error-codes", dest="error_codes",
+                             help="Enable parseable error messages",
+                             action="store_true", default=False)
+
+# Plain flag, stored in opts.skip_nplusone_mem (False unless given)
+NONPLUS1_OPT = cli_option("--no-nplus1-mem", dest="skip_nplusone_mem",
+                          help="Skip N+1 memory redundancy tests",
+                          action="store_true", default=False)
+
+# Only the values in constants.REBOOT_TYPES are accepted; the default is
+# constants.INSTANCE_REBOOT_HARD
+REBOOT_TYPE_OPT = cli_option("-t", "--type", dest="reboot_type",
+                             help="Type of reboot: soft/hard/full",
+                             default=constants.INSTANCE_REBOOT_HARD,
+                             metavar="<REBOOT>",
+                             choices=list(constants.REBOOT_TYPES))
+
+# Plain flag, stored in opts.ignore_secondaries (False unless given)
+IGNORE_SECONDARIES_OPT = cli_option("--ignore-secondaries",
+                                    dest="ignore_secondaries",
+                                    default=False, action="store_true",
+                                    help="Ignore errors from secondaries")
+
+# Negative flag: opts.shutdown is True unless --noshutdown is passed
+NOSHUTDOWN_OPT = cli_option("--noshutdown", dest="shutdown",
+                            action="store_false", default=True,
+                            help="Don't shutdown the instance (unsafe)")
+
+# Integer option; shares its default (constants.DEFAULT_SHUTDOWN_TIMEOUT)
+# with SHUTDOWN_TIMEOUT_OPT below
+TIMEOUT_OPT = cli_option("--timeout", dest="timeout", type="int",
+                         default=constants.DEFAULT_SHUTDOWN_TIMEOUT,
+                         help="Maximum time to wait")
+
+# Integer option, stored in opts.shutdown_timeout
+SHUTDOWN_TIMEOUT_OPT = cli_option("--shutdown-timeout",
+                         dest="shutdown_timeout", type="int",
+                         default=constants.DEFAULT_SHUTDOWN_TIMEOUT,
+                         help="Maximum time to wait for instance shutdown")
+
+# Plain flag, stored in opts.early_release (False unless given)
+EARLY_RELEASE_OPT = cli_option("--early-release",
+                               dest="early_release", default=False,
+                               action="store_true",
+                               help="Release the locks on the secondary"
+                               " node(s) early")
+
+# Plain flag, stored in opts.new_cluster_cert (False unless given)
+NEW_CLUSTER_CERT_OPT = cli_option("--new-cluster-certificate",
+                                  dest="new_cluster_cert",
+                                  default=False, action="store_true",
+                                  help="Generate a new cluster certificate")
+
+# Takes a value (a file name according to the help text); None if not given
+RAPI_CERT_OPT = cli_option("--rapi-certificate", dest="rapi_cert",
+                           default=None,
+                           help="File containing new RAPI certificate")
+
+# NOTE(review): the default is None here, whereas the other "store_true"
+# flags in this group default to False -- confirm whether callers rely on it
+NEW_RAPI_CERT_OPT = cli_option("--new-rapi-certificate", dest="new_rapi_cert",
+                               default=None, action="store_true",
+                               help=("Generate a new self-signed RAPI"
+                                     " certificate"))
+
+# The help text embeds the value of constants.CONFD
+NEW_CONFD_HMAC_KEY_OPT = cli_option("--new-confd-hmac-key",
+                                    dest="new_confd_hmac_key",
+                                    default=False, action="store_true",
+                                    help=("Create a new HMAC key for %s" %
+                                          constants.CONFD))
+
+# Plain flag, stored in opts.use_replication_network (False unless given)
+USE_REPL_NET_OPT = cli_option("--use-replication-network",
+                              dest="use_replication_network",
+                              help="Whether to use the replication network"
+                              " for talking to the nodes",
+                              action="store_true", default=False)
+
+# Value of type "bool" (converted by check_bool); stays None when the option
+# is not given
+MAINTAIN_NODE_HEALTH_OPT = \
+  cli_option("--maintain-node-health", dest="maintain_node_health",
+             metavar=_YORNO, default=None, type="bool",
+             help="Configure the cluster to automatically maintain node"
+             " health, by shutting down unknown instances, shutting down"
+             " unknown DRBD devices, etc.")
+
+# Plain flag, stored in opts.identify_defaults (False unless given)
+IDENTIFY_DEFAULTS_OPT = \
+  cli_option("--identify-defaults", dest="identify_defaults",
+             default=False, action="store_true",
+             help="Identify which saved instance parameters are equal to"
+             " the current cluster defaults and set them as such, instead"
+             " of marking them as overridden")
+
+# The three user-id options below use the plain "store" action without a
+# custom type, i.e. the text is kept as given and not parsed at this level
+UIDPOOL_OPT = cli_option("--uid-pool", default=None,
+                         action="store", dest="uid_pool",
+                         help=("A list of user-ids or user-id"
+                               " ranges separated by commas"))
+
+ADD_UIDS_OPT = cli_option("--add-uids", default=None,
+                          action="store", dest="add_uids",
+                          help=("A list of user-ids or user-id"
+                                " ranges separated by commas, to be"
+                                " added to the user-id pool"))
+
+REMOVE_UIDS_OPT = cli_option("--remove-uids", default=None,
+                             action="store", dest="remove_uids",
+                             help=("A list of user-ids or user-id"
+                                   " ranges separated by commas, to be"
+                                   " removed from the user-id pool"))
+
+# Plain flag, stored in opts.roman_integers (False unless given)
+ROMAN_OPT = cli_option("--roman",
+                       dest="roman_integers", default=False,
+                       action="store_true",
+                       help="Use roman numbers for positive integers")
+
+
def _ParseArgs(argv, commands, aliases):
"""Parser for the command line arguments.
cmd = aliases[cmd]
func, args_def, parser_opts, usage, description = commands[cmd]
- parser = OptionParser(option_list=parser_opts + [_DRY_RUN_OPT],
+ parser = OptionParser(option_list=parser_opts + [_DRY_RUN_OPT, DEBUG_OPT],
description=description,
formatter=TitledHelpFormatter(),
usage="%%prog %s %s" % (cmd, usage))
return (value, None)
+def CalculateOSNames(os_name, os_variants):
+  """Builds the list of names under which an OS can be referred to.
+
+  Without variants the OS is only known by its base name. With variants there
+  is one "name+variant" entry per variant and the bare base name is not part
+  of the result.
+
+  @type os_name: string
+  @param os_name: base name of the os
+  @type os_variants: list or None
+  @param os_variants: list of supported variants
+  @rtype: list
+  @return: list of valid names
+
+  """
+  if not os_variants:
+    return [os_name]
+
+  names = []
+  for variant in os_variants:
+    names.append("%s+%s" % (os_name, variant))
+  return names
+
+
def UsesRPC(fn):
def wrapper(*args, **kwargs):
rpc.Init()
return job_id
-def PollJob(job_id, cl=None, feedback_fn=None):
- """Function to poll for the result of a job.
+def GenericPollJob(job_id, cbs, report_cbs):
+ """Generic job-polling function.
- @type job_id: job identified
- @param job_id: the job to poll for results
- @type cl: luxi.Client
- @param cl: the luxi client to use for communicating with the master;
- if None, a new client will be created
+ @type job_id: number
+ @param job_id: Job ID
+ @type cbs: Instance of L{JobPollCbBase}
+ @param cbs: Data callbacks
+ @type report_cbs: Instance of L{JobPollReportCbBase}
+ @param report_cbs: Reporting callbacks
"""
- if cl is None:
- cl = GetClient()
-
prev_job_info = None
prev_logmsg_serial = None
+ status = None
+
while True:
- result = cl.WaitForJobChange(job_id, ["status"], prev_job_info,
- prev_logmsg_serial)
+ result = cbs.WaitForJobChangeOnce(job_id, ["status"], prev_job_info,
+ prev_logmsg_serial)
if not result:
# job not found, go away!
raise errors.JobLost("Job with id %s lost" % job_id)
+ if result == constants.JOB_NOTCHANGED:
+ report_cbs.ReportNotChanged(job_id, status)
+
+ # Wait again
+ continue
+
# Split result, a tuple of (field values, log entries)
(job_info, log_entries) = result
(status, ) = job_info
if log_entries:
for log_entry in log_entries:
- (serial, timestamp, _, message) = log_entry
- if callable(feedback_fn):
- feedback_fn(log_entry[1:])
- else:
- encoded = utils.SafeEncode(message)
- ToStdout("%s %s", time.ctime(utils.MergeTime(timestamp)), encoded)
+ (serial, timestamp, log_type, message) = log_entry
+ report_cbs.ReportLogMessage(job_id, serial, timestamp,
+ log_type, message)
prev_logmsg_serial = max(prev_logmsg_serial, serial)
# TODO: Handle canceled and archived jobs
prev_job_info = job_info
- jobs = cl.QueryJobs([job_id], ["status", "opstatus", "opresult"])
+ jobs = cbs.QueryJobs([job_id], ["status", "opstatus", "opresult"])
if not jobs:
raise errors.JobLost("Job with id %s lost" % job_id)
status, opstatus, result = jobs[0]
+
if status == constants.JOB_STATUS_SUCCESS:
return result
- elif status in (constants.JOB_STATUS_CANCELING,
- constants.JOB_STATUS_CANCELED):
+
+ if status in (constants.JOB_STATUS_CANCELING, constants.JOB_STATUS_CANCELED):
raise errors.OpExecError("Job was canceled")
+
+ has_ok = False
+ for idx, (status, msg) in enumerate(zip(opstatus, result)):
+ if status == constants.OP_STATUS_SUCCESS:
+ has_ok = True
+ elif status == constants.OP_STATUS_ERROR:
+ errors.MaybeRaise(msg)
+
+ if has_ok:
+ raise errors.OpExecError("partial failure (opcode %d): %s" %
+ (idx, msg))
+
+ raise errors.OpExecError(str(msg))
+
+ # default failure mode
+ raise errors.OpExecError(result)
+
+
+class JobPollCbBase:
+  """Base class for L{GenericPollJob} callbacks.
+
+  Both data methods raise C{NotImplementedError} here and have to be
+  provided by subclasses.
+
+  """
+  def __init__(self):
+    """Initializes this class.
+
+    """
+
+  def WaitForJobChangeOnce(self, job_id, fields,
+                           prev_job_info, prev_log_serial):
+    """Waits for changes on a job.
+
+    L{GenericPollJob} treats a false return value as a lost job and
+    C{constants.JOB_NOTCHANGED} as "nothing new yet"; any other value is
+    unpacked as a C{(job_info, log_entries)} pair.
+
+    @type job_id: number
+    @param job_id: Job ID
+    @type fields: list of strings
+    @param fields: Fields
+    @param prev_job_info: job information returned by the previous call,
+        None on the first call
+    @param prev_log_serial: highest log message serial seen so far, None on
+        the first call
+
+    """
+    raise NotImplementedError()
+
+  def QueryJobs(self, job_ids, fields):
+    """Returns the selected fields for the selected job IDs.
+
+    @type job_ids: list of numbers
+    @param job_ids: Job IDs
+    @type fields: list of strings
+    @param fields: Fields
+
+    """
+    raise NotImplementedError()
+
+
+class JobPollReportCbBase:
+  """Base class for L{GenericPollJob} reporting callbacks.
+
+  Both reporting methods raise C{NotImplementedError} here and have to be
+  provided by subclasses.
+
+  """
+  def __init__(self):
+    """Initializes this class.
+
+    """
+
+  def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg):
+    """Handles a log message.
+
+    L{GenericPollJob} calls this once per log entry, passing the four
+    elements of the entry after the job ID.
+
+    @type job_id: number
+    @param job_id: Job ID
+    @param serial: serial number of the log entry
+    @param timestamp: timestamp of the log entry
+    @param log_type: type of the log entry
+    @param log_msg: message of the log entry
+
+    """
+    raise NotImplementedError()
+
+  def ReportNotChanged(self, job_id, status):
+    """Called if a job hasn't changed in a while.
+
+    @type job_id: number
+    @param job_id: Job ID
+    @type status: string or None
+    @param status: Job status if available
+
+    """
+    raise NotImplementedError()
+
+
+class _LuxiJobPollCb(JobPollCbBase):
+  """L{GenericPollJob} data callbacks answered by a client object.
+
+  """
+  def __init__(self, cl):
+    """Remembers the client all requests are forwarded to.
+
+    @param cl: client object; it has to provide C{WaitForJobChangeOnce} and
+        C{QueryJobs}
+
+    """
+    JobPollCbBase.__init__(self)
+    self.cl = cl
+
+  def WaitForJobChangeOnce(self, job_id, fields,
+                           prev_job_info, prev_log_serial):
+    """Forwards the wait request to the client, arguments unchanged.
+
+    """
+    client = self.cl
+    return client.WaitForJobChangeOnce(job_id, fields,
+                                       prev_job_info, prev_log_serial)
+
+  def QueryJobs(self, job_ids, fields):
+    """Forwards the job query to the client, arguments unchanged.
+
+    """
+    client = self.cl
+    return client.QueryJobs(job_ids, fields)
+
+
+class FeedbackFnJobPollReportCb(JobPollReportCbBase):
+  """Reporting callbacks handing log messages to a feedback function.
+
+  """
+  def __init__(self, feedback_fn):
+    """Remembers the feedback function.
+
+    @type feedback_fn: callable
+    @param feedback_fn: function called with one tuple per log message
+
+    """
+    JobPollReportCbBase.__init__(self)
+
+    assert callable(feedback_fn)
+
+    self.feedback_fn = feedback_fn
+
+  def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg):
+    """Passes the log message on to the feedback function.
+
+    Job ID and serial number are dropped; the function receives a single
+    C{(timestamp, log_type, log_msg)} tuple.
+
+    """
+    entry = (timestamp, log_type, log_msg)
+    self.feedback_fn(entry)
+
+  def ReportNotChanged(self, job_id, status):
+    """Called if a job hasn't changed in a while.
+
+    Nothing is reported in that case.
+
+    """
+    # Ignore
+
+
+class StdioJobPollReportCb(JobPollReportCbBase):
+  """Reporting callbacks writing through L{ToStdout} and L{ToStderr}.
+
+  """
+  def __init__(self):
+    """Initializes this class.
+
+    Both notes issued by L{ReportNotChanged} start out as not yet shown.
+
+    """
+    JobPollReportCbBase.__init__(self)
+
+    self.notified_waitlock = False
+    self.notified_queued = False
+
+  def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg):
+    """Prints the formatted timestamp followed by the encoded message.
+
+    """
+    when = time.ctime(utils.MergeTime(timestamp))
+    ToStdout("%s %s", when, utils.SafeEncode(log_msg))
+
+  def ReportNotChanged(self, job_id, status):
+    """Called if a job hasn't changed in a while.
+
+    Each of the two notes (job queued, job waiting for locks) is shown at
+    most once per instance of this class.
+
+    """
+    if status is not None:
+      if status == constants.JOB_STATUS_QUEUED and not self.notified_queued:
+        ToStderr("Job %s is waiting in queue", job_id)
+        self.notified_queued = True
+
+      elif (status == constants.JOB_STATUS_WAITLOCK and
+            not self.notified_waitlock):
+        ToStderr("Job %s is trying to acquire all necessary locks", job_id)
+        self.notified_waitlock = True
+
+
+def PollJob(job_id, cl=None, feedback_fn=None):
+ """Function to poll for the result of a job.
+
+ @type job_id: job identifier
+ @param job_id: the job to poll for results
+ @type cl: luxi.Client
+ @param cl: the luxi client to use for communicating with the master;
+ if None, a new client will be created
+
+ """
+ if cl is None:
+ cl = GetClient()
+
+ if feedback_fn:
+ reporter = FeedbackFnJobPollReportCb(feedback_fn)
else:
- has_ok = False
- for idx, (status, msg) in enumerate(zip(opstatus, result)):
- if status == constants.OP_STATUS_SUCCESS:
- has_ok = True
- elif status == constants.OP_STATUS_ERROR:
- errors.MaybeRaise(msg)
- if has_ok:
- raise errors.OpExecError("partial failure (opcode %d): %s" %
- (idx, msg))
- else:
- raise errors.OpExecError(str(msg))
- # default failure mode
- raise errors.OpExecError(result)
+ reporter = StdioJobPollReportCb()
+
+ return GenericPollJob(job_id, _LuxiJobPollCb(cl), reporter)
-def SubmitOpCode(op, cl=None, feedback_fn=None):
+def SubmitOpCode(op, cl=None, feedback_fn=None, opts=None):
"""Legacy function to submit an opcode.
This is just a simple wrapper over the construction of the processor
if cl is None:
cl = GetClient()
+ SetGenericOpcodeOpts([op], opts)
+
job_id = SendJob([op], cl)
op_results = PollJob(job_id, cl=cl, feedback_fn=feedback_fn)
whether to just send the job and print its identifier. It is used in
order to simplify the implementation of the '--submit' option.
- It will also add the dry-run parameter from the options passed, if true.
+ It will also process the opcodes if we're sending them via SendJob
+ (otherwise SubmitOpCode does it).
"""
- if opts and opts.dry_run:
- op.dry_run = opts.dry_run
if opts and opts.submit_only:
- job_id = SendJob([op], cl=cl)
+ job = [op]
+ SetGenericOpcodeOpts(job, opts)
+ job_id = SendJob(job, cl=cl)
raise JobSubmittedException(job_id)
else:
- return SubmitOpCode(op, cl=cl, feedback_fn=feedback_fn)
+ return SubmitOpCode(op, cl=cl, feedback_fn=feedback_fn, opts=opts)
+
+
+def SetGenericOpcodeOpts(opcode_list, options):
+  """Applies the generic command line options to a list of opcodes.
+
+  The C{dry_run} option is copied to the attribute of the same name and the
+  C{debug} option to C{debug_level} on every opcode. Nothing is done if no
+  options are given.
+
+  @param opcode_list: list of opcodes
+  @param options: command line options or None
+  @return: None (in-place modification)
+
+  """
+  if options:
+    for opcode in opcode_list:
+      opcode.dry_run = options.dry_run
+      opcode.debug_level = options.debug
def GetClient():
try:
client = luxi.Client()
except luxi.NoMasterError:
- master, myself = ssconf.GetMasterAndMyself()
+ ss = ssconf.SimpleStore()
+
+ # Try to read ssconf file
+ try:
+ ss.GetMasterNode()
+ except errors.ConfigurationError:
+ raise errors.OpPrereqError("Cluster not initialized or this machine is"
+ " not part of a cluster")
+
+ master, myself = ssconf.GetMasterAndMyself(ss=ss)
if master != myself:
raise errors.OpPrereqError("This is not the master node, please connect"
" to node '%s' and rerun the command" %
master)
- else:
- raise
+ raise
return client
msg = "Failure: can't resolve hostname '%s'"
obuf.write(msg % err.args[0])
elif isinstance(err, errors.OpPrereqError):
- obuf.write("Failure: prerequisites not met for this"
- " operation:\n%s" % msg)
+ if len(err.args) == 2:
+ obuf.write("Failure: prerequisites not met for this"
+ " operation:\nerror type: %s, error details:\n%s" %
+ (err.args[1], err.args[0]))
+ else:
+ obuf.write("Failure: prerequisites not met for this"
+ " operation:\n%s" % msg)
elif isinstance(err, errors.OpExecError):
obuf.write("Failure: command execution error:\n%s" % msg)
elif isinstance(err, errors.TagError):
return result
+def _ParseInstanceNics(mode, opts):
+  """Builds the NIC list for L{GenericInstanceCreate}.
+
+  @param mode: constants.INSTANCE_CREATE or constants.INSTANCE_IMPORT
+  @param opts: the command line options selected by the user
+  @rtype: list
+  @return: list of NIC parameter dicts, indexed by NIC number
+  @raise errors.OpPrereqError: for invalid NIC indices or values
+
+  """
+  if opts.nics:
+    try:
+      nic_count = max(int(entry[0]) + 1 for entry in opts.nics)
+    except ValueError, err:
+      raise errors.OpPrereqError("Invalid NIC index passed: %s" % str(err))
+
+    # Indices not given on the command line keep the empty placeholder
+    nics = [{}] * nic_count
+    for (raw_idx, params) in opts.nics:
+      idx = int(raw_idx)
+      if not isinstance(params, dict):
+        msg = "Invalid nic/%d value: expected dict, got %s" % (idx, params)
+        raise errors.OpPrereqError(msg)
+      nics[idx] = params
+    return nics
+
+  if not opts.no_nics and mode == constants.INSTANCE_CREATE:
+    # default of one nic, all auto
+    return [{}]
+
+  # NICs explicitly disabled or mode == import
+  return []
+
+
+def _ParseInstanceDisks(mode, opts):
+  """Builds the disk list for L{GenericInstanceCreate}.
+
+  If only a single size was given (C{opts.sd_size}), C{opts.disks} is
+  replaced by the equivalent one-disk definition. The "size" entry of each
+  disk dict is updated in place (parsed size, or 0 for adopted disks).
+
+  @param mode: constants.INSTANCE_CREATE or constants.INSTANCE_IMPORT
+  @param opts: the command line options selected by the user
+  @rtype: list
+  @return: list of disk parameter dicts, indexed by disk number
+  @raise errors.OpPrereqError: for missing, conflicting or invalid disk
+      information
+
+  """
+  if opts.disk_template == constants.DT_DISKLESS:
+    if opts.disks or opts.sd_size is not None:
+      raise errors.OpPrereqError("Diskless instance but disk"
+                                 " information passed")
+    return []
+
+  if (not opts.disks and not opts.sd_size
+      and mode == constants.INSTANCE_CREATE):
+    raise errors.OpPrereqError("No disk information specified")
+  if opts.disks and opts.sd_size is not None:
+    raise errors.OpPrereqError("Please use either the '--disk' or"
+                               " '-s' option")
+  if opts.sd_size is not None:
+    opts.disks = [(0, {"size": opts.sd_size})]
+
+  if opts.disks:
+    try:
+      disk_count = max(int(entry[0]) + 1 for entry in opts.disks)
+    except ValueError, err:
+      raise errors.OpPrereqError("Invalid disk index passed: %s" % str(err))
+    disks = [{}] * disk_count
+  else:
+    disks = []
+
+  for (raw_idx, params) in opts.disks:
+    idx = int(raw_idx)
+    if not isinstance(params, dict):
+      msg = "Invalid disk/%d value: expected dict, got %s" % (idx, params)
+      raise errors.OpPrereqError(msg)
+
+    has_size = "size" in params
+    has_adopt = "adopt" in params
+
+    if has_size and has_adopt:
+      raise errors.OpPrereqError("Only one of 'size' and 'adopt' allowed"
+                                 " (disk %d)" % idx)
+    if not (has_size or has_adopt):
+      raise errors.OpPrereqError("Missing size or adoption source for"
+                                 " disk %d" % idx)
+
+    if has_size:
+      try:
+        params["size"] = utils.ParseUnit(params["size"])
+      except ValueError, err:
+        raise errors.OpPrereqError("Invalid disk size for disk %d: %s" %
+                                   (idx, err))
+    else:
+      # Adoption of an existing volume
+      if mode == constants.INSTANCE_IMPORT:
+        raise errors.OpPrereqError("Disk adoption not allowed for instance"
+                                   " import")
+      params["size"] = 0
+
+    disks[idx] = params
+
+  return disks
+
+
+def GenericInstanceCreate(mode, opts, args):
+  """Add an instance to the cluster via either creation or import.
+
+  The command line options are turned into an L{opcodes.OpCreateInstance}
+  opcode, which is then handed to L{SubmitOrSend}.
+
+  @param mode: constants.INSTANCE_CREATE or constants.INSTANCE_IMPORT
+  @param opts: the command line options selected by the user
+  @type args: list
+  @param args: should contain only one element, the new instance name
+  @rtype: int
+  @return: the desired exit code
+  @raise errors.OpPrereqError: if the NIC or disk options are invalid
+  @raise errors.ProgrammerError: if the mode is not one of the two above
+
+  """
+  instance = args[0]
+
+  (pnode, snode) = SplitNodeOption(opts.node)
+
+  if opts.hypervisor:
+    (hypervisor, hvparams) = opts.hypervisor
+  else:
+    (hypervisor, hvparams) = (None, {})
+
+  nics = _ParseInstanceNics(mode, opts)
+  disks = _ParseInstanceDisks(mode, opts)
+
+  utils.ForceDictType(opts.beparams, constants.BES_PARAMETER_TYPES)
+  utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
+
+  # Settings which depend on the creation mode
+  if mode == constants.INSTANCE_CREATE:
+    (start, os_type, src_node, src_path, no_install, identify_defaults) = \
+      (opts.start, opts.os, None, None, opts.no_install, False)
+  elif mode == constants.INSTANCE_IMPORT:
+    (start, os_type, src_node, src_path, no_install, identify_defaults) = \
+      (False, None, opts.src_node, opts.src_dir, None,
+       opts.identify_defaults)
+  else:
+    raise errors.ProgrammerError("Invalid creation mode %s" % mode)
+
+  op = opcodes.OpCreateInstance(instance_name=instance,
+                                disks=disks,
+                                disk_template=opts.disk_template,
+                                nics=nics,
+                                pnode=pnode, snode=snode,
+                                ip_check=opts.ip_check,
+                                name_check=opts.name_check,
+                                wait_for_sync=opts.wait_for_sync,
+                                file_storage_dir=opts.file_storage_dir,
+                                file_driver=opts.file_driver,
+                                iallocator=opts.iallocator,
+                                hypervisor=hypervisor,
+                                hvparams=hvparams,
+                                beparams=opts.beparams,
+                                mode=mode,
+                                start=start,
+                                os_type=os_type,
+                                src_node=src_node,
+                                src_path=src_path,
+                                no_install=no_install,
+                                identify_defaults=identify_defaults)
+
+  SubmitOrSend(op, opts)
+  return 0
+
+
+class _RunWhileClusterStoppedHelper:
+  """Helper class for L{RunWhileClusterStopped} to simplify state management
+
+  """
+  def __init__(self, feedback_fn, cluster_name, master_node, online_nodes):
+    """Initializes this class.
+
+    @type feedback_fn: callable
+    @param feedback_fn: Feedback function
+    @type cluster_name: string
+    @param cluster_name: Cluster name
+    @type master_node: string
+    @param master_node: Master node name
+    @type online_nodes: list
+    @param online_nodes: List of names of online nodes
+
+    """
+    self.feedback_fn = feedback_fn
+    self.cluster_name = cluster_name
+    self.master_node = master_node
+    self.online_nodes = online_nodes
+
+    self.ssh = ssh.SshRunner(self.cluster_name)
+
+    self.nonmaster_nodes = [name for name in online_nodes
+                            if name != master_node]
+
+    assert self.master_node not in self.nonmaster_nodes
+
+  def _RunCmd(self, node_name, cmd):
+    """Runs a command on the local or a remote machine.
+
+    @type node_name: string
+    @param node_name: Machine name; None or the master node name run the
+        command locally, any other name goes through SSH as root
+    @type cmd: list
+    @param cmd: Command
+    @raise errors.OpExecError: if the command failed
+
+    """
+    if node_name is None or node_name == self.master_node:
+      # No need to use SSH
+      result = utils.RunCmd(cmd)
+    else:
+      result = self.ssh.Run(node_name, "root", utils.ShellQuoteArgs(cmd))
+
+    if result.failed:
+      errmsg = ["Failed to run command %s" % result.cmd]
+      if node_name:
+        errmsg.append("on node %s" % node_name)
+      errmsg.append(": exitcode %s and error %s" %
+                    (result.exit_code, result.output))
+      raise errors.OpExecError(" ".join(errmsg))
+
+  def Call(self, fn, *args):
+    """Call function while all daemons are stopped.
+
+    The function receives this helper object as its first argument, followed
+    by C{args}; its return value is passed through.
+
+    @type fn: callable
+    @param fn: Function to be called
+    @raise errors.OpExecError: if stopping or starting daemons failed
+
+    """
+    # Pause watcher by acquiring an exclusive lock on watcher state file
+    self.feedback_fn("Blocking watcher")
+    watcher_block = utils.FileLock.Open(constants.WATCHER_STATEFILE)
+    try:
+      # TODO: Currently, this just blocks. There's no timeout.
+      # TODO: Should it be a shared lock?
+      watcher_block.Exclusive(blocking=True)
+
+      # Stop master daemons, so that no new jobs can come in and all running
+      # ones are finished
+      self.feedback_fn("Stopping master daemons")
+      self._RunCmd(None, [constants.DAEMON_UTIL, "stop-master"])
+      try:
+        # Stop daemons on all nodes
+        for node_name in self.online_nodes:
+          self.feedback_fn("Stopping daemons on %s" % node_name)
+          self._RunCmd(node_name, [constants.DAEMON_UTIL, "stop-all"])
+
+        # All daemons are shut down now
+        try:
+          return fn(self, *args)
+        except Exception, err:
+          _, errmsg = FormatError(err)
+          logging.exception("Caught exception")
+          self.feedback_fn(errmsg)
+          raise
+      finally:
+        # Start cluster again, master node last. A failure on one node must
+        # not keep the remaining nodes (in particular the master) from being
+        # started, hence every node is tried and the first error is only
+        # raised once the loop is done.
+        start_error = None
+        for node_name in self.nonmaster_nodes + [self.master_node]:
+          self.feedback_fn("Starting daemons on %s" % node_name)
+          try:
+            self._RunCmd(node_name, [constants.DAEMON_UTIL, "start-all"])
+          except errors.OpExecError, err:
+            logging.exception("Failed to start daemons on %s", node_name)
+            self.feedback_fn(str(err))
+            if start_error is None:
+              start_error = err
+
+        if start_error is not None:
+          raise start_error
+    finally:
+      # Resume watcher
+      watcher_block.Close()
+
+
+def RunWhileClusterStopped(feedback_fn, fn, *args):
+  """Calls a function while all cluster daemons are stopped.
+
+  Cluster name, master node and the list of online nodes are queried first;
+  stopping the daemons, calling the function and starting the daemons again
+  is then left to L{_RunWhileClusterStoppedHelper}.
+
+  @type feedback_fn: callable
+  @param feedback_fn: Feedback function
+  @type fn: callable
+  @param fn: Function to be called when daemons are stopped
+  @return: whatever the helper's C{Call} method returns
+
+  """
+  feedback_fn("Gathering cluster information")
+
+  # This ensures we're running on the master daemon
+  cl = GetClient()
+
+  config_values = cl.QueryConfigValues(["cluster_name", "master_node"])
+  (cluster_name, master_node) = config_values
+
+  online_nodes = GetOnlineNodes([], cl=cl)
+
+  # Don't keep a reference to the client. The master daemon will go away.
+  del cl
+
+  assert master_node in online_nodes
+
+  helper = _RunWhileClusterStoppedHelper(feedback_fn, cluster_name,
+                                         master_node, online_nodes)
+  return helper.Call(fn, *args)
+
+
def GenerateTable(headers, fields, separator, data,
numfields=None, unitfields=None,
units=None):
if unitfields is None:
unitfields = []
- numfields = utils.FieldSet(*numfields)
- unitfields = utils.FieldSet(*unitfields)
+ numfields = utils.FieldSet(*numfields) # pylint: disable-msg=W0142
+ unitfields = utils.FieldSet(*unitfields) # pylint: disable-msg=W0142
format_fields = []
for field in fields:
if unitfields.Matches(fields[idx]):
try:
val = int(val)
- except ValueError:
+ except (TypeError, ValueError):
pass
else:
val = row[idx] = utils.FormatUnit(val, units)
args.append(hdr)
result.append(format % tuple(args))
+ if separator is None:
+ assert len(mlens) == len(fields)
+
+ if fields and not numfields.Matches(fields[-1]):
+ mlens[-1] = 0
+
for line in data:
args = []
if line is None:
line = ['-' for _ in fields]
- for idx in xrange(len(fields)):
+ for idx in range(len(fields)):
if separator is None:
args.append(mlens[idx])
args.append(line[idx])
if value[-1] not in suffix_map:
try:
value = int(value)
- except ValueError:
+ except (TypeError, ValueError):
raise errors.OpPrereqError("Invalid time specification '%s'" % value)
else:
multiplier = suffix_map[value[-1]]
" suffix passed)")
try:
value = int(value) * multiplier
- except ValueError:
+ except (TypeError, ValueError):
raise errors.OpPrereqError("Invalid time specification '%s'" % value)
return value
def GetOnlineNodes(nodes, cl=None, nowarn=False, secondary_ips=False,
                   filter_master=False):
  """Returns the names of online nodes.

  This function will also log a warning on stderr with the names of
  the offline nodes that were skipped, unless C{nowarn} is set.

  @param nodes: passed to the node query as its C{names} argument
  @param cl: the client used for the queries; if None, a new one is
      obtained through GetClient()
  @type nowarn: boolean
  @param nowarn: by default, this function will output a note with the
      offline nodes that are skipped; if this parameter is True the
      note is not displayed
  @type secondary_ips: boolean
  @param secondary_ips: if True, return the secondary IPs instead of the
      names, useful for doing network traffic over the replication interface
      (if any)
  @type filter_master: boolean
  @param filter_master: if True, do not return the master node in the list
      (useful in coordination with secondary_ips where we cannot check our
      node name against the list)
  @rtype: list
  @return: one entry (name or secondary IP) per node not flagged offline,
      in the order returned by the query

  """
  if cl is None:
    cl = GetClient()

  # Every result row is [name, offline, sip]; pick the column to return
  if secondary_ips:
    name_idx = 2
  else:
    name_idx = 0

  # The master is always matched by name (row[0]), even when the secondary
  # IP is what gets returned
  if filter_master:
    master_node = cl.QueryConfigValues(["master_node"])[0]
  else:
    master_node = None

  result = cl.QueryNodes(names=nodes, fields=["name", "offline", "sip"],
                         use_locking=False)

  offline = [row[0] for row in result if row[1]]
  if offline and not nowarn:
    ToStderr("Note: skipping offline node(s): %s", utils.CommaJoin(offline))

  return [row[name_idx] for row in result
          if not row[1] and not (filter_master and row[0] == master_node)]
def _ToStream(stream, txt, *args):
GetResults() calls.
"""
- def __init__(self, cl=None, verbose=True):
+ def __init__(self, cl=None, verbose=True, opts=None, feedback_fn=None):
self.queue = []
if cl is None:
cl = GetClient()
self.cl = cl
self.verbose = verbose
self.jobs = []
+ self.opts = opts
+ self.feedback_fn = feedback_fn
def QueueJob(self, name, *ops):
"""Record a job for later submit.
@type name: string
@param name: a description of the job, will be used in WaitJobSet
"""
+ SetGenericOpcodeOpts(ops, self.opts)
self.queue.append((name, ops))
def SubmitPending(self):
"""
results = self.cl.SubmitManyJobs([row[1] for row in self.queue])
- for ((status, data), (name, _)) in zip(results, self.queue):
- self.jobs.append((status, data, name))
+ for (idx, ((status, data), (name, _))) in enumerate(zip(results,
+ self.queue)):
+ self.jobs.append((idx, status, data, name))
+
+ def _ChooseJob(self):
+ """Choose a non-waiting/queued job to poll next.
+
+ """
+ assert self.jobs, "_ChooseJob called with empty job list"
+
+ result = self.cl.QueryJobs([i[2] for i in self.jobs], ["status"])
+ assert result
+
+ for job_data, status in zip(self.jobs, result):
+ if status[0] in (constants.JOB_STATUS_QUEUED,
+ constants.JOB_STATUS_WAITLOCK,
+ constants.JOB_STATUS_CANCELING):
+ # job is still waiting
+ continue
+ # good candidate found
+ self.jobs.remove(job_data)
+ return job_data
+
+ # no job found
+ return self.jobs.pop(0)
def GetResults(self):
"""Wait for and return the results of all jobs.
self.SubmitPending()
results = []
if self.verbose:
- ok_jobs = [row[1] for row in self.jobs if row[0]]
+ ok_jobs = [row[2] for row in self.jobs if row[1]]
if ok_jobs:
- ToStdout("Submitted jobs %s", ", ".join(ok_jobs))
- for submit_status, jid, name in self.jobs:
- if not submit_status:
- ToStderr("Failed to submit job for %s: %s", name, jid)
- results.append((False, jid))
- continue
- if self.verbose:
- ToStdout("Waiting for job %s for %s...", jid, name)
+ ToStdout("Submitted jobs %s", utils.CommaJoin(ok_jobs))
+
+ # first, remove any non-submitted jobs
+ self.jobs, failures = compat.partition(self.jobs, lambda x: x[1])
+ for idx, _, jid, name in failures:
+ ToStderr("Failed to submit job for %s: %s", name, jid)
+ results.append((idx, False, jid))
+
+ while self.jobs:
+ (idx, _, jid, name) = self._ChooseJob()
+ ToStdout("Waiting for job %s for %s...", jid, name)
try:
- job_result = PollJob(jid, cl=self.cl)
+ job_result = PollJob(jid, cl=self.cl, feedback_fn=self.feedback_fn)
success = True
except (errors.GenericError, luxi.ProtocolError), err:
_, job_result = FormatError(err)
# the error message will always be shown, verbose or not
ToStderr("Job %s for %s has failed: %s", jid, name, job_result)
- results.append((success, job_result))
+ results.append((idx, success, job_result))
+
+ # sort based on the index, then drop it
+ results.sort()
+ results = [i[1:] for i in results]
+
return results
def WaitOrShow(self, wait):
else:
if not self.jobs:
self.SubmitPending()
- for status, result, name in self.jobs:
+ for _, status, result, name in self.jobs:
if status:
ToStdout("%s: %s", result, name)
else: