# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
21 """Debugging commands"""
23 # pylint: disable-msg=W0401,W0614,C0103
24 # W0401: Wildcard import ganeti.cli
25 # W0614: Unused import %s from wildcard import (since we need cli)
# C0103: Invalid name gnt-debug
34 from ganeti.cli import *
35 from ganeti import cli
36 from ganeti import constants
37 from ganeti import opcodes
38 from ganeti import utils
39 from ganeti import errors
42 def Delay(opts, args):
45 @param opts: the command line options selected by the user
47 @param args: should contain only one element, the duration
50 @return: the desired exit code
53 delay = float(args[0])
54 op = opcodes.OpTestDelay(duration=delay,
55 on_master=opts.on_master,
56 on_nodes=opts.on_nodes,
58 SubmitOpCode(op, opts=opts)
63 def GenericOpCodes(opts, args):
64 """Send any opcode to the master.
66 @param opts: the command line options selected by the user
68 @param args: should contain only one element, the path of
69 the file with the opcode definition
71 @return: the desired exit code
75 jex = cli.JobExecutor(cl=cl, verbose=opts.verbose, opts=opts)
80 ToStdout("Loading...")
81 for job_idx in range(opts.rep_job):
83 # pylint: disable-msg=W0142
84 op_data = simplejson.loads(utils.ReadFile(fname))
85 op_list = [opcodes.OpCode.LoadOpCode(val) for val in op_data]
86 op_list = op_list * opts.rep_op
87 jex.QueueJob("file %s/%d" % (fname, job_idx), *op_list)
88 op_cnt += len(op_list)
93 ToStdout("Submitting...")
95 jex.SubmitPending(each=opts.each)
99 ToStdout("Executing...")
102 if opts.timing_stats:
104 ToStdout("C:op %4d" % op_cnt)
105 ToStdout("C:job %4d" % job_cnt)
106 ToStdout("T:submit %4.4f" % (t2-t1))
107 ToStdout("T:exec %4.4f" % (t3-t2))
108 ToStdout("T:total %4.4f" % (t3-t1))
112 def TestAllocator(opts, args):
113 """Runs the test allocator opcode.
115 @param opts: the command line options selected by the user
117 @param args: should contain only one element, the iallocator name
119 @return: the desired exit code
123 disks = [{"size": utils.ParseUnit(val), "mode": 'w'}
124 for val in opts.disks.split(",")]
125 except errors.UnitParseError, err:
126 ToStderr("Invalid disks parameter '%s': %s", opts.disks, err)
129 nics = [val.split("/") for val in opts.nics.split(",")]
136 nic_dict = [{"mac": v[0], "ip": v[1], "bridge": v[2]} for v in nics]
138 if opts.tags is None:
141 opts.tags = opts.tags.split(",")
143 op = opcodes.OpTestAllocator(mode=opts.mode,
148 disk_template=opts.disk_template,
153 direction=opts.direction,
154 allocator=opts.iallocator,
156 result = SubmitOpCode(op, opts=opts)
157 ToStdout("%s" % result)
161 class _JobQueueTestReporter(cli.StdioJobPollReportCb):
163 """Initializes this class.
166 cli.StdioJobPollReportCb.__init__(self)
167 self._expected_msgcount = 0
168 self._all_testmsgs = []
169 self._testmsgs = None
172 def GetTestMessages(self):
173 """Returns all test log messages received so far.
176 return self._all_testmsgs
179 """Returns the job ID.
184 def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg):
185 """Handles a log message.
188 if self._job_id is None:
189 self._job_id = job_id
190 elif self._job_id != job_id:
191 raise errors.ProgrammerError("The same reporter instance was used for"
192 " more than one job")
194 if log_type == constants.ELOG_JQUEUE_TEST:
195 (sockname, test, arg) = log_msg
196 return self._ProcessTestMessage(job_id, sockname, test, arg)
198 elif (log_type == constants.ELOG_MESSAGE and
199 log_msg.startswith(constants.JQT_MSGPREFIX)):
200 if self._testmsgs is None:
201 raise errors.OpExecError("Received test message without a preceding"
203 testmsg = log_msg[len(constants.JQT_MSGPREFIX):]
204 self._testmsgs.append(testmsg)
205 self._all_testmsgs.append(testmsg)
208 return cli.StdioJobPollReportCb.ReportLogMessage(self, job_id, serial,
212 def _ProcessTestMessage(self, job_id, sockname, test, arg):
213 """Handles a job queue test message.
216 if test not in constants.JQT_ALL:
217 raise errors.OpExecError("Received invalid test message %s" % test)
219 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
221 sock.settimeout(30.0)
223 logging.debug("Connecting to %s", sockname)
224 sock.connect(sockname)
226 logging.debug("Checking status")
227 jobdetails = cli.GetClient().QueryJobs([job_id], ["status"])[0]
229 raise errors.OpExecError("Can't find job %s" % job_id)
231 status = jobdetails[0]
233 logging.debug("Status of job %s is %s", job_id, status)
235 if test == constants.JQT_EXPANDNAMES:
236 if status != constants.JOB_STATUS_WAITLOCK:
237 raise errors.OpExecError("Job status while expanding names is '%s',"
238 " not '%s' as expected" %
239 (status, constants.JOB_STATUS_WAITLOCK))
240 elif test in (constants.JQT_EXEC, constants.JQT_LOGMSG):
241 if status != constants.JOB_STATUS_RUNNING:
242 raise errors.OpExecError("Job status while executing opcode is '%s',"
243 " not '%s' as expected" %
244 (status, constants.JOB_STATUS_RUNNING))
246 if test == constants.JQT_STARTMSG:
247 logging.debug("Expecting %s test messages", arg)
249 elif test == constants.JQT_LOGMSG:
250 if len(self._testmsgs) != arg:
251 raise errors.OpExecError("Received %s test messages when %s are"
252 " expected" % (len(self._testmsgs), arg))
254 logging.debug("Closing socket")
258 def TestJobqueue(opts, _):
259 """Runs a few tests on the job queue.
265 TM_PARTFAIL) = range(4)
266 TM_ALL = frozenset([TM_SUCCESS, TM_MULTISUCCESS, TM_FAIL, TM_PARTFAIL])
270 "Testing mode %s" % mode,
276 utils.TimestampForFilename(),
279 fail = mode in (TM_FAIL, TM_PARTFAIL)
281 if mode == TM_PARTFAIL:
282 ToStdout("Testing partial job failure")
284 opcodes.OpTestJobqueue(notify_waitlock=True, notify_exec=True,
285 log_messages=test_messages, fail=False),
286 opcodes.OpTestJobqueue(notify_waitlock=True, notify_exec=True,
287 log_messages=test_messages, fail=False),
288 opcodes.OpTestJobqueue(notify_waitlock=True, notify_exec=True,
289 log_messages=test_messages, fail=True),
290 opcodes.OpTestJobqueue(notify_waitlock=True, notify_exec=True,
291 log_messages=test_messages, fail=False),
293 expect_messages = 3 * [test_messages]
295 constants.OP_STATUS_SUCCESS,
296 constants.OP_STATUS_SUCCESS,
297 constants.OP_STATUS_ERROR,
298 constants.OP_STATUS_ERROR,
301 elif mode == TM_MULTISUCCESS:
302 ToStdout("Testing multiple successful opcodes")
304 opcodes.OpTestJobqueue(notify_waitlock=True, notify_exec=True,
305 log_messages=test_messages, fail=False),
306 opcodes.OpTestJobqueue(notify_waitlock=True, notify_exec=True,
307 log_messages=test_messages, fail=False),
309 expect_messages = 2 * [test_messages]
311 constants.OP_STATUS_SUCCESS,
312 constants.OP_STATUS_SUCCESS,
316 if mode == TM_SUCCESS:
317 ToStdout("Testing job success")
318 expect_opstatus = [constants.OP_STATUS_SUCCESS]
319 elif mode == TM_FAIL:
320 ToStdout("Testing job failure")
321 expect_opstatus = [constants.OP_STATUS_ERROR]
323 raise errors.ProgrammerError("Unknown test mode %s" % mode)
326 opcodes.OpTestJobqueue(notify_waitlock=True,
328 log_messages=test_messages,
331 expect_messages = [test_messages]
335 cli.SetGenericOpcodeOpts(ops, opts)
337 # Send job to master daemon
338 job_id = cli.SendJob(ops, cl=cl)
340 reporter = _JobQueueTestReporter()
344 results = cli.PollJob(job_id, cl=cl, reporter=reporter)
345 except errors.OpExecError, err:
348 ToStdout("Ignoring error: %s", err)
351 raise errors.OpExecError("Job didn't fail when it should")
353 # Check length of result
355 if results is not None:
356 raise errors.OpExecError("Received result from failed job")
357 elif len(results) != expect_resultlen:
358 raise errors.OpExecError("Received %s results (%s), expected %s" %
359 (len(results), results, expect_resultlen))
361 # Check received log messages
362 all_messages = [i for j in expect_messages for i in j]
363 if reporter.GetTestMessages() != all_messages:
364 raise errors.OpExecError("Received test messages don't match input"
365 " (input %r, received %r)" %
366 (all_messages, reporter.GetTestMessages()))
# The job ID recorded by the reporter must be the one we submitted.
reported_job_id = reporter.GetJobId()
if reported_job_id != job_id:
    # Adjacent string literals are concatenated verbatim, so the second
    # fragment needs a leading space; without it the message reads
    # "...doesn't matchsubmission job ID...".
    raise errors.OpExecError("Reported job ID %s doesn't match"
                             " submission job ID %s" %
                             (reported_job_id, job_id))
375 jobdetails = cli.GetClient().QueryJobs([job_id], ["status", "opstatus"])[0]
377 raise errors.OpExecError("Can't find job %s" % job_id)
380 exp_status = constants.JOB_STATUS_ERROR
382 exp_status = constants.JOB_STATUS_SUCCESS
384 (final_status, final_opstatus) = jobdetails
385 if final_status != exp_status:
386 raise errors.OpExecError("Final job status is %s, not %s as expected" %
387 (final_status, exp_status))
388 if len(final_opstatus) != len(ops):
389 raise errors.OpExecError("Did not receive status for all opcodes (got %s,"
391 (len(final_opstatus), len(ops)))
392 if final_opstatus != expect_opstatus:
393 raise errors.OpExecError("Opcode status is %s, expected %s" %
394 (final_opstatus, expect_opstatus))
396 ToStdout("Job queue test successful")
403 Delay, [ArgUnknown(min=1, max=1)],
404 [cli_option("--no-master", dest="on_master", default=True,
405 action="store_false", help="Do not sleep in the master code"),
406 cli_option("-n", dest="on_nodes", default=[],
407 action="append", help="Select nodes to sleep on"),
408 cli_option("-r", "--repeat", type="int", default="0", dest="repeat",
409 help="Number of times to repeat the sleep"),
411 "[opts...] <duration>", "Executes a TestDelay OpCode"),
413 GenericOpCodes, [ArgFile(min=1)],
415 cli_option("--op-repeat", type="int", default="1", dest="rep_op",
416 help="Repeat the opcode sequence this number of times"),
417 cli_option("--job-repeat", type="int", default="1", dest="rep_job",
418 help="Repeat the job this number of times"),
419 cli_option("--timing-stats", default=False,
420 action="store_true", help="Show timing stats"),
421 cli_option("--each", default=False, action="store_true",
422 help="Submit each job separately"),
424 "<op_list_file...>", "Submits jobs built from json files"
425 " containing a list of serialized opcodes"),
427 TestAllocator, [ArgUnknown(min=1)],
428 [cli_option("--dir", dest="direction",
429 default="in", choices=["in", "out"],
430 help="Show allocator input (in) or allocator"
# The help text lists every value accepted by ``choices`` so that the
# usage output does not hide the "multi-evacuate" mode.
cli_option("-m", "--mode", default="relocate",
           choices=["relocate", "allocate", "multi-evacuate"],
           help="Request mode, one of relocate, allocate or"
           " multi-evacuate"),
436 cli_option("--mem", default=128, type="unit",
437 help="Memory size for the instance (MiB)"),
438 cli_option("--disks", default="4096,4096",
439 help="Comma separated list of disk sizes (MiB)"),
# Each nic is written as mac/ip/bridge; the default supplies only a MAC.
cli_option("--nics", default="00:11:22:33:44:55",
           help="Comma separated list of nics, each nic"
           " definition is of form mac/ip/bridge; missing"
           " values are replaced by None"),
446 cli_option("-p", "--vcpus", default=1, type="int",
447 help="Select number of VCPUs for the instance"),
448 cli_option("--tags", default=None,
449 help="Comma separated list of tags"),
451 "{opts...} <instance>", "Executes a TestAllocator OpCode"),
453 TestJobqueue, ARGS_NONE, [],
454 "", "Test a few aspects of the job queue")
# Script entry point: hand the command table to GenericMain and exit the
# process with whatever value it returns.
if __name__ == "__main__":
    exit_status = GenericMain(commands)
    sys.exit(exit_status)