Statistics
| Branch: | Tag: | Revision:

root / scripts / gnt-debug @ 19b9ba9a

History | View | Annotate | Download (16.7 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Debugging commands"""
22

    
23
# pylint: disable-msg=W0401,W0614,C0103
24
# W0401: Wildcard import ganeti.cli
25
# W0614: Unused import %s from wildcard import (since we need cli)
26
# C0103: Invalid name gnt-debug
27

    
28
import sys
29
import simplejson
30
import time
31
import socket
32
import logging
33

    
34
from ganeti.cli import *
35
from ganeti import cli
36
from ganeti import constants
37
from ganeti import opcodes
38
from ganeti import utils
39
from ganeti import errors
40

    
41

    
42
#: Default fields for L{ListLocks}
43
_LIST_LOCKS_DEF_FIELDS = [
44
  "name",
45
  "mode",
46
  "owner",
47
  ]
48

    
49

    
50
def Delay(opts, args):
  """Submits a test-delay opcode and waits for it to complete.

  @param opts: the command line options selected by the user; the
      C{on_master}, C{on_nodes} and C{repeat} values are passed on
      to the opcode
  @type args: list
  @param args: should contain only one element, the duration of
      the sleep (converted to a float)
  @rtype: int
  @return: the desired exit code

  """
  delay_op = opcodes.OpTestDelay(duration=float(args[0]),
                                 on_master=opts.on_master,
                                 on_nodes=opts.on_nodes,
                                 repeat=opts.repeat)
  SubmitOpCode(delay_op, opts=opts)
  return 0
69

    
70

    
71
def GenericOpCodes(opts, args):
  """Builds jobs from JSON opcode files and submits them to the master.

  Every file named in C{args} is queued as one job per repetition
  requested through C{opts.rep_job}; inside each job the opcode list
  read from the file is repeated C{opts.rep_op} times. When
  C{opts.timing_stats} is set, opcode/job counts and the time spent
  submitting and executing are printed at the end.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: paths of the files holding serialized opcode lists
  @rtype: int
  @return: the desired exit code

  """
  client = cli.GetClient()
  executor = cli.JobExecutor(cl=client, verbose=opts.verbose, opts=opts)

  num_jobs = 0
  num_ops = 0
  if opts.timing_stats:
    ToStdout("Loading...")
  for repetition in range(opts.rep_job):
    for path in args:
      # pylint: disable-msg=W0142
      serialized = simplejson.loads(utils.ReadFile(path))
      job_ops = [opcodes.OpCode.LoadOpCode(entry) for entry in serialized]
      job_ops = job_ops * opts.rep_op
      executor.QueueJob("file %s/%d" % (path, repetition), *job_ops)
      num_ops += len(job_ops)
      num_jobs += 1

  if opts.timing_stats:
    submit_start = time.time()
    ToStdout("Submitting...")

  executor.SubmitPending(each=opts.each)

  if opts.timing_stats:
    submit_end = time.time()
    ToStdout("Executing...")

  executor.GetResults()
  if opts.timing_stats:
    exec_end = time.time()
    # (format, value) pairs, printed in this order
    report = [
      ("C:op     %4d", num_ops),
      ("C:job    %4d", num_jobs),
      ("T:submit %4.4f", submit_end - submit_start),
      ("T:exec   %4.4f", exec_end - submit_end),
      ("T:total  %4.4f", exec_end - submit_start),
      ]
    for (fmt, value) in report:
      ToStdout(fmt % value)
  return 0
118

    
119

    
120
def TestAllocator(opts, args):
  """Submits a test-allocator opcode and prints its result.

  The C{--disks}, C{--nics} and C{--tags} option strings are parsed
  into the structures handed to the opcode; C{opts.tags} is replaced
  in place by the resulting list.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the first element is passed to the opcode as C{name},
      the whole list as C{evac_nodes}
  @rtype: int
  @return: the desired exit code (1 if a disk size can't be parsed)

  """
  try:
    disks = []
    for size_spec in opts.disks.split(","):
      disks.append({"size": utils.ParseUnit(size_spec), "mode": 'w'})
  except errors.UnitParseError:
    # Fetched via sys.exc_info() so the clause parses on any Python version
    err = sys.exc_info()[1]
    ToStderr("Invalid disks parameter '%s': %s", opts.disks, err)
    return 1

  nic_dict = []
  for nic_spec in opts.nics.split(","):
    parts = nic_spec.split("/")
    # Pad to mac/ip/bridge; empty components count as "not given"
    parts.extend([None] * (3 - len(parts)))
    (mac, ip, bridge) = [part or None for part in parts[:3]]
    nic_dict.append({"mac": mac, "ip": ip, "bridge": bridge})

  if opts.tags is not None:
    opts.tags = opts.tags.split(",")
  else:
    opts.tags = []

  alloc_op = opcodes.OpTestAllocator(mode=opts.mode,
                                     name=args[0],
                                     evac_nodes=args,
                                     mem_size=opts.mem,
                                     disks=disks,
                                     disk_template=opts.disk_template,
                                     nics=nic_dict,
                                     os=opts.os,
                                     vcpus=opts.vcpus,
                                     tags=opts.tags,
                                     direction=opts.direction,
                                     allocator=opts.iallocator,
                                     )
  result = SubmitOpCode(alloc_op, opts=opts)
  ToStdout("%s" % result)
  return 0
167

    
168

    
169
class _JobQueueTestReporter(cli.StdioJobPollReportCb):
  """Job poll reporter used by the job queue test.

  Test notifications and test log messages are intercepted and
  checked; every other log message is passed on to the parent class.

  """
  def __init__(self):
    """Initializes this class.

    """
    cli.StdioJobPollReportCb.__init__(self)
    self._expected_msgcount = 0
    # Every test message seen so far, across all start notifications
    self._all_testmsgs = []
    # Test messages since the last start notification (None before it)
    self._testmsgs = None
    self._job_id = None

  def GetTestMessages(self):
    """Returns the list of all test log messages collected so far.

    """
    return self._all_testmsgs

  def GetJobId(self):
    """Returns the ID of the job seen by this reporter (None if none yet).

    """
    return self._job_id

  def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg):
    """Dispatches one log message of the polled job.

    @raise errors.ProgrammerError: if messages of more than one job
        are reported to the same instance
    @raise errors.OpExecError: if a test message arrives before any
        start notification

    """
    if self._job_id is None:
      self._job_id = job_id
    if self._job_id != job_id:
      raise errors.ProgrammerError("The same reporter instance was used for"
                                   " more than one job")

    if log_type == constants.ELOG_JQUEUE_TEST:
      (sockname, test, arg) = log_msg
      return self._ProcessTestMessage(job_id, sockname, test, arg)

    is_test_log = (log_type == constants.ELOG_MESSAGE and
                   log_msg.startswith(constants.JQT_MSGPREFIX))
    if not is_test_log:
      # Ordinary message, let the parent class report it
      return cli.StdioJobPollReportCb.ReportLogMessage(self, job_id, serial,
                                                       timestamp, log_type,
                                                       log_msg)

    if self._testmsgs is None:
      raise errors.OpExecError("Received test message without a preceding"
                               " start message")
    payload = log_msg[len(constants.JQT_MSGPREFIX):]
    self._testmsgs.append(payload)
    self._all_testmsgs.append(payload)
    return None

  def _ProcessTestMessage(self, job_id, sockname, test, arg):
    """Handles a job queue test notification.

    Connects to the UNIX socket named in the notification, queries the
    current job status and compares it with the status expected for the
    given kind of notification. The socket is always closed afterwards.

    @raise errors.OpExecError: if the notification is unknown, the job
        can't be found, the job status is unexpected or the number of
        received test messages doesn't match

    """
    if test not in constants.JQT_ALL:
      raise errors.OpExecError("Received invalid test message %s" % test)

    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.settimeout(30.0)

      logging.debug("Connecting to %s", sockname)
      sock.connect(sockname)

      logging.debug("Checking status")
      jobdetails = cli.GetClient().QueryJobs([job_id], ["status"])[0]
      if not jobdetails:
        raise errors.OpExecError("Can't find job %s" % job_id)

      status = jobdetails[0]

      logging.debug("Status of job %s is %s", job_id, status)

      # (required status, error message template) for this notification
      check = None
      if test == constants.JQT_EXPANDNAMES:
        check = (constants.JOB_STATUS_WAITLOCK,
                 "Job status while expanding names is '%s',"
                 " not '%s' as expected")
      elif test in (constants.JQT_EXEC, constants.JQT_LOGMSG):
        check = (constants.JOB_STATUS_RUNNING,
                 "Job status while executing opcode is '%s',"
                 " not '%s' as expected")

      if check is not None:
        (wanted_status, template) = check
        if status != wanted_status:
          raise errors.OpExecError(template % (status, wanted_status))

      if test == constants.JQT_STARTMSG:
        logging.debug("Expecting %s test messages", arg)
        self._testmsgs = []
      elif test == constants.JQT_LOGMSG:
        received = len(self._testmsgs)
        if received != arg:
          raise errors.OpExecError("Received %s test messages when %s are"
                                   " expected" % (received, arg))
    finally:
      logging.debug("Closing socket")
      sock.close()
264

    
265

    
266
def TestJobqueue(opts, _):
  """Runs a few tests on the job queue.

  For every test mode a job made of L{opcodes.OpTestJobqueue} opcodes
  is submitted and polled through a L{_JobQueueTestReporter}. After
  polling, the length of the result, the collected test log messages,
  the reported job ID and the final job and opcode statuses are
  compared with what the mode expects.

  @param opts: the command line options selected by the user
  @param _: unused positional arguments
  @rtype: int
  @return: the desired exit code
  @raise errors.OpExecError: if any of the checks fails

  """
  (TM_SUCCESS,
   TM_MULTISUCCESS,
   TM_FAIL,
   TM_PARTFAIL) = range(4)
  TM_ALL = frozenset([TM_SUCCESS, TM_MULTISUCCESS, TM_FAIL, TM_PARTFAIL])

  for mode in TM_ALL:
    # Each entry must be its own list element (mind the trailing commas)
    test_messages = [
      "Testing mode %s" % mode,
      "Hello World",
      "A",
      "",
      "B",
      "Foo|bar|baz",
      utils.TimestampForFilename(),
      ]

    fail = mode in (TM_FAIL, TM_PARTFAIL)

    if mode == TM_PARTFAIL:
      ToStdout("Testing partial job failure")
      ops = [opcodes.OpTestJobqueue(notify_waitlock=True, notify_exec=True,
                                    log_messages=test_messages, fail=op_fail)
             for op_fail in (False, False, True, False)]
      # Messages are expected from the first three opcodes only; the
      # fourth one is expected to end up in error status as well
      expect_messages = 3 * [test_messages]
      expect_opstatus = [
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_ERROR,
        constants.OP_STATUS_ERROR,
        ]
      expect_resultlen = 2
    elif mode == TM_MULTISUCCESS:
      ToStdout("Testing multiple successful opcodes")
      ops = [opcodes.OpTestJobqueue(notify_waitlock=True, notify_exec=True,
                                    log_messages=test_messages, fail=op_fail)
             for op_fail in (False, False)]
      expect_messages = 2 * [test_messages]
      expect_opstatus = [
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_SUCCESS,
        ]
      expect_resultlen = 2
    else:
      if mode == TM_SUCCESS:
        ToStdout("Testing job success")
        expect_opstatus = [constants.OP_STATUS_SUCCESS]
      elif mode == TM_FAIL:
        ToStdout("Testing job failure")
        expect_opstatus = [constants.OP_STATUS_ERROR]
      else:
        raise errors.ProgrammerError("Unknown test mode %s" % mode)

      ops = [
        opcodes.OpTestJobqueue(notify_waitlock=True,
                               notify_exec=True,
                               log_messages=test_messages,
                               fail=fail)
        ]
      expect_messages = [test_messages]
      expect_resultlen = 1

    cl = cli.GetClient()
    cli.SetGenericOpcodeOpts(ops, opts)

    # Send job to master daemon
    job_id = cli.SendJob(ops, cl=cl)

    reporter = _JobQueueTestReporter()
    results = None

    try:
      results = cli.PollJob(job_id, cl=cl, reporter=reporter)
    except errors.OpExecError:
      if not fail:
        raise
      # Fetched via sys.exc_info() so the clause parses on any Python version
      err = sys.exc_info()[1]
      ToStdout("Ignoring error: %s", err)
    else:
      if fail:
        raise errors.OpExecError("Job didn't fail when it should")

    # Check length of result
    if fail:
      if results is not None:
        raise errors.OpExecError("Received result from failed job")
    elif len(results) != expect_resultlen:
      raise errors.OpExecError("Received %s results (%s), expected %s" %
                               (len(results), results, expect_resultlen))

    # Check received log messages
    all_messages = [i for j in expect_messages for i in j]
    if reporter.GetTestMessages() != all_messages:
      raise errors.OpExecError("Received test messages don't match input"
                               " (input %r, received %r)" %
                               (all_messages, reporter.GetTestMessages()))

    # Check final status
    reported_job_id = reporter.GetJobId()
    if reported_job_id != job_id:
      raise errors.OpExecError("Reported job ID %s doesn't match"
                               " submission job ID %s" %
                               (reported_job_id, job_id))

    jobdetails = cli.GetClient().QueryJobs([job_id], ["status", "opstatus"])[0]
    if not jobdetails:
      raise errors.OpExecError("Can't find job %s" % job_id)

    if fail:
      exp_status = constants.JOB_STATUS_ERROR
    else:
      exp_status = constants.JOB_STATUS_SUCCESS

    (final_status, final_opstatus) = jobdetails
    if final_status != exp_status:
      raise errors.OpExecError("Final job status is %s, not %s as expected" %
                               (final_status, exp_status))
    if len(final_opstatus) != len(ops):
      raise errors.OpExecError("Did not receive status for all opcodes (got %s,"
                               " expected %s)" %
                               (len(final_opstatus), len(ops)))
    if final_opstatus != expect_opstatus:
      raise errors.OpExecError("Opcode status is %s, expected %s" %
                               (final_opstatus, expect_opstatus))

  ToStdout("Job queue test successful")

  return 0
407

    
408

    
409
def ListLocks(opts, args): # pylint: disable-msg=W0613
  """Prints a table of the locks reported by the master daemon.

  With C{opts.interval} set, the table is printed again after every
  interval, separated by an empty line, until interrupted.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  fields = ParseFields(opts.output, _LIST_LOCKS_DEF_FIELDS)

  headers = None
  if not opts.no_headers:
    headers = {
      "name": "Name",
      "mode": "Mode",
      "owner": "Owner",
      }

  while True:
    # Not reusing client as interval might be too long
    rows = GetClient().QueryLocks(fields, False)

    # Replace raw values with printable strings, in place
    for row in rows:
      for (pos, fname) in enumerate(fields):
        cell = row[pos]

        if cell is None and fname in ("mode", "owner"):
          cell = "-"
        elif fname == "owner":
          cell = utils.CommaJoin(cell)

        row[pos] = str(cell)

    table = GenerateTable(separator=opts.separator, headers=headers,
                          fields=fields, data=rows)
    for table_line in table:
      ToStdout(table_line)

    if not opts.interval:
      break

    ToStdout("")
    time.sleep(opts.interval)

  return 0
458

    
459

    
460
# Sub-command table handed to GenericMain. Each value is a tuple of
# (handler function, positional argument definitions, option list,
# usage string, description).
commands = {
  "delay": (
    Delay, [ArgUnknown(min=1, max=1)],
    [cli_option("--no-master", dest="on_master", default=True,
                action="store_false", help="Do not sleep in the master code"),
     cli_option("-n", dest="on_nodes", default=[],
                action="append", help="Select nodes to sleep on"),
     cli_option("-r", "--repeat", type="int", default="0", dest="repeat",
                help="Number of times to repeat the sleep"),
     DRY_RUN_OPT,
     ],
    "[opts...] <duration>", "Executes a TestDelay OpCode"),
  "submit-job": (
    GenericOpCodes, [ArgFile(min=1)],
    [VERBOSE_OPT,
     cli_option("--op-repeat", type="int", default="1", dest="rep_op",
                help="Repeat the opcode sequence this number of times"),
     cli_option("--job-repeat", type="int", default="1", dest="rep_job",
                help="Repeat the job this number of times"),
     cli_option("--timing-stats", default=False,
                action="store_true", help="Show timing stats"),
     cli_option("--each", default=False, action="store_true",
                help="Submit each job separately"),
     DRY_RUN_OPT,
     ],
    "<op_list_file...>", "Submits jobs built from json files"
    " containing a list of serialized opcodes"),
  "allocator": (
    TestAllocator, [ArgUnknown(min=1)],
    [cli_option("--dir", dest="direction",
                default="in", choices=["in", "out"],
                help="Show allocator input (in) or allocator"
                " results (out)"),
     IALLOCATOR_OPT,
     cli_option("-m", "--mode", default="relocate",
                choices=["relocate", "allocate", "multi-evacuate"],
                help="Request mode: relocate, allocate or multi-evacuate"),
     cli_option("--mem", default=128, type="unit",
                help="Memory size for the instance (MiB)"),
     cli_option("--disks", default="4096,4096",
                help="Comma separated list of disk sizes (MiB)"),
     DISK_TEMPLATE_OPT,
     cli_option("--nics", default="00:11:22:33:44:55",
                help="Comma separated list of nics, each nic"
                " definition is of form mac/ip/bridge, if"
                " missing values are replaced by None"),
     OS_OPT,
     cli_option("-p", "--vcpus", default=1, type="int",
                help="Select number of VCPUs for the instance"),
     cli_option("--tags", default=None,
                help="Comma separated list of tags"),
     DRY_RUN_OPT,
     ],
    "{opts...} <instance>", "Executes a TestAllocator OpCode"),
  "test-jobqueue": (
    TestJobqueue, ARGS_NONE, [],
    "", "Test a few aspects of the job queue"),
  "locks": (
    ListLocks, ARGS_NONE, [NOHDR_OPT, SEP_OPT, FIELDS_OPT, INTERVAL_OPT],
    "[--interval N]", "Show a list of locks in the master daemon"),
  }
521

    
522

    
523
if __name__ == '__main__':
  # Hand the sub-command table to the generic CLI driver and exit with
  # whatever code it returns
  exit_code = GenericMain(commands)
  sys.exit(exit_code)