Statistics
| Branch: | Tag: | Revision:

root / qa / qa_cluster.py @ 78453739

History | View | Annotate | Download (17 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Cluster related QA tests.
23

24
"""
25

    
26
import tempfile
27
import os.path
28

    
29
from ganeti import constants
30
from ganeti import compat
31
from ganeti import utils
32
from ganeti import pathutils
33

    
34
import qa_config
35
import qa_utils
36
import qa_error
37

    
38
from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
39

    
40

    
41
#: cluster verify command
42
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
43

    
44

    
45
def _RemoveFileFromAllNodes(filename):
  """Runs "rm -f" for the given file on every configured node.

  The node list is taken from the "nodes" entry of the QA configuration.

  @param filename: path of the file to remove

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
51

    
52

    
53
def _CheckFileOnAllNodes(filename, content):
  """Asserts that a file has the expected content on every configured node.

  The file is read with "cat" on each node's primary address and the output
  is compared to C{content} using L{AssertEqual}.

  @param filename: path of the file to check
  @param content: expected output of "cat" for that file

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = qa_utils.GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(actual, content)
60

    
61

    
62
# data for testing failures due to bad keys/values for disk parameters
63
_FAIL_PARAMS = ["nonexistent:resync-rate=1",
64
                "drbd:nonexistent=1",
65
                "drbd:resync-rate=invalid",
66
                ]
67

    
68

    
69
def TestClusterInitDisk():
  """gnt-cluster init -D

  Runs "gnt-cluster init -D" once for every entry of L{_FAIL_PARAMS}; each
  invocation is run with C{fail=True}, i.e. it is expected to fail.

  """
  cluster_name = qa_config.get("name")
  init_cmd = ["gnt-cluster", "init", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(init_cmd + [bad_param, cluster_name], fail=True)
74

    
75

    
76
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs a RAPI users file on the master node, runs "gnt-cluster init"
  with options taken from the QA configuration and afterwards applies the
  configured hypervisor, backend, OS and per-OS hypervisor parameters.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: password written to the RAPI users file

  """
  master = qa_config.GetMasterNode()

  users_file = pathutils.RAPI_USERS_FILE
  users_dir = os.path.dirname(users_file)

  # First create the RAPI credentials: written to a local temporary file,
  # uploaded to the master and moved into place there
  cred_file = tempfile.NamedTemporaryFile()
  try:
    cred_file.write("%s %s write\n" % (rapi_user, rapi_secret))
    cred_file.flush()

    uploaded = qa_utils.UploadFile(master["primary"], cred_file.name)
    try:
      AssertCommand(["mkdir", "-p", users_dir])
      AssertCommand(["mv", uploaded, users_file])
    finally:
      # Removes the uploaded copy in case it was not moved
      AssertCommand(["rm", "-f", uploaded])
  finally:
    cred_file.close()

  # Initialize cluster
  init_cmd = ["gnt-cluster", "init"]
  init_cmd.append("--primary-ip-version=%d" %
                  qa_config.get("primary_ip_version", 4))
  init_cmd.append("--enabled-hypervisors=%s" %
                  ",".join(qa_config.GetEnabledHypervisors()))

  # "--specs-*" options are only added for "ispec_*" configuration entries
  # which are set to a true value
  spec_types = ("mem-size", "disk-size", "disk-count", "cpu-count",
                "nic-count")
  for spec_type in spec_types:
    config_prefix = "ispec_%s" % spec_type.replace("-", "_")
    for spec_val in ("min", "max", "std"):
      value = qa_config.get("%s_%s" % (config_prefix, spec_val), None)
      if not value:
        continue
      init_cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, value))

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_cmd.append("--secondary-ip=%s" % secondary_ip)

  netdev = qa_config.get("bridge", None)
  if netdev:
    init_cmd.append("--master-netdev=%s" % netdev)

  init_cmd.append(qa_config.get("name"))
  AssertCommand(init_cmd)

  # Hypervisor and backend parameter modifications are collected into a
  # single "gnt-cluster modify" call
  modify_args = []

  hv_config = qa_config.get("hypervisor-parameters", {})
  for (hv_name, hv_params) in hv_config.items():
    modify_args.extend(["-H", "%s:%s" % (hv_name, hv_params)])

  be_params = qa_config.get("backend-parameters", "")
  if be_params:
    modify_args.extend(["-B", be_params])

  # Without any configured parameter the command is not run at all
  if modify_args:
    AssertCommand(["gnt-cluster", "modify"] + modify_args)

  # OS parameters
  os_config = qa_config.get("os-parameters", {})
  for (os_name, os_params) in os_config.items():
    AssertCommand(["gnt-os", "modify", "-O", os_params, os_name])

  # OS hypervisor parameters
  os_hv_config = qa_config.get("os-hvp", {})
  for (os_name, per_hv) in os_hv_config.items():
    for (hv_name, hv_params) in per_hv.items():
      AssertCommand(["gnt-os", "modify",
                     "-H", "%s:%s" % (hv_name, hv_params),
                     os_name])
146

    
147

    
148
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the name given by the "rename" configuration entry
  and back to its original name, running cluster verification after each
  rename. If no "rename" entry is configured an error message is printed
  and nothing else is done.

  """
  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename_cmd = ["gnt-cluster", "rename", "-f"]

  # First to the temporary name, then back again
  for new_name in (rename_target, original_name):
    AssertCommand(rename_cmd + [new_name])
    AssertCommand(_CLUSTER_VERIFY)
165

    
166

    
167
def TestClusterOob():
  """out-of-band framework

  Sets the "oob_program" node parameter to a path which doesn't exist, to
  a file with mode 0400 and finally to the same file with mode 0500. Cluster
  verification is expected to fail in the first two cases and to succeed
  in the last one. The parameter is reset to an empty value at the end.

  """
  existing_prog = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  set_node_params = ["gnt-cluster", "modify", "--node-parameters"]
  distribute_prog = ["gnt-cluster", "copyfile", existing_prog]

  AssertCommand(_CLUSTER_VERIFY)

  # Path which doesn't exist: verification is expected to fail
  missing_prog = "/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID()
  AssertCommand(set_node_params + ["oob_program=%s" % missing_prog])
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Create the file with mode 0400 and copy it using "gnt-cluster copyfile"
  AssertCommand(["touch", existing_prog])
  AssertCommand(["chmod", "0400", existing_prog])
  AssertCommand(distribute_prog)

  try:
    AssertCommand(set_node_params + ["oob_program=%s" % existing_prog])

    # With mode 0400 verification is still expected to fail
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # After changing the mode to 0500 verification is expected to pass
    AssertCommand(["chmod", "0500", existing_prog])
    AssertCommand(distribute_prog)

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", existing_prog])

  # Reset the parameter to an empty value
  AssertCommand(set_node_params + ["oob_program="])
197

    
198

    
199
def TestClusterEpo():
  """gnt-cluster epo

  Checks rejection of invalid "gnt-cluster epo" invocations, then powers
  everything off and on again using "--all", checking the reported instance
  status after each step.

  """
  master = qa_config.GetMasterNode()
  instance_status_cmd = "gnt-instance list --no-headers -o status"

  def _AssertEveryLine(command, expected):
    """Runs a command on the master; all output lines must equal C{expected}.

    """
    lines = GetCommandOutput(master["primary"], command).splitlines()
    AssertEqual(compat.all(line == expected for line in lines), True)

  # Assert that OOB is unavailable for all nodes
  _AssertEveryLine("gnt-node list --verbose --no-headers -o powered",
                   "(unavail)")

  epo_cmd = ["gnt-cluster", "epo"]

  # Conflicting
  AssertCommand(epo_cmd + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo_cmd + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo_cmd + ["-f", master["primary"]], fail=True)

  # This shouldn't fail
  AssertCommand(epo_cmd + ["-f", "--all"])

  # All instances should have been stopped now; the status is ERROR_down
  # because the instance is stopped but not recorded as such
  _AssertEveryLine(instance_status_cmd, "ERROR_down")

  # Now start everything again
  AssertCommand(epo_cmd + ["--on", "-f", "--all"])

  # All instances should have been started now
  _AssertEveryLine(instance_status_cmd, "running")
236

    
237

    
238
def TestClusterVerify():
  """gnt-cluster verify

  Runs "gnt-cluster verify" followed by "gnt-cluster verify-disks".

  """
  for cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(cmd)
242

    
243

    
244
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command and asserts it via L{AssertCommand}.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
247

    
248

    
249
def TestDelay(node):
  """gnt-debug delay

  Runs a delay of "1" three times: with default options, with
  "--no-master" and with "--no-master" plus the given node passed via "-n".

  @param node: node whose "primary" entry is passed to "-n"

  """
  delay_cmd = ["gnt-debug", "delay"]
  AssertCommand(delay_cmd + ["1"])

  no_master_cmd = delay_cmd + ["--no-master"]
  AssertCommand(no_master_cmd + ["1"])
  AssertCommand(no_master_cmd + ["-n", node["primary"], "1"])
255

    
256

    
257
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "xenvg/qa-test" and runs cluster verification
  with different "--reserved-lvs" settings. Verification is expected to
  fail while the setting is empty and the volume exists, and to succeed
  with either of the two non-empty settings and after the volume's removal.

  """
  set_reserved = ["gnt-cluster", "modify", "--reserved-lvs"]

  # Pairs of (command, whether the command is expected to fail)
  steps = [
    (_CLUSTER_VERIFY, False),
    (set_reserved + [""], False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    (_CLUSTER_VERIFY, True),
    (set_reserved + ["xenvg/qa-test,.*/other-test"], False),
    (_CLUSTER_VERIFY, False),
    (set_reserved + [".*/qa-.*"], False),
    (_CLUSTER_VERIFY, False),
    (set_reserved + [""], False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for (cmd, expect_failure) in steps:
    AssertCommand(cmd, fail=expect_failure)
275

    
276

    
277
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Runs the command without any option; this is expected to fail.

  """
  empty_modify_cmd = ["gnt-cluster", "modify"]
  AssertCommand(empty_modify_cmd, fail=True)
280

    
281

    
282
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Runs "gnt-cluster modify -D" once for every entry of L{_FAIL_PARAMS}; each
  invocation is run with C{fail=True}, i.e. it is expected to fail.

  """
  modify_cmd = ["gnt-cluster", "modify", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(modify_cmd + [bad_param], fail=True)
286

    
287

    
288
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Sets backend parameters to valid and invalid values and greps the output
  of "gnt-cluster info" for the expected values. Backend parameters from the
  QA configuration, if any, are re-applied at the end.

  """
  def _Modify(params, fail):
    """Runs "gnt-cluster modify -B" with the given parameter string.

    """
    AssertCommand(["gnt-cluster", "modify", "-B", params], fail=fail)

  def _CheckInfo(name, value):
    """Greps the output of "gnt-cluster info" for a "name: value" line.

    """
    pattern = "^ *%s: %s$" % (name, value)
    AssertCommand(["sh", "-c", "gnt-cluster info|grep '%s'" % pattern],
                  fail=False)

  # max/min mem
  _Modify("maxmem=256", False)
  _CheckInfo("maxmem", "256")
  _Modify("minmem=256", False)
  _CheckInfo("minmem", "256")
  # Invalid values are expected to fail, leaving the old value in place
  _Modify("maxmem=a", True)
  _CheckInfo("maxmem", "256")
  _Modify("minmem=a", True)
  _CheckInfo("minmem", "256")
  _Modify("maxmem=128,minmem=128", False)
  _CheckInfo("maxmem", "128")
  _CheckInfo("minmem", "128")

  # vcpus
  _Modify("vcpus=4", False)
  _CheckInfo("vcpus", "4")
  _Modify("vcpus=a", True)
  _Modify("vcpus=1", False)
  _CheckInfo("vcpus", "1")

  # auto_balance
  _Modify("auto_balance=False", False)
  _CheckInfo("auto_balance", "False")
  _Modify("auto_balance=1", True)
  _Modify("auto_balance=True", False)
  _CheckInfo("auto_balance", "True")

  # redo the original-requested BE parameters, if any
  configured = qa_config.get("backend-parameters", "")
  if configured:
    _Modify(configured, False)
322

    
323

    
324
def TestClusterInfo():
  """gnt-cluster info

  Runs the command and asserts it via L{AssertCommand}.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
327

    
328

    
329
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command and asserts it via L{AssertCommand}.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
332

    
333

    
334
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command and asserts it via L{AssertCommand}.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
337

    
338

    
339
def TestClusterVersion():
  """gnt-cluster version

  Runs the command and asserts it via L{AssertCommand}.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
342

    
343

    
344
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then installs a custom RAPI certificate and a custom cluster
  domain secret, renews everything and finally restores the RAPI
  certificate backed up at the beginning.

  Local temporary files are closed as soon as they have been uploaded to
  the master node instead of being left open until garbage collection.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         pathutils.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      # The local copy isn't used after the upload
      fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      # The local copy isn't used after the upload
      cds_fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
404

    
405

    
406
def TestClusterBurnin():
  """Burnin

  Acquires the configured number of instances (fewer if not enough are
  available, but at least one), uploads "../tools/burnin" to the master
  node and runs it with options derived from the QA configuration. The
  uploaded script is removed and the instances are released afterwards.

  @raise qa_error.Error: if not even one instance could be acquired

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  acquired = []
  try:
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(wanted):
        acquired.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not acquired:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      burnin_cmd = [script]
      burnin_cmd.append("--os=%s" % qa_config.get("os"))
      burnin_cmd.append("--minmem-size=%s" %
                        qa_config.get(constants.BE_MINMEM))
      burnin_cmd.append("--maxmem-size=%s" %
                        qa_config.get(constants.BE_MAXMEM))
      burnin_cmd.append("--disk-size=%s" % ",".join(qa_config.get("disk")))
      burnin_cmd.append("--disk-growth=%s" %
                        ",".join(qa_config.get("disk-growth")))
      burnin_cmd.append("--disk-template=%s" % disk_template)

      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")

      # The instance names are passed as positional arguments
      burnin_cmd.extend([entry["name"] for entry in acquired])
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for entry in acquired:
      qa_config.ReleaseInstance(entry)
460

    
461

    
462
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs "gnt-cluster master-failover" on another node and then on the
  original master node. The other node is released afterwards.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # First to the acquired node, then back to original master node
    for target in (failovermaster, master):
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
474

    
475

    
476
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on another node, runs
  "gnt-cluster master-failover" on that node and checks that the drain file
  no longer exists on either node afterwards. The master role is moved
  back to the original master at the end.

  All commands issued after acquiring the second node run inside the
  try/finally block, so the node is released even if one of the
  preparatory checks fails.

  """
  drain_check = ["test", "-f", pathutils.JOB_QUEUE_DRAIN_FILE]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  cmd = ["gnt-cluster", "master-failover"]
  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", pathutils.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    # The drain file must be gone on both nodes after the failover
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  AssertCommand(drain_check, fail=True)
  AssertCommand(drain_check, node=failovermaster, fail=True)
504

    
505

    
506
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a freshly generated UUID to the master node,
  copies it with "gnt-cluster copyfile" and checks its content on all
  nodes. The file is removed from all nodes afterwards.

  The local temporary file is closed once it has been uploaded instead of
  being left open until garbage collection.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
  finally:
    # The local copy isn't used after the upload
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
526

    
527

    
528
def TestClusterCommand():
  """gnt-cluster command

  Uses "gnt-cluster command" to write a freshly generated UUID into a file
  below /tmp and checks the file's content on all nodes. The file is removed
  from all nodes afterwards.

  """
  uniqueid = utils.NewUUID()
  remote_file = "/tmp/gnt%s" % utils.NewUUID()

  # Shell snippet passed as a single argument to "gnt-cluster command"
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_cmd = "%s >%s" % (echo_cmd, remote_file)
  full_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_cmd])

  try:
    AssertCommand(full_cmd)
    _CheckFileOnAllNodes(remote_file, uniqueid)
  finally:
    _RemoveFileFromAllNodes(remote_file)
541

    
542

    
543
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with "--yes-do-it" and asserts it via L{AssertCommand}.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
546

    
547

    
548
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command and asserts it via L{AssertCommand}.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)