Statistics
| Branch: | Tag: | Revision:

root / qa / qa_cluster.py @ f14a8b15

History | View | Annotate | Download (17 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Cluster related QA tests.
23

24
"""
25

    
26
import tempfile
27
import os.path
28

    
29
from ganeti import constants
30
from ganeti import compat
31
from ganeti import utils
32

    
33
import qa_config
34
import qa_utils
35
import qa_error
36

    
37
from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38

    
39

    
40
#: cluster verify command
41
# Argument list for "gnt-cluster verify"; the tests below hand it to
# AssertCommand as-is and never modify the list in place.
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
42

    
43

    
44
def _RemoveFileFromAllNodes(filename):
  """Removes a file from all nodes.

  Runs "rm -f" on the given path once for every entry of the "nodes" QA
  configuration value.

  """
  all_nodes = qa_config.get("nodes")
  for target in all_nodes:
    AssertCommand(["rm", "-f", filename], node=target)
50

    
51

    
52
def _CheckFileOnAllNodes(filename, content):
  """Verifies the content of the given file on all nodes.

  The file is read with "cat" through each node's "primary" entry and the
  output is compared against the expected content with AssertEqual.

  """
  read_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = qa_utils.GetCommandOutput(target["primary"], read_cmd)
    AssertEqual(actual, content)
59

    
60

    
61
# data for testing failures due to bad keys/values for disk parameters
62
_FAIL_PARAMS = ["nonexistent:resync-rate=1",
63
                "drbd:nonexistent=1",
64
                "drbd:resync-rate=invalid",
65
                ]
66

    
67

    
68
def TestClusterInitDisk():
  """gnt-cluster init -D

  Each entry of _FAIL_PARAMS is passed as the "-D" value and the
  initialization is expected to fail.

  """
  cluster_name = qa_config.get("name")
  for bad_param in _FAIL_PARAMS:
    init_cmd = ["gnt-cluster", "init", "-D", bad_param, cluster_name]
    AssertCommand(init_cmd, fail=True)
73

    
74

    
75
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Puts a RAPI users file with the given credentials in place, initializes
  the cluster using the options found in the QA configuration and then
  applies the configured hypervisor, backend, OS and per-OS hypervisor
  parameters.

  """
  master = qa_config.GetMasterNode()

  rapi_dir = os.path.dirname(constants.RAPI_USERS_FILE)

  # The RAPI credentials are installed before the cluster is initialized
  cred_file = tempfile.NamedTemporaryFile()
  try:
    cred_file.write("%s %s write\n" % (rapi_user, rapi_secret))
    cred_file.flush()

    uploaded = qa_utils.UploadFile(master["primary"], cred_file.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_dir])
      AssertCommand(["mv", uploaded, constants.RAPI_USERS_FILE])
    finally:
      # Harmless after a successful "mv", removes the upload otherwise
      AssertCommand(["rm", "-f", uploaded])
  finally:
    cred_file.close()

  # Assemble the "gnt-cluster init" command line
  init_cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    ]

  # Instance specs are only passed for values set in the configuration
  for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count",
                    "nic-count"):
    config_prefix = "ispec_%s" % spec_type.replace("-", "_")
    for bound in ("min", "max", "std"):
      value = qa_config.get("%s_%s" % (config_prefix, bound), None)
      if value:
        init_cmd.append("--specs-%s=%s=%d" % (spec_type, bound, value))

  secondary = master.get("secondary", None)
  if secondary:
    init_cmd.append("--secondary-ip=%s" % secondary)

  bridge = qa_config.get("bridge", None)
  if bridge:
    # The bridge doubles as the master network device
    init_cmd.extend(["--bridge=%s" % bridge, "--master-netdev=%s" % bridge])

  hypervisors = qa_config.get("enabled-hypervisors", None)
  if hypervisors:
    init_cmd.append("--enabled-hypervisors=%s" % hypervisors)

  init_cmd.append(qa_config.get("name"))
  AssertCommand(init_cmd)

  # Cluster-level hypervisor and backend parameters share one "modify" call
  modify_cmd = ["gnt-cluster", "modify"]

  cluster_hvp = qa_config.get("hypervisor-parameters", {})
  for hv_name, hv_params in cluster_hvp.items():
    modify_cmd.extend(["-H", "%s:%s" % (hv_name, hv_params)])

  be_params = qa_config.get("backend-parameters", "")
  if be_params:
    modify_cmd.extend(["-B", be_params])

  # Only run the command if at least one option was added
  if len(modify_cmd) > 2:
    AssertCommand(modify_cmd)

  # OS parameters
  os_params = qa_config.get("os-parameters", {})
  for os_name, params in os_params.items():
    AssertCommand(["gnt-os", "modify", "-O", params, os_name])

  # OS hypervisor parameters
  os_hvp = qa_config.get("os-hvp", {})
  for os_name, per_hypervisor in os_hvp.items():
    for hv_name, hv_params in per_hypervisor.items():
      AssertCommand(["gnt-os", "modify", "-H",
                     "%s:%s" % (hv_name, hv_params), os_name])
149

    
150

    
151
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" target and back to its
  original name, verifying the cluster after each rename. Without a
  "rename" entry an error message is printed and nothing is run.

  """
  rename_cmd = ["gnt-cluster", "rename", "-f"]

  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  for new_name in (rename_target, original_name):
    AssertCommand(rename_cmd + [new_name])
    AssertCommand(_CLUSTER_VERIFY)
168

    
169

    
170
def TestClusterOob():
  """out-of-band framework

  Cluster verification is expected to fail while "oob_program" points to a
  missing file or to a file with mode 0400, and to succeed once the file
  has mode 0500. The parameter is reset to an empty value at the end.

  """
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  def _SetOobProgram(path):
    """Sets the cluster-wide "oob_program" node parameter."""
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=%s" % path])

  def _DistributeWithMode(mode):
    """Changes the mode of the program and copies it to all nodes."""
    AssertCommand(["chmod", mode, oob_path_exists])
    AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

  AssertCommand(_CLUSTER_VERIFY)

  # A path that does not exist must make verification fail
  _SetOobProgram("/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID())
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  AssertCommand(["touch", oob_path_exists])
  _DistributeWithMode("0400")

  try:
    _SetOobProgram(oob_path_exists)

    # Mode 0400: verification is still expected to fail
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    _DistributeWithMode("0500")

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  _SetOobProgram("")
200

    
201

    
202
def TestClusterEpo():
  """gnt-cluster epo

  Checks that every node lists "powered" as "(unavail)", that invalid
  option combinations are rejected, and that a forced power-off and
  power-on of the whole cluster leaves all instances first in status
  "ERROR_down" and then "running".

  """
  master = qa_config.GetMasterNode()

  def _EveryLineIs(list_cmd, expected):
    """Runs a list command on the master and compares each output line."""
    output = GetCommandOutput(master["primary"], list_cmd)
    return compat.all(line == expected for line in output.splitlines())

  instance_status_cmd = "gnt-instance list --no-headers -o status"
  epo_cmd = ["gnt-cluster", "epo"]

  # Assert that OOB is unavailable for all nodes
  AssertEqual(_EveryLineIs("gnt-node list --verbose --no-headers -o powered",
                           "(unavail)"), True)

  # Conflicting
  AssertCommand(epo_cmd + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo_cmd + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo_cmd + ["-f", master["primary"]], fail=True)

  # This shouldn't fail
  AssertCommand(epo_cmd + ["-f", "--all"])

  # All instances should have been stopped now; the status is ERROR_down
  # because the instance is stopped but not recorded as such
  AssertEqual(_EveryLineIs(instance_status_cmd, "ERROR_down"), True)

  # Now start everything again
  AssertCommand(epo_cmd + ["--on", "-f", "--all"])

  # All instances should have been started now
  AssertEqual(_EveryLineIs(instance_status_cmd, "running"), True)
239

    
240

    
241
def TestClusterVerify():
  """gnt-cluster verify

  Runs "gnt-cluster verify" followed by "gnt-cluster verify-disks".

  """
  for verify_cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(verify_cmd)
245

    
246

    
247
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command once and expects it to succeed.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
250

    
251

    
252
def TestDelay(node):
  """gnt-debug delay

  Runs a one-second delay three times: with default options, with
  "--no-master", and with "--no-master" plus "-n" naming the given node's
  "primary" entry.

  """
  delay_cmd = ["gnt-debug", "delay"]
  AssertCommand(delay_cmd + ["1"])

  no_master_cmd = delay_cmd + ["--no-master"]
  AssertCommand(no_master_cmd + ["1"])
  AssertCommand(no_master_cmd + ["-n", node["primary"], "1"])
258

    
259

    
260
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "xenvg/qa-test" and checks that cluster
  verification fails while the volume is not covered by the reserved-lvs
  setting and succeeds while it is. The volume is removed at the end.

  """
  def _SetReservedLvs(patterns):
    """Builds the command setting the reserved-lvs list."""
    return ["gnt-cluster", "modify", "--reserved-lvs", patterns]

  # Pairs of (command, whether it is expected to fail), run in order
  steps = [
    (_CLUSTER_VERIFY, False),
    (_SetReservedLvs(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    (_CLUSTER_VERIFY, True),
    (_SetReservedLvs("xenvg/qa-test,.*/other-test"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReservedLvs(".*/qa-.*"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReservedLvs(""), False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for step_cmd, must_fail in steps:
    AssertCommand(step_cmd, fail=must_fail)
278

    
279

    
280
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Running the command without any option is expected to fail.

  """
  empty_modify = ["gnt-cluster", "modify"]
  AssertCommand(empty_modify, fail=True)
283

    
284

    
285
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Each entry of _FAIL_PARAMS is passed as the "-D" value and the
  modification is expected to fail.

  """
  for bad_param in _FAIL_PARAMS:
    modify_cmd = ["gnt-cluster", "modify", "-D", bad_param]
    AssertCommand(modify_cmd, fail=True)
289

    
290

    
291
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Sets valid and invalid backend parameter values and greps the output of
  "gnt-cluster info" to check which value is in effect. Afterwards the
  backend parameters from the QA configuration, if any, are applied again.

  """
  def _Modify(be_spec):
    """Builds the command setting the given backend parameters."""
    return ["gnt-cluster", "modify", "-B", be_spec]

  def _InfoShows(line):
    """Builds the command grepping "gnt-cluster info" for a line."""
    return ["sh", "-c", "gnt-cluster info|grep '^ *%s$'" % line]

  # Pairs of (whether the command is expected to fail, command)
  steps = [
    # max/min mem
    (False, _Modify("maxmem=256")),
    (False, _InfoShows("maxmem: 256")),
    (False, _Modify("minmem=256")),
    (False, _InfoShows("minmem: 256")),
    (True, _Modify("maxmem=a")),
    (False, _InfoShows("maxmem: 256")),
    (True, _Modify("minmem=a")),
    (False, _InfoShows("minmem: 256")),
    (False, _Modify("maxmem=128,minmem=128")),
    (False, _InfoShows("maxmem: 128")),
    (False, _InfoShows("minmem: 128")),
    # vcpus
    (False, _Modify("vcpus=4")),
    (False, _InfoShows("vcpus: 4")),
    (True, _Modify("vcpus=a")),
    (False, _Modify("vcpus=1")),
    (False, _InfoShows("vcpus: 1")),
    # auto_balance
    (False, _Modify("auto_balance=False")),
    (False, _InfoShows("auto_balance: False")),
    (True, _Modify("auto_balance=1")),
    (False, _Modify("auto_balance=True")),
    (False, _InfoShows("auto_balance: True")),
    ]

  for must_fail, step_cmd in steps:
    AssertCommand(step_cmd, fail=must_fail)

  # redo the original-requested BE parameters, if any
  configured_bep = qa_config.get("backend-parameters", "")
  if configured_bep:
    AssertCommand(_Modify(configured_bep))
325

    
326

    
327
def TestClusterInfo():
  """gnt-cluster info

  Runs the command once and expects it to succeed.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
330

    
331

    
332
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command once and expects it to succeed.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
335

    
336

    
337
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command once and expects it to succeed.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
340

    
341

    
342
def TestClusterVersion():
  """gnt-cluster version

  Runs the command once and expects it to succeed.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
345

    
346

    
347
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then renews with a custom RAPI certificate, with a custom
  cluster domain secret and with newly generated material. The RAPI
  certificate backed up at the start is put back at the end of a successful
  run; the backup copy is removed in any case.

  The local temporary files are closed explicitly, using the same
  try/finally pattern as TestClusterInit, instead of being left open until
  they are garbage-collected.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
      try:
        AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                       "--rapi-certificate=%s" % tmpcert])
      finally:
        AssertCommand(["rm", "-f", tmpcert])
    finally:
      # Closing also deletes the local temporary file
      fh.close()

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      # Flush so the upload sees the complete content
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
      try:
        AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                       "--cluster-domain-secret=%s" % tmpcds])
      finally:
        AssertCommand(["rm", "-f", tmpcds])
    finally:
      cds_fh.close()

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
407

    
408

    
409
def TestClusterBurnin():
  """Burnin

  Acquires up to "burnin-instances" instances (at least one is required),
  uploads "../tools/burnin" to the master node and runs it with options
  taken from the QA configuration. The uploaded script is removed and the
  instances are released afterwards, also on failure.

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  acquired = []
  try:
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(wanted):
        acquired.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      # Fewer instances than requested are acceptable
      print("Not enough instances, continuing anyway.")

    if not acquired:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      burnin_cmd = [script]
      burnin_cmd.append("--os=%s" % qa_config.get("os"))
      burnin_cmd.append("--minmem-size=%s" %
                        qa_config.get(constants.BE_MINMEM))
      burnin_cmd.append("--maxmem-size=%s" %
                        qa_config.get(constants.BE_MAXMEM))
      burnin_cmd.append("--disk-size=%s" % ",".join(qa_config.get("disk")))
      burnin_cmd.append("--disk-growth=%s" %
                        ",".join(qa_config.get("disk-growth")))
      burnin_cmd.append("--disk-template=%s" % disk_template)

      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")

      burnin_cmd.extend([inst["name"] for inst in acquired])
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in acquired:
      qa_config.ReleaseInstance(inst)
463

    
464

    
465
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the failover on a second node and then on the original master. The
  second node is released afterwards, also on failure.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # First to the other node, then back to original master node
    for target in (failovermaster, master):
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
477

    
478

    
479
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on a second node, runs the failover on
  that node and expects the drain file to be absent on both nodes
  afterwards, also after failing back to the original master.

  All steps after acquiring the second node run inside the "try" block, so
  the node is released even if a preliminary check fails and is not used
  after it has been released.

  """
  drain_check = ["test", "-f", constants.JOB_QUEUE_DRAIN_FILE]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  cmd = ["gnt-cluster", "master-failover"]
  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", constants.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)

    # The queue must still be undrained on both nodes after failing back
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)
  finally:
    qa_config.ReleaseNode(failovermaster)
507

    
508

    
509
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a fresh UUID to the master node, copies it to
  all nodes and checks its content on every node. The file is removed from
  all nodes afterwards, also on failure.

  The local temporary file is closed explicitly, using the same try/finally
  pattern as TestClusterInit, instead of being left open until it is
  garbage-collected.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    # Flush so the upload sees the complete content
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
    try:
      # Copy file to all nodes
      AssertCommand(["gnt-cluster", "copyfile", testname])
      _CheckFileOnAllNodes(testname, uniqueid)
    finally:
      _RemoveFileFromAllNodes(testname)
  finally:
    # Closing also deletes the local temporary file
    f.close()
529

    
530

    
531
def TestClusterCommand():
  """gnt-cluster command

  Uses "gnt-cluster command" to write a fresh UUID into a file below /tmp
  and checks the file's content on every node. The file is removed from all
  nodes afterwards, also on failure.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()

  # The redirection is part of the command line handed to the command
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_cmd = "%s >%s" % (echo_cmd, rfile)
  full_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_cmd])

  try:
    AssertCommand(full_cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
544

    
545

    
546
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with "--yes-do-it" and expects it to succeed.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
549

    
550

    
551
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command once and expects it to succeed.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)