Statistics
| Branch: | Tag: | Revision:

root / qa / qa_cluster.py @ cbf5114e

History | View | Annotate | Download (16.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Cluster related QA tests.
23

24
"""
25

    
26
import tempfile
27
import os.path
28

    
29
from ganeti import constants
30
from ganeti import compat
31
from ganeti import utils
32

    
33
import qa_config
34
import qa_utils
35
import qa_error
36

    
37
from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38

    
39

    
40
#: cluster verify command
41
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
42

    
43

    
44
def _RemoveFileFromAllNodes(filename):
  """Removes a file from all nodes.

  Runs C{rm -f} for the file once per entry of the "nodes" configuration
  value.

  @param filename: path of the file to remove

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
50

    
51

    
52
def _CheckFileOnAllNodes(filename, content):
  """Verifies the content of the given file on all nodes.

  The file is read with C{cat} on every entry of the "nodes" configuration
  value and the command output is compared with the expected content.

  @param filename: path of the file to read
  @param content: expected command output

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = qa_utils.GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(actual, content)
59

    
60

    
61
# data for testing failures due to bad keys/values for disk parameters
62
_FAIL_PARAMS = ["nonexistent:resync-rate=1",
63
                "drbd:nonexistent=1",
64
                "drbd:resync-rate=invalid",
65
                ]
66

    
67

    
68
def TestClusterInitDisk():
  """gnt-cluster init -D

  Checks that cluster initialization fails for every entry of
  L{_FAIL_PARAMS}.

  """
  cluster_name = qa_config.get("name")
  for bad_param in _FAIL_PARAMS:
    init_cmd = ["gnt-cluster", "init", "-D", bad_param, cluster_name]
    AssertCommand(init_cmd, fail=True)
73

    
74

    
75
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs the RAPI users file, initializes the cluster and afterwards
  applies the hypervisor, backend and OS parameters found in the QA
  configuration.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: secret written to the RAPI users file

  """
  master = qa_config.GetMasterNode()

  users_file = constants.RAPI_USERS_FILE
  users_dir = os.path.dirname(users_file)

  # The RAPI credentials have to be in place before the cluster is set up
  cred_fh = tempfile.NamedTemporaryFile()
  try:
    cred_fh.write("%s %s write\n" % (rapi_user, rapi_secret))
    cred_fh.flush()

    uploaded = qa_utils.UploadFile(master["primary"], cred_fh.name)
    try:
      AssertCommand(["mkdir", "-p", users_dir])
      AssertCommand(["mv", uploaded, users_file])
    finally:
      # Harmless if the move succeeded, cleans up if it did not
      AssertCommand(["rm", "-f", uploaded])
  finally:
    cred_fh.close()

  # Assemble the initialization command
  init_cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    "--enabled-hypervisors=%s" % ",".join(qa_config.GetEnabledHypervisors()),
    ]

  # Instance specs are only passed along when set in the configuration
  for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count",
                    "nic-count"):
    cfg_prefix = "ispec_%s" % spec_type.replace("-", "_")
    for bound in ("min", "max", "std"):
      value = qa_config.get("%s_%s" % (cfg_prefix, bound), None)
      if not value:
        continue
      init_cmd.append("--specs-%s=%s=%d" % (spec_type, bound, value))

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_cmd.append("--secondary-ip=%s" % secondary_ip)

  netdev = qa_config.get("bridge", None)
  if netdev:
    init_cmd.append("--master-netdev=%s" % netdev)

  init_cmd.append(qa_config.get("name"))
  AssertCommand(init_cmd)

  # Hypervisor and backend parameters go into a single "modify" call
  modify_cmd = ["gnt-cluster", "modify"]

  hv_settings = qa_config.get("hypervisor-parameters", {})
  for hv_name, hv_params in hv_settings.items():
    modify_cmd += ["-H", "%s:%s" % (hv_name, hv_params)]

  be_params = qa_config.get("backend-parameters", "")
  if be_params:
    modify_cmd += ["-B", be_params]

  # Skip the call altogether when no option was added
  if len(modify_cmd) > 2:
    AssertCommand(modify_cmd)

  # OS parameters
  for os_name, os_params in qa_config.get("os-parameters", {}).items():
    AssertCommand(["gnt-os", "modify", "-O", os_params, os_name])

  # OS hypervisor parameters
  for os_name, per_hv in qa_config.get("os-hvp", {}).items():
    for hv_name, hv_params in per_hv.items():
      AssertCommand(["gnt-os", "modify", "-H",
                     "%s:%s" % (hv_name, hv_params), os_name])
145

    
146

    
147
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" target and back again,
  verifying the cluster after each step. Prints an error and returns
  without running anything when no rename target is configured.

  """
  original_name = qa_config.get("name")
  new_name = qa_config.get("rename", None)
  if new_name is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename_cmd = ["gnt-cluster", "rename", "-f"]

  AssertCommand(rename_cmd + [new_name])
  AssertCommand(_CLUSTER_VERIFY)
  AssertCommand(rename_cmd + [original_name])
  AssertCommand(_CLUSTER_VERIFY)
164

    
165

    
166
def TestClusterOob():
  """out-of-band framework

  Sets the C{oob_program} node parameter to a missing file, to a file with
  mode 0400 and finally to a file with mode 0500; cluster verification is
  expected to pass only in the last case. The parameter is reset at the
  end.

  """
  existing_oob = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()
  set_node_params = ["gnt-cluster", "modify", "--node-parameters"]

  AssertCommand(_CLUSTER_VERIFY)

  # A program which does not exist must make verification fail
  missing_oob = "/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID()
  AssertCommand(set_node_params + ["oob_program=%s" % missing_oob])
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Create the file with mode 0400 and copy it to the other nodes
  AssertCommand(["touch", existing_oob])
  AssertCommand(["chmod", "0400", existing_oob])
  AssertCommand(["gnt-cluster", "copyfile", existing_oob])

  try:
    AssertCommand(set_node_params + ["oob_program=%s" % existing_oob])

    # Verification is still expected to fail while the mode is 0400
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    AssertCommand(["chmod", "0500", existing_oob])
    AssertCommand(["gnt-cluster", "copyfile", existing_oob])

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", existing_oob])

  # Reset the parameter to an empty value
  AssertCommand(set_node_params + ["oob_program="])
196

    
197

    
198
def TestClusterEpo():
  """gnt-cluster epo

  Checks the argument validation of the command, then powers everything
  off and on again, comparing the reported instance status after each step.

  """
  master = qa_config.GetMasterNode()
  epo = ["gnt-cluster", "epo"]
  status_query = "gnt-instance list --no-headers -o status"

  # Assert that OOB is unavailable for all nodes
  powered = GetCommandOutput(
    master["primary"],
    "gnt-node list --verbose --no-headers -o powered").splitlines()
  all_unavail = compat.all(state == "(unavail)" for state in powered)
  AssertEqual(all_unavail, True)

  # Conflicting
  AssertCommand(epo + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo + ["-f", master["primary"]], fail=True)

  # This shouldn't fail
  AssertCommand(epo + ["-f", "--all"])

  # All instances should have been stopped now; the status is ERROR_down
  # because the instance is stopped but not recorded as such
  stopped = GetCommandOutput(master["primary"], status_query).splitlines()
  all_down = compat.all(state == "ERROR_down" for state in stopped)
  AssertEqual(all_down, True)

  # Now start everything again
  AssertCommand(epo + ["--on", "-f", "--all"])

  # All instances should have been started now
  started = GetCommandOutput(master["primary"], status_query).splitlines()
  all_running = compat.all(state == "running" for state in started)
  AssertEqual(all_running, True)
235

    
236

    
237
def TestClusterVerify():
  """gnt-cluster verify

  Runs both the cluster verification and the disk verification command.

  """
  for cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(cmd)
241

    
242

    
243
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command and expects it to succeed.

  """
  cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(cmd)
246

    
247

    
248
def TestDelay(node):
  """gnt-debug delay

  Runs a one second delay with the default options, with C{--no-master}
  and with C{--no-master} plus an explicit node.

  @param node: node entry; its "primary" value is passed to C{-n}

  """
  delay = ["gnt-debug", "delay"]
  AssertCommand(delay + ["1"])

  no_master = delay + ["--no-master"]
  AssertCommand(no_master + ["1"])
  AssertCommand(no_master + ["-n", node["primary"], "1"])
254

    
255

    
256
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates a logical volume named C{qa-test} in C{xenvg} and checks that
  cluster verification fails unless the reserved-lvs setting matches it.

  """
  def _SetReserved(pattern):
    # Command setting the reserved LVs to the given pattern list
    return ["gnt-cluster", "modify", "--reserved-lvs", pattern]

  steps = [
    (_CLUSTER_VERIFY, False),
    (_SetReserved(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    (_CLUSTER_VERIFY, True),
    (_SetReserved("xenvg/qa-test,.*/other-test"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReserved(".*/qa-.*"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReserved(""), False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for cmd, must_fail in steps:
    AssertCommand(cmd, fail=must_fail)
274

    
275

    
276
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Checks that the command fails when called without any option.

  """
  cmd = ["gnt-cluster", "modify"]
  AssertCommand(cmd, fail=True)
279

    
280

    
281
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Checks that modifying the cluster fails for every entry of
  L{_FAIL_PARAMS}.

  """
  for bad_param in _FAIL_PARAMS:
    modify_cmd = ["gnt-cluster", "modify", "-D", bad_param]
    AssertCommand(modify_cmd, fail=True)
285

    
286

    
287
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Changes backend parameters to valid and invalid values, checking the
  result via C{gnt-cluster info} after each step, and finally re-applies
  the backend parameters from the QA configuration (if any).

  """
  def _Modify(setting):
    # Command changing the given backend parameter(s)
    return ["gnt-cluster", "modify", "-B", setting]

  def _Shows(line):
    # Command succeeding only if "gnt-cluster info" contains the given line
    return ["sh", "-c", "gnt-cluster info|grep '^ *%s$'" % line]

  steps = [
    # max/min mem
    (_Modify("maxmem=256"), False),
    (_Shows("maxmem: 256"), False),
    (_Modify("minmem=256"), False),
    (_Shows("minmem: 256"), False),
    (_Modify("maxmem=a"), True),
    (_Shows("maxmem: 256"), False),
    (_Modify("minmem=a"), True),
    (_Shows("minmem: 256"), False),
    (_Modify("maxmem=128,minmem=128"), False),
    (_Shows("maxmem: 128"), False),
    (_Shows("minmem: 128"), False),
    # vcpus
    (_Modify("vcpus=4"), False),
    (_Shows("vcpus: 4"), False),
    (_Modify("vcpus=a"), True),
    (_Modify("vcpus=1"), False),
    (_Shows("vcpus: 1"), False),
    # auto_balance
    (_Modify("auto_balance=False"), False),
    (_Shows("auto_balance: False"), False),
    (_Modify("auto_balance=1"), True),
    (_Modify("auto_balance=True"), False),
    (_Shows("auto_balance: True"), False),
    ]

  for cmd, must_fail in steps:
    AssertCommand(cmd, fail=must_fail)

  # redo the original-requested BE parameters, if any
  configured = qa_config.get("backend-parameters", "")
  if configured:
    AssertCommand(_Modify(configured))
321

    
322

    
323
def TestClusterInfo():
  """gnt-cluster info

  Runs the command and expects it to succeed.

  """
  cmd = ["gnt-cluster", "info"]
  AssertCommand(cmd)
326

    
327

    
328
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command and expects it to succeed.

  """
  cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(cmd)
331

    
332

    
333
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command and expects it to succeed.

  """
  cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(cmd)
336

    
337

    
338
def TestClusterVersion():
  """gnt-cluster version

  Runs the command and expects it to succeed.

  """
  cmd = ["gnt-cluster", "version"]
  AssertCommand(cmd)
341

    
342

    
343
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then renews the crypto material with a custom RAPI certificate,
  a custom cluster domain secret and finally with newly generated values.
  The RAPI certificate backed up at the start is put back at the end of a
  successful run.

  The local temporary files are closed explicitly once they are no longer
  needed instead of being left to the garbage collector.

  """
  master = qa_config.GetMasterNode()
  renew = ["gnt-cluster", "renew-crypto", "--force"]

  # Conflicting options
  cmd = renew + ["--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  AssertCommand(renew + ["--rapi-certificate=/dev/null"], fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
      try:
        AssertCommand(renew + ["--rapi-certificate=%s" % tmpcert])
      finally:
        AssertCommand(["rm", "-f", tmpcert])
    finally:
      fh.close()

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
      try:
        AssertCommand(renew + ["--cluster-domain-secret=%s" % tmpcds])
      finally:
        AssertCommand(["rm", "-f", tmpcds])
    finally:
      cds_fh.close()

    # Normal case
    AssertCommand(renew +
                  ["--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(renew + ["--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
403

    
404

    
405
def TestClusterBurnin():
  """Burnin

  Acquires the configured number of instances, uploads the burnin script
  to the master node and runs it with options derived from the QA
  configuration. The uploaded script is removed and the instances are
  released afterwards.

  @raise qa_error.Error: if not even one instance could be acquired

  """
  master = qa_config.GetMasterNode()

  opts = qa_config.get("options", {})
  disk_template = opts.get("burnin-disk-template", "drbd")
  parallel = opts.get("burnin-in-parallel", False)
  check_inst = opts.get("burnin-check-instances", False)
  do_rename = opts.get("burnin-rename", "")
  do_reboot = opts.get("burnin-reboot", True)
  reboot_types = opts.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  acquired = []
  try:
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(wanted):
        acquired.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      # Fewer instances than requested are acceptable
      print("Not enough instances, continuing anyway.")

    if not acquired:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      burnin_cmd = [
        script,
        "--os=%s" % qa_config.get("os"),
        "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
        "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
        "--disk-size=%s" % ",".join(qa_config.get("disk")),
        "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
        "--disk-template=%s" % disk_template,
        ]
      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")
      burnin_cmd.extend([entry["name"] for entry in acquired])
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for entry in acquired:
      qa_config.ReleaseInstance(entry)
459

    
460

    
461
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the failover command on a second node and then on the original
  master node again. The second node is released afterwards.

  """
  master = qa_config.GetMasterNode()
  other = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # First to the other node, then back to the original master node
    for target in (other, master):
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(other)
473

    
474

    
475
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on a second node, runs the failover
  command on that node and checks that the drain file is gone from both
  nodes afterwards; then runs the failover command on the original master
  node and repeats the check.

  All commands involving the second node run inside the C{try} block, so
  the node is released even if one of the preparatory steps fails, and it
  is no longer used after having been released.

  """
  drain_check = ["test", "-f", constants.JOB_QUEUE_DRAIN_FILE]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  cmd = ["gnt-cluster", "master-failover"]
  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", constants.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)

    # The queue must not be drained on either node after failing back
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)
  finally:
    qa_config.ReleaseNode(failovermaster)
503

    
504

    
505
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a fresh UUID to the master node, copies it with
  C{gnt-cluster copyfile} and checks its content on all nodes. The file is
  removed from all nodes afterwards, and the local temporary file is closed
  explicitly instead of being left to the garbage collector.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
    try:
      # Copy file to all nodes
      AssertCommand(["gnt-cluster", "copyfile", testname])
      _CheckFileOnAllNodes(testname, uniqueid)
    finally:
      _RemoveFileFromAllNodes(testname)
  finally:
    f.close()
525

    
526

    
527
def TestClusterCommand():
  """gnt-cluster command

  Has C{gnt-cluster command} write a fresh UUID into a file below C{/tmp}
  and checks the content of that file on all nodes. The file is removed
  from all nodes afterwards.

  """
  token = utils.NewUUID()
  out_path = "/tmp/gnt%s" % utils.NewUUID()

  # The redirection is part of the remotely executed command
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", token])
  remote_cmd = "%s >%s" % (echo_cmd, out_path)
  full_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_cmd])

  try:
    AssertCommand(full_cmd)
    _CheckFileOnAllNodes(out_path, token)
  finally:
    _RemoveFileFromAllNodes(out_path)
540

    
541

    
542
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with C{--yes-do-it} and expects it to succeed.

  """
  cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(cmd)
545

    
546

    
547
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command and expects it to succeed.

  """
  cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(cmd)