Statistics
| Branch: | Tag: | Revision:

root / qa / qa_cluster.py @ 20286f7c

History | View | Annotate | Download (16.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Cluster related QA tests.
23

24
"""
25

    
26
import tempfile
27
import os.path
28

    
29
from ganeti import constants
30
from ganeti import compat
31
from ganeti import utils
32

    
33
import qa_config
34
import qa_utils
35
import qa_error
36

    
37
from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38

    
39

    
40
#: cluster verify command
41
# NOTE: this single list object is handed unchanged to AssertCommand by
# several tests in this module; build a new list instead of modifying it.
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
42

    
43

    
44
def _RemoveFileFromAllNodes(filename):
  """Removes a file from all nodes.

  Runs C{rm -f} on the given path once for every entry of the "nodes"
  value in the QA configuration.

  @param filename: path of the file to remove on each node

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
50

    
51

    
52
def _CheckFileOnAllNodes(filename, content):
  """Verifies the content of the given file on all nodes.

  The file is read with C{cat} on the primary name of every entry of the
  "nodes" value in the QA configuration and compared with C{content}.

  @param filename: path of the file to read on each node
  @param content: output the C{cat} command is expected to produce

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(actual, content)
59

    
60

    
61
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs the RAPI credentials on the master node, checks that
  "gnt-cluster init" rejects invalid disk parameters, initializes the
  cluster, checks that "gnt-cluster modify" rejects the same invalid disk
  parameters and finally applies the hypervisor, backend and OS settings
  found in the QA configuration.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: password written to the RAPI users file

  """
  # Data for testing failures due to bad keys/values for disk parameters.
  # The option and its value are separate list elements.
  # NOTE(review): AssertCommand is assumed to quote every list element on its
  # own; with "-D value" as one element the option parser would then see the
  # value with a leading space and could fail for an unrelated reason.
  fail_params = (["-D", "nonexistent:resync-rate=1"],
                 ["-D", "drbd:nonexistent=1"],
                 ["-D", "drbd:resync-rate=invalid"])

  master = qa_config.GetMasterNode()
  cluster_name = qa_config.get("name")

  rapi_dir = os.path.dirname(constants.RAPI_USERS_FILE)

  # First create the RAPI credentials
  fh = tempfile.NamedTemporaryFile()
  try:
    fh.write("%s %s write\n" % (rapi_user, rapi_secret))
    fh.flush()

    tmpru = qa_utils.UploadFile(master["primary"], fh.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_dir])
      AssertCommand(["mv", tmpru, constants.RAPI_USERS_FILE])
    finally:
      # Nothing left to delete after a successful "mv"; this only matters
      # when one of the commands above failed
      AssertCommand(["rm", "-f", tmpru])
  finally:
    fh.close()

  # Initialize cluster
  cmd = ["gnt-cluster", "init"]

  cmd.append("--primary-ip-version=%d" %
             qa_config.get("primary_ip_version", 4))

  # Instance policy specs are only passed when set (and non-zero) in the
  # QA configuration
  for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count",
                    "nic-count"):
    for spec_val in ("min", "max", "std"):
      spec = qa_config.get("ispec_%s_%s" %
                           (spec_type.replace("-", "_"), spec_val), None)
      if spec:
        cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, spec))

  if master.get("secondary", None):
    cmd.append("--secondary-ip=%s" % master["secondary"])

  bridge = qa_config.get("bridge", None)
  if bridge:
    cmd.append("--bridge=%s" % bridge)
    cmd.append("--master-netdev=%s" % bridge)

  htype = qa_config.get("enabled-hypervisors", None)
  if htype:
    cmd.append("--enabled-hypervisors=%s" % htype)

  # test gnt-cluster init failures due to bad keys/values in disk parameters
  for param in fail_params:
    AssertCommand(cmd + param + [cluster_name], fail=True)

  AssertCommand(cmd + [cluster_name])

  cmd = ["gnt-cluster", "modify"]
  # test gnt-cluster modify failures due to bad keys/values in disk parameters
  for param in fail_params:
    AssertCommand(cmd + param, fail=True)

  # hypervisor parameter modifications
  hvp = qa_config.get("hypervisor-parameters", {})
  for k, v in hvp.items():
    cmd.extend(["-H", "%s:%s" % (k, v)])
  # backend parameter modifications
  bep = qa_config.get("backend-parameters", "")
  if bep:
    cmd.extend(["-B", bep])

  # Only run "modify" when at least one option was added above
  if len(cmd) > 2:
    AssertCommand(cmd)

  # OS parameters
  osp = qa_config.get("os-parameters", {})
  for k, v in osp.items():
    AssertCommand(["gnt-os", "modify", "-O", v, k])

  # OS hypervisor parameters
  os_hvp = qa_config.get("os-hvp", {})
  for os_name in os_hvp:
    for hv, hv_params in os_hvp[os_name].items():
      AssertCommand(["gnt-os", "modify", "-H", "%s:%s" % (hv, hv_params),
                     os_name])
152

    
153

    
154
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the "rename" value of the QA configuration and
  back to its original name, running cluster verification after each
  rename. Without a "rename" entry only an error message is printed.

  """
  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename_cmd = ["gnt-cluster", "rename", "-f"]
  for new_name in (rename_target, original_name):
    AssertCommand(rename_cmd + [new_name])
    AssertCommand(_CLUSTER_VERIFY)
171

    
172

    
173
def TestClusterOob():
  """out-of-band framework

  Sets the "oob_program" node parameter first to a path that is never
  created and then to a file distributed with "gnt-cluster copyfile",
  checking the outcome of cluster verification at each step. The parameter
  is reset to an empty value at the end.

  """
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()
  oob_path_missing = ("/tmp/ganeti-qa-oob-does-not-exist-%s" %
                      utils.NewUUID())

  modify_cmd = ["gnt-cluster", "modify", "--node-parameters"]
  copy_cmd = ["gnt-cluster", "copyfile", oob_path_exists]

  AssertCommand(_CLUSTER_VERIFY)

  # Verification is expected to fail while the parameter names this path,
  # which is never created here
  AssertCommand(modify_cmd + ["oob_program=%s" % oob_path_missing])
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  AssertCommand(["touch", oob_path_exists])
  AssertCommand(["chmod", "0400", oob_path_exists])
  AssertCommand(copy_cmd)

  try:
    AssertCommand(modify_cmd + ["oob_program=%s" % oob_path_exists])

    # With mode 0400 verification is still expected to fail
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # After switching to mode 0500 and redistributing it must succeed
    AssertCommand(["chmod", "0500", oob_path_exists])
    AssertCommand(copy_cmd)

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  AssertCommand(modify_cmd + ["oob_program="])
203

    
204

    
205
def TestClusterEpo():
  """gnt-cluster epo

  Checks that invalid "gnt-cluster epo" invocations are rejected, then
  runs it with "--all" and again with "--on --all", comparing the status
  column of "gnt-instance list" after each run.

  """
  master_name = qa_config.GetMasterNode()["primary"]

  def _AssertEveryLine(list_cmd, expected):
    """Runs a command on the master and compares each output line.

    """
    output = GetCommandOutput(master_name, list_cmd)
    matches = compat.all(line == expected for line in output.splitlines())
    AssertEqual(matches, True)

  instance_status_cmd = "gnt-instance list --no-header -o status"
  epo_cmd = ["gnt-cluster", "epo"]

  # Assert that OOB is unavailable for all nodes
  _AssertEveryLine("gnt-node list --verbose --no-header -o powered",
                   "(unavail)")

  # Conflicting
  AssertCommand(epo_cmd + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo_cmd + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo_cmd + ["-f", master_name], fail=True)

  # This shouldn't fail
  AssertCommand(epo_cmd + ["-f", "--all"])

  # All instances should have been stopped now
  _AssertEveryLine(instance_status_cmd, "ADMIN_down")

  # Now start everything again
  AssertCommand(epo_cmd + ["--on", "-f", "--all"])

  # All instances should have been started now
  _AssertEveryLine(instance_status_cmd, "running")
241

    
242

    
243
def TestClusterVerify():
  """gnt-cluster verify

  Runs "gnt-cluster verify" followed by "gnt-cluster verify-disks".

  """
  for verify_cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(verify_cmd)
247

    
248

    
249
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command once through AssertCommand.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
252

    
253

    
254
def TestDelay(node):
  """gnt-debug delay

  Runs a delay of "1" three times: with no further options, with
  "--no-master", and with "--no-master" plus "-n" naming the given node.

  @param node: node whose "primary" entry is passed to "-n"

  """
  delay_cmd = ["gnt-debug", "delay"]
  no_master_cmd = delay_cmd + ["--no-master"]

  AssertCommand(delay_cmd + ["1"])
  AssertCommand(no_master_cmd + ["1"])
  AssertCommand(no_master_cmd + ["-n", node["primary"], "1"])
260

    
261

    
262
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "qa-test" in "xenvg" and checks that cluster
  verification fails while the "--reserved-lvs" setting is empty and
  succeeds while one of the configured patterns covers the volume. The
  volume is removed again at the end.

  """
  def _Reserve(patterns):
    """Builds the command setting the reserved LVs to C{patterns}.

    """
    return ["gnt-cluster", "modify", "--reserved-lvs", patterns]

  # (command, whether it is expected to fail), in execution order
  steps = [
    (_CLUSTER_VERIFY, False),
    (_Reserve(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    (_CLUSTER_VERIFY, True),
    (_Reserve("xenvg/qa-test,.*/other-test"), False),
    (_CLUSTER_VERIFY, False),
    (_Reserve(".*/qa-.*"), False),
    (_CLUSTER_VERIFY, False),
    (_Reserve(""), False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for step_cmd, must_fail in steps:
    AssertCommand(step_cmd, fail=must_fail)
280

    
281

    
282
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Sets valid and invalid values for the maxmem, minmem, vcpus and
  auto_balance backend parameters and greps the output of "gnt-cluster
  info" for the value expected after each change. Afterwards the
  "backend-parameters" value of the QA configuration, if any, is applied
  again.

  """
  def _Modify(params, fail=False):
    """Runs "gnt-cluster modify -B" with the given parameter string.

    """
    AssertCommand(["gnt-cluster", "modify", "-B", params], fail=fail)

  def _ExpectInfo(line):
    """Requires "gnt-cluster info" to print C{line}, ignoring indentation.

    """
    AssertCommand(["sh", "-c", "gnt-cluster info|grep '^ *%s$'" % line],
                  fail=False)

  # max/min mem
  _Modify("maxmem=256")
  _ExpectInfo("maxmem: 256")
  _Modify("minmem=256")
  _ExpectInfo("minmem: 256")
  # rejected values must leave the previous setting in place
  _Modify("maxmem=a", fail=True)
  _ExpectInfo("maxmem: 256")
  _Modify("minmem=a", fail=True)
  _ExpectInfo("minmem: 256")
  _Modify("maxmem=128,minmem=128")
  _ExpectInfo("maxmem: 128")
  _ExpectInfo("minmem: 128")

  # vcpus
  _Modify("vcpus=4")
  _ExpectInfo("vcpus: 4")
  _Modify("vcpus=a", fail=True)
  _Modify("vcpus=1")
  _ExpectInfo("vcpus: 1")

  # auto_balance
  _Modify("auto_balance=False")
  _ExpectInfo("auto_balance: False")
  _Modify("auto_balance=1", fail=True)
  _Modify("auto_balance=True")
  _ExpectInfo("auto_balance: True")

  # redo the original-requested BE parameters, if any
  configured_bep = qa_config.get("backend-parameters", "")
  if configured_bep:
    AssertCommand(["gnt-cluster", "modify", "-B", configured_bep])
316

    
317

    
318
def TestClusterInfo():
  """gnt-cluster info

  Runs the command once through AssertCommand.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
321

    
322

    
323
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command once through AssertCommand.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
326

    
327

    
328
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command once through AssertCommand.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
331

    
332

    
333
def TestClusterVersion():
  """gnt-cluster version

  Runs the command once through AssertCommand.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
336

    
337

    
338
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then installs a custom RAPI certificate and a custom cluster
  domain secret, regenerates all secrets and finally restores the RAPI
  certificate that was backed up at the start.

  """
  master = qa_config.GetMasterNode()
  renew_cmd = ["gnt-cluster", "renew-crypto", "--force"]

  # Conflicting options
  cmd = renew_cmd + ["--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  AssertCommand(renew_cmd + ["--rapi-certificate=/dev/null"], fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate; the local temporary file is only needed until
    # it has been uploaded, so it is closed explicitly instead of being left
    # to garbage collection
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      fh.close()

    try:
      AssertCommand(renew_cmd + ["--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      cds_fh.close()

    try:
      AssertCommand(renew_cmd + ["--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(renew_cmd +
                  ["--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(renew_cmd + ["--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
398

    
399

    
400
def TestClusterBurnin():
  """Burnin

  Acquires up to "burnin-instances" instances, uploads "../tools/burnin"
  to the master node and runs it with options derived from the "options"
  section of the QA configuration. The uploaded script is removed and all
  acquired instances are released afterwards.

  """
  master = qa_config.GetMasterNode()

  opts = qa_config.get("options", {})
  disk_template = opts.get("burnin-disk-template", "drbd")
  parallel = opts.get("burnin-in-parallel", False)
  check_inst = opts.get("burnin-check-instances", False)
  do_rename = opts.get("burnin-rename", "")
  do_reboot = opts.get("burnin-reboot", True)
  reboot_types = opts.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  acquired = []
  try:
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(wanted):
        acquired.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      # Fewer instances than requested are acceptable, none at all is not
      print("Not enough instances, continuing anyway.")

    if not acquired:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      burnin_cmd = [script]
      burnin_cmd.append("--os=%s" % qa_config.get("os"))
      burnin_cmd.append("--disk-size=%s" % ",".join(qa_config.get("disk")))
      burnin_cmd.append("--disk-growth=%s" %
                        ",".join(qa_config.get("disk-growth")))
      burnin_cmd.append("--disk-template=%s" % disk_template)

      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")

      burnin_cmd.extend(inst["name"] for inst in acquired)
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in acquired:
      qa_config.ReleaseInstance(inst)
452

    
453

    
454
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the failover command on a node acquired for this test and then on
  the original master node. The acquired node is released in any case.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # First to the acquired node, then back to the original master node
    for target in (failovermaster, master):
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
466

    
467

    
468
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on a node acquired for this test, runs
  the failover on that node and expects the drain file to be absent
  afterwards, both before and after failing back to the original master.

  """
  drain_file = constants.JOB_QUEUE_DRAIN_FILE
  drain_check = ["test", "-f", drain_file]
  failover_cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  def _AssertNoDrainFile():
    """Expects the drain file check to fail on both nodes.

    """
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

  # Ensure queue is not drained
  AssertCommand(drain_check, node=master, fail=True)
  AssertCommand(drain_check, node=failovermaster, fail=True)

  # Drain queue on failover master
  AssertCommand(["touch", drain_file], node=failovermaster)

  try:
    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(failover_cmd, node=failovermaster)
    _AssertNoDrainFile()

    # Back to original master node
    AssertCommand(failover_cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  _AssertNoDrainFile()
496

    
497

    
498
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a fresh UUID to the master node, distributes
  it with "gnt-cluster copyfile" and checks its content on all nodes. The
  file is removed from all nodes afterwards.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file; it is only needed until it has been uploaded, so
  # it is closed explicitly instead of being left to garbage collection
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
  finally:
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
518

    
519

    
520
def TestClusterCommand():
  """gnt-cluster command

  Uses "gnt-cluster command" to write a fresh UUID into a file under /tmp,
  then checks the content of that file on all nodes and removes it again.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()

  # Command executed through "gnt-cluster command": echo with its output
  # redirected into the result file
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_cmd = echo_cmd + " >" + rfile
  full_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_cmd])

  try:
    AssertCommand(full_cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
533

    
534

    
535
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with "--yes-do-it" once through AssertCommand.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
538

    
539

    
540
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command once through AssertCommand.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)