Statistics
| Branch: | Tag: | Revision:

root / lib / client / gnt_cluster.py @ 93f2399e

History | View | Annotate | Download (47.2 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Cluster related commands"""
22

    
23
# pylint: disable=W0401,W0613,W0614,C0103
24
# W0401: Wildcard import ganeti.cli
25
# W0613: Unused argument, since all functions follow the same API
26
# W0614: Unused import %s from wildcard import (since we need cli)
27
# C0103: Invalid name gnt-cluster
28

    
29
import os.path
30
import time
31
import OpenSSL
32
import itertools
33

    
34
from ganeti.cli import *
35
from ganeti import opcodes
36
from ganeti import constants
37
from ganeti import errors
38
from ganeti import utils
39
from ganeti import bootstrap
40
from ganeti import ssh
41
from ganeti import objects
42
from ganeti import uidpool
43
from ganeti import compat
44
from ganeti import netutils
45

    
46

    
47
# Flag "--on": stored as boolean "on", off unless given on the command line
ON_OPT = cli_option("--on", dest="on", action="store_true", default=False,
                    help="Recover from an EPO")

# Flag "--groups": stored as boolean "groups", off unless given
GROUPS_OPT = cli_option("--groups", dest="groups", action="store_true",
                        default=False,
                        help="Arguments are node groups instead of nodes")

# EPO timing values, all expressed in seconds
_EPO_PING_INTERVAL = 30 # pause between two pings
_EPO_PING_TIMEOUT = 1 # timeout of a single ping
_EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
58

    
59

    
60
@UsesRPC
61
def InitCluster(opts, args):
62
  """Initialize the cluster.
63

64
  @param opts: the command line options selected by the user
65
  @type args: list
66
  @param args: should contain only one element, the desired
67
      cluster name
68
  @rtype: int
69
  @return: the desired exit code
70

71
  """
72
  if not opts.lvm_storage and opts.vg_name:
73
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
74
    return 1
75

    
76
  vg_name = opts.vg_name
77
  if opts.lvm_storage and not opts.vg_name:
78
    vg_name = constants.DEFAULT_VG
79

    
80
  if not opts.drbd_storage and opts.drbd_helper:
81
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
82
    return 1
83

    
84
  drbd_helper = opts.drbd_helper
85
  if opts.drbd_storage and not opts.drbd_helper:
86
    drbd_helper = constants.DEFAULT_DRBD_HELPER
87

    
88
  master_netdev = opts.master_netdev
89
  if master_netdev is None:
90
    master_netdev = constants.DEFAULT_BRIDGE
91

    
92
  hvlist = opts.enabled_hypervisors
93
  if hvlist is None:
94
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
95
  hvlist = hvlist.split(",")
96

    
97
  hvparams = dict(opts.hvparams)
98
  beparams = opts.beparams
99
  nicparams = opts.nicparams
100

    
101
  # prepare beparams dict
102
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
103
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)
104

    
105
  # prepare nicparams dict
106
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
107
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
108

    
109
  # prepare ndparams dict
110
  if opts.ndparams is None:
111
    ndparams = dict(constants.NDC_DEFAULTS)
112
  else:
113
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
114
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
115

    
116
  # prepare hvparams dict
117
  for hv in constants.HYPER_TYPES:
118
    if hv not in hvparams:
119
      hvparams[hv] = {}
120
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
121
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)
122

    
123
  if opts.candidate_pool_size is None:
124
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT
125

    
126
  if opts.mac_prefix is None:
127
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX
128

    
129
  uid_pool = opts.uid_pool
130
  if uid_pool is not None:
131
    uid_pool = uidpool.ParseUidPool(uid_pool)
132

    
133
  if opts.prealloc_wipe_disks is None:
134
    opts.prealloc_wipe_disks = False
135

    
136
  try:
137
    primary_ip_version = int(opts.primary_ip_version)
138
  except (ValueError, TypeError), err:
139
    ToStderr("Invalid primary ip version value: %s" % str(err))
140
    return 1
141

    
142
  master_netmask = opts.master_netmask
143
  try:
144
    if master_netmask is not None:
145
      master_netmask = int(master_netmask)
146
  except (ValueError, TypeError), err:
147
    ToStderr("Invalid master netmask value: %s" % str(err))
148
    return 1
149

    
150
  bootstrap.InitCluster(cluster_name=args[0],
151
                        secondary_ip=opts.secondary_ip,
152
                        vg_name=vg_name,
153
                        mac_prefix=opts.mac_prefix,
154
                        master_netmask=master_netmask,
155
                        master_netdev=master_netdev,
156
                        file_storage_dir=opts.file_storage_dir,
157
                        shared_file_storage_dir=opts.shared_file_storage_dir,
158
                        enabled_hypervisors=hvlist,
159
                        hvparams=hvparams,
160
                        beparams=beparams,
161
                        nicparams=nicparams,
162
                        ndparams=ndparams,
163
                        candidate_pool_size=opts.candidate_pool_size,
164
                        modify_etc_hosts=opts.modify_etc_hosts,
165
                        modify_ssh_setup=opts.modify_ssh_setup,
166
                        maintain_node_health=opts.maintain_node_health,
167
                        drbd_helper=drbd_helper,
168
                        uid_pool=uid_pool,
169
                        default_iallocator=opts.default_iallocator,
170
                        primary_ip_version=primary_ip_version,
171
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
172
                        )
173
  op = opcodes.OpClusterPostInit()
174
  SubmitOpCode(op, opts=opts)
175
  return 0
176

    
177

    
178
@UsesRPC
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Nothing is done unless the user explicitly confirmed the operation via
  the yes_do_it option.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.yes_do_it:
    master = SubmitOpCode(opcodes.OpClusterDestroy(), opts=opts)
    # getting here means the opcode didn't fail, so the finalization
    # step can be run with the opcode's result
    bootstrap.FinalizeClusterDestroy(master)
    return 0

  ToStderr("Destroying a cluster is irreversible. If you really want"
           " destroy this cluster, supply the --yes-do-it option.")
  return 1
200

    
201

    
202
def RenameCluster(opts, args):
  """Rename the cluster.

  Unless the force option is set, the user is asked for confirmation
  before the rename opcode is submitted.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  (old_name, ) = client.QueryConfigValues(["cluster_name"])

  new_name = args[0]
  if not opts.force:
    question = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (old_name, new_name)
    confirmed = AskUser(question)
    if not confirmed:
      return 1

  rename_op = opcodes.OpClusterRename(name=new_name)
  final_name = SubmitOpCode(rename_op, opts=opts, cl=client)

  # the opcode result is what gets reported as the new name
  if final_name:
    ToStdout("Cluster renamed from '%s' to '%s'", old_name, final_name)

  return 0
233

    
234

    
235
def ActivateMasterIp(opts, args):
  """Activates the master IP.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterActivateMasterIp()
  # pass the options along, as the other commands in this module do
  SubmitOpCode(op, opts=opts)
  return 0
242

    
243

    
244
def DeactivateMasterIp(opts, args):
  """Deactivates the master IP.

  Unless the confirm option is set, the user is asked for confirmation
  before the opcode is submitted.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the desired exit code

  """
  if not opts.confirm:
    usertext = ("This will disable the master IP. All the open connections to"
                " the master IP will be closed. To reach the master you will"
                " need to use its node IP."
                " Continue?")
    if not AskUser(usertext):
      return 1

  op = opcodes.OpClusterDeactivateMasterIp()
  # pass the options along, as the other commands in this module do
  SubmitOpCode(op, opts=opts)
  return 0
259

    
260

    
261
def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  A configuration redistribution opcode is created and passed to
  SubmitOrSend together with the options.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  SubmitOrSend(opcodes.OpClusterRedistConf(), opts)
  return 0
274

    
275

    
276
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  All values are taken from a single cluster info query.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  # (output format, key in the cluster info), printed in this order
  version_lines = [
    ("Software version: %s", "software_version"),
    ("Internode protocol: %s", "protocol_version"),
    ("Configuration format: %s", "config_version"),
    ("OS api version: %s", "os_api_version"),
    ("Export interface: %s", "export_version"),
    ]

  for (fmt, key) in version_lines:
    ToStdout(fmt, info[key])

  return 0
294

    
295

    
296
def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  The value printed is whatever bootstrap.GetMaster returns.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  ToStdout(bootstrap.GetMaster())
  return 0
309

    
310

    
311
def _PrintGroupedParams(paramsdict, level=1, roman=False):
  """Print Grouped parameters (be, nic, disk) by group.

  Entries are printed sorted by key; nested dictionaries are printed
  recursively, one indentation level deeper.

  @type paramsdict: dict of dicts
  @param paramsdict: {group: {param: value, ...}, ...}
  @type level: int
  @param level: Level of indention
  @type roman: bool
  @param roman: if true, integer values are passed through
      compat.TryToRoman before being printed

  """
  prefix = level * "  "
  for (key, value) in sorted(paramsdict.items()):
    if isinstance(value, dict):
      # sub-group: print its header, then descend
      ToStdout("%s- %s:", prefix, key)
      _PrintGroupedParams(value, level=level + 1, roman=roman)
      continue

    if roman and isinstance(value, int):
      value = compat.TryToRoman(value)
    ToStdout("%s  %s: %s", prefix, key, value)
329

    
330

    
331
def ShowClusterConfig(opts, args):
  """Shows cluster information.

  All data comes from one cluster info query and is written to the
  standard output; integers in some sections are formatted according to
  the roman_integers option.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  for (fmt, key) in [("Cluster name: %s", "name"),
                     ("Cluster UUID: %s", "uuid")]:
    ToStdout(fmt, info[key])

  for (fmt, key) in [("Creation time: %s", "ctime"),
                     ("Modification time: %s", "mtime")]:
    ToStdout(fmt, utils.FormatTime(info[key]))

  ToStdout("Master node: %s", info["master"])

  ToStdout("Architecture (this node): %s (%s)",
           info["architecture"][0], info["architecture"][1])

  tags = "(none)"
  if info["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(info["tags"]))

  ToStdout("Tags: %s", tags)

  ToStdout("Default hypervisor: %s", info["default_hypervisor"])
  ToStdout("Enabled hypervisors: %s",
           utils.CommaJoin(info["enabled_hypervisors"]))

  # parameter groups printed without roman conversion
  for (title, key) in [("Hypervisor parameters:", "hvparams"),
                       ("OS-specific hypervisor parameters:", "os_hvp"),
                       ("OS parameters:", "osparams")]:
    ToStdout(title)
    _PrintGroupedParams(info[key])

  for (fmt, key) in [("Hidden OSes: %s", "hidden_os"),
                     ("Blacklisted OSes: %s", "blacklisted_os")]:
    ToStdout(fmt, utils.CommaJoin(info[key]))

  ToStdout("Cluster parameters:")
  ToStdout("  - candidate pool size: %s",
           compat.TryToRoman(info["candidate_pool_size"],
                             convert=opts.roman_integers))
  for (fmt, key) in [("  - master netdev: %s", "master_netdev"),
                     ("  - master netmask: %s", "master_netmask"),
                     ("  - lvm volume group: %s", "volume_group_name")]:
    ToStdout(fmt, info[key])

  reserved_lvs = "(none)"
  if info["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(info["reserved_lvs"])
  ToStdout("  - lvm reserved volumes: %s", reserved_lvs)

  for (fmt, key) in [
      ("  - drbd usermode helper: %s", "drbd_usermode_helper"),
      ("  - file storage path: %s", "file_storage_dir"),
      ("  - shared file storage path: %s", "shared_file_storage_dir"),
      ("  - maintenance of node health: %s", "maintain_node_health"),
      ]:
    ToStdout(fmt, info[key])

  ToStdout("  - uid pool: %s",
           uidpool.FormatUidPool(info["uid_pool"],
                                 roman=opts.roman_integers))

  for (fmt, key) in [
      ("  - default instance allocator: %s", "default_iallocator"),
      ("  - primary ip version: %d", "primary_ip_version"),
      ("  - preallocation wipe disks: %s", "prealloc_wipe_disks"),
      ]:
    ToStdout(fmt, info[key])

  ToStdout("  - OS search path: %s", utils.CommaJoin(constants.OS_SEARCH_PATH))

  # parameter groups honouring the roman_integers option
  for (title, key) in [("Default node parameters:", "ndparams"),
                       ("Default instance parameters:", "beparams"),
                       ("Default nic parameters:", "nicparams")]:
    ToStdout(title)
    _PrintGroupedParams(info[key], roman=opts.roman_integers)

  return 0
414

    
415

    
416
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  A failed copy to an individual node is reported on the standard error
  output; it does not change the exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the given path does not exist

  """
  source_file = args[0]
  if not os.path.exists(source_file):
    raise errors.OpPrereqError("No such filename '%s'" % source_file,
                               errors.ECODE_INVAL)

  client = GetClient()

  cluster_name = client.QueryConfigValues(["cluster_name"])[0]

  # target nodes, queried with filter_master=True
  targets = GetOnlineNodes(nodes=opts.nodes, cl=client, filter_master=True,
                           secondary_ips=opts.use_replication_network,
                           nodegroup=opts.nodegroup)

  runner = ssh.SshRunner(cluster_name=cluster_name)
  for node in targets:
    copied = runner.CopyFileToNode(node, source_file)
    if not copied:
      ToStderr("Copy of file %s to node %s failed", source_file, node)

  return 0
446

    
447

    
448
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  The arguments are joined with single spaces to form the command, which
  is then run as root on each selected node, one node after the other.
  Output and exit code of every run are written to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  cmdline = " ".join(args)

  targets = GetOnlineNodes(nodes=opts.nodes, cl=client,
                           nodegroup=opts.nodegroup)

  (cluster_name, master_node) = \
    client.QueryConfigValues(["cluster_name", "master_node"])

  runner = ssh.SshRunner(cluster_name=cluster_name)

  # The master node, if selected, is moved to the end of the list
  if master_node in targets:
    targets.remove(master_node)
    targets.append(master_node)

  for node_name in targets:
    outcome = runner.Run(node_name, "root", cmdline)
    ToStdout("------------------------------------------------")
    ToStdout("node: %s", node_name)
    ToStdout("%s", outcome.output)
    ToStdout("return code = %s", outcome.exit_code)

  return 0
482

    
483

    
484
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  The verify opcode is submitted first; its result lists further jobs,
  which are tracked until they have finished. The exit code reflects
  whether any of those jobs failed or returned a bad result.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []

  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  results = jex.GetResults()

  # Count failures with a plain loop; unlike unpacking the output of
  # zip(*...), this also works when there are no results at all
  bad_jobs = 0
  bad_results = 0
  for (job_success, op_results) in results:
    if not job_success:
      bad_jobs += 1
    # a result is good only if there is exactly one opcode result and
    # that result is true
    if not (len(op_results) == 1 and op_results[0]):
      bad_results += 1

  if bad_jobs == 0 and bad_results == 0:
    rcode = constants.EXIT_SUCCESS
  else:
    rcode = constants.EXIT_FAILURE
    if bad_jobs > 0:
      ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  return rcode
535

    
536

    
537
def VerifyDisks(opts, args):
538
  """Verify integrity of cluster disks.
539

540
  @param opts: the command line options selected by the user
541
  @type args: list
542
  @param args: should be an empty list
543
  @rtype: int
544
  @return: the desired exit code
545

546
  """
547
  cl = GetClient()
548

    
549
  op = opcodes.OpClusterVerifyDisks()
550

    
551
  result = SubmitOpCode(op, cl=cl, opts=opts)
552

    
553
  # Keep track of submitted jobs
554
  jex = JobExecutor(cl=cl, opts=opts)
555

    
556
  for (status, job_id) in result[constants.JOB_IDS_KEY]:
557
    jex.AddJobId(None, status, job_id)
558

    
559
  retcode = constants.EXIT_SUCCESS
560

    
561
  for (status, result) in jex.GetResults():
562
    if not status:
563
      ToStdout("Job failed: %s", result)
564
      continue
565

    
566
    ((bad_nodes, instances, missing), ) = result
567

    
568
    for node, text in bad_nodes.items():
569
      ToStdout("Error gathering data on node %s: %s",
570
               node, utils.SafeEncode(text[-400:]))
571
      retcode = constants.EXIT_FAILURE
572
      ToStdout("You need to fix these nodes first before fixing instances")
573

    
574
    for iname in instances:
575
      if iname in missing:
576
        continue
577
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
578
      try:
579
        ToStdout("Activating disks for instance '%s'", iname)
580
        SubmitOpCode(op, opts=opts, cl=cl)
581
      except errors.GenericError, err:
582
        nret, msg = FormatError(err)
583
        retcode |= nret
584
        ToStderr("Error activating disks for instance %s: %s", iname, msg)
585

    
586
    if missing:
587
      for iname, ival in missing.iteritems():
588
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
589
        if all_missing:
590
          ToStdout("Instance %s cannot be verified as it lives on"
591
                   " broken nodes", iname)
592
        else:
593
          ToStdout("Instance %s has missing logical volumes:", iname)
594
          ival.sort()
595
          for node, vol in ival:
596
            if node in bad_nodes:
597
              ToStdout("\tbroken node %s /dev/%s", node, vol)
598
            else:
599
              ToStdout("\t%s /dev/%s", node, vol)
600

    
601
      ToStdout("You need to replace or recreate disks for all the above"
602
               " instances if this message persists after fixing broken nodes.")
603
      retcode = constants.EXIT_FAILURE
604

    
605
  return retcode
606

    
607

    
608
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # return the documented exit code explicitly instead of None
  return 0
620

    
621

    
622
@UsesRPC
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  If the no_voting option is set, the user has to confirm the operation
  first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.no_voting:
    confirmed = AskUser("This will perform the failover even if most other"
                        " nodes are down, or if this node is outdated. This"
                        " is dangerous as it can lead to a non-consistent"
                        " cluster. Check the gnt-cluster(8) man page before"
                        " proceeding. Continue?")
    if not confirmed:
      return 1

  # the exit code is whatever the bootstrap function returns
  return bootstrap.MasterFailover(no_voting=opts.no_voting)
646

    
647

    
648
def MasterPing(opts, args):
  """Checks if the master is alive.

  The check consists of creating a client and running a cluster info
  query; any exception raised while doing so yields exit code 1.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  try:
    GetClient().QueryClusterInfo()
  except Exception: # pylint: disable=W0703
    return 1
  else:
    return 0
664

    
665

    
666
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  Matches are printed sorted, one "path tag" pair per line. An empty
  search result yields exit code 1.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpTagsSearch(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  if not result:
    return 1

  for path, tag in sorted(result):
    ToStdout("%s %s", path, tag)

  # return the documented exit code explicitly instead of None
  return 0
684

    
685

    
686
def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
687
  """Reads and verifies an X509 certificate.
688

689
  @type cert_filename: string
690
  @param cert_filename: the path of the file containing the certificate to
691
                        verify encoded in PEM format
692
  @type verify_private_key: bool
693
  @param verify_private_key: whether to verify the private key in addition to
694
                             the public certificate
695
  @rtype: string
696
  @return: a string containing the PEM-encoded certificate.
697

698
  """
699
  try:
700
    pem = utils.ReadFile(cert_filename)
701
  except IOError, err:
702
    raise errors.X509CertError(cert_filename,
703
                               "Unable to read certificate: %s" % str(err))
704

    
705
  try:
706
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
707
  except Exception, err:
708
    raise errors.X509CertError(cert_filename,
709
                               "Unable to load certificate: %s" % str(err))
710

    
711
  if verify_private_key:
712
    try:
713
      OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
714
    except Exception, err:
715
      raise errors.X509CertError(cert_filename,
716
                                 "Unable to load private key: %s" % str(err))
717

    
718
  return pem
719

    
720

    
721
def _RenewCrypto(new_cluster_cert, new_rapi_cert, #pylint: disable=R0911
722
                 rapi_cert_filename, new_spice_cert, spice_cert_filename,
723
                 spice_cacert_filename, new_confd_hmac_key, new_cds,
724
                 cds_filename, force):
725
  """Renews cluster certificates, keys and secrets.
726

727
  @type new_cluster_cert: bool
728
  @param new_cluster_cert: Whether to generate a new cluster certificate
729
  @type new_rapi_cert: bool
730
  @param new_rapi_cert: Whether to generate a new RAPI certificate
731
  @type rapi_cert_filename: string
732
  @param rapi_cert_filename: Path to file containing new RAPI certificate
733
  @type new_spice_cert: bool
734
  @param new_spice_cert: Whether to generate a new SPICE certificate
735
  @type spice_cert_filename: string
736
  @param spice_cert_filename: Path to file containing new SPICE certificate
737
  @type spice_cacert_filename: string
738
  @param spice_cacert_filename: Path to file containing the certificate of the
739
                                CA that signed the SPICE certificate
740
  @type new_confd_hmac_key: bool
741
  @param new_confd_hmac_key: Whether to generate a new HMAC key
742
  @type new_cds: bool
743
  @param new_cds: Whether to generate a new cluster domain secret
744
  @type cds_filename: string
745
  @param cds_filename: Path to file containing new cluster domain secret
746
  @type force: bool
747
  @param force: Whether to ask user for confirmation
748

749
  """
750
  if new_rapi_cert and rapi_cert_filename:
751
    ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
752
             " options can be specified at the same time.")
753
    return 1
754

    
755
  if new_cds and cds_filename:
756
    ToStderr("Only one of the --new-cluster-domain-secret and"
757
             " --cluster-domain-secret options can be specified at"
758
             " the same time.")
759
    return 1
760

    
761
  if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
762
    ToStderr("When using --new-spice-certificate, the --spice-certificate"
763
             " and --spice-ca-certificate must not be used.")
764
    return 1
765

    
766
  if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
767
    ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
768
             " specified.")
769
    return 1
770

    
771
  rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
772
  try:
773
    if rapi_cert_filename:
774
      rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
775
    if spice_cert_filename:
776
      spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
777
      spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
778
  except errors.X509CertError, err:
779
    ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
780
    return 1
781

    
782
  if cds_filename:
783
    try:
784
      cds = utils.ReadFile(cds_filename)
785
    except Exception, err: # pylint: disable=W0703
786
      ToStderr("Can't load new cluster domain secret from %s: %s" %
787
               (cds_filename, str(err)))
788
      return 1
789
  else:
790
    cds = None
791

    
792
  if not force:
793
    usertext = ("This requires all daemons on all nodes to be restarted and"
794
                " may take some time. Continue?")
795
    if not AskUser(usertext):
796
      return 1
797

    
798
  def _RenewCryptoInner(ctx):
799
    ctx.feedback_fn("Updating certificates and keys")
800
    bootstrap.GenerateClusterCrypto(new_cluster_cert,
801
                                    new_rapi_cert,
802
                                    new_spice_cert,
803
                                    new_confd_hmac_key,
804
                                    new_cds,
805
                                    rapi_cert_pem=rapi_cert_pem,
806
                                    spice_cert_pem=spice_cert_pem,
807
                                    spice_cacert_pem=spice_cacert_pem,
808
                                    cds=cds)
809

    
810
    files_to_copy = []
811

    
812
    if new_cluster_cert:
813
      files_to_copy.append(constants.NODED_CERT_FILE)
814

    
815
    if new_rapi_cert or rapi_cert_pem:
816
      files_to_copy.append(constants.RAPI_CERT_FILE)
817

    
818
    if new_spice_cert or spice_cert_pem:
819
      files_to_copy.append(constants.SPICE_CERT_FILE)
820
      files_to_copy.append(constants.SPICE_CACERT_FILE)
821

    
822
    if new_confd_hmac_key:
823
      files_to_copy.append(constants.CONFD_HMAC_KEY)
824

    
825
    if new_cds or cds:
826
      files_to_copy.append(constants.CLUSTER_DOMAIN_SECRET_FILE)
827

    
828
    if files_to_copy:
829
      for node_name in ctx.nonmaster_nodes:
830
        ctx.feedback_fn("Copying %s to %s" %
831
                        (", ".join(files_to_copy), node_name))
832
        for file_name in files_to_copy:
833
          ctx.ssh.CopyFileToNode(node_name, file_name)
834

    
835
  RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
836

    
837
  ToStdout("All requested certificates and keys have been replaced."
838
           " Running \"gnt-cluster verify\" now is recommended.")
839

    
840
  return 0
841

    
842

    
843
def RenewCrypto(opts, args):
  """Renews cluster certificates, keys and secrets.

  Command line entry point; the relevant options are unpacked and handed
  to _RenewCrypto, whose return value is passed on unchanged.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @return: the value returned by _RenewCrypto

  """
  return _RenewCrypto(new_cluster_cert=opts.new_cluster_cert,
                      new_rapi_cert=opts.new_rapi_cert,
                      rapi_cert_filename=opts.rapi_cert,
                      new_spice_cert=opts.new_spice_cert,
                      spice_cert_filename=opts.spice_cert,
                      spice_cacert_filename=opts.spice_cacert,
                      new_confd_hmac_key=opts.new_confd_hmac_key,
                      new_cds=opts.new_cluster_domain_secret,
                      cds_filename=opts.cluster_domain_secret,
                      force=opts.force)
857

    
858

    
859
def SetClusterParams(opts, args):
  """Modify the cluster.

  Checks that at least one modification was requested, rejects conflicting
  storage options, converts the option values into the form handed to the
  opcode and finally submits an C{OpClusterSetParams} opcode.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  # Disabling LVM or DRBD storage counts as a requested change on its own,
  # hence the negated tests on those two flags.
  anything_requested = (
    not opts.lvm_storage or opts.vg_name or
    not opts.drbd_storage or opts.drbd_helper or
    opts.enabled_hypervisors or opts.hvparams or
    opts.beparams or opts.nicparams or opts.ndparams or
    opts.candidate_pool_size is not None or
    opts.uid_pool is not None or
    opts.maintain_node_health is not None or
    opts.add_uids is not None or
    opts.remove_uids is not None or
    opts.default_iallocator is not None or
    opts.reserved_lvs is not None or
    opts.master_netdev is not None or
    opts.master_netmask is not None or
    opts.prealloc_wipe_disks is not None
    )
  if not anything_requested:
    ToStderr("Please give at least one of the parameters.")
    return 1

  # Volume group: an empty string is sent when LVM storage is disabled
  if opts.lvm_storage:
    vg_name = opts.vg_name
  elif opts.vg_name:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1
  else:
    vg_name = ""

  # DRBD usermode helper: same scheme as for the volume group
  if opts.drbd_storage:
    drbd_helper = opts.drbd_helper
  elif opts.drbd_helper:
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
    return 1
  else:
    drbd_helper = ""

  if opts.enabled_hypervisors is None:
    hvlist = None
  else:
    hvlist = opts.enabled_hypervisors.split(",")

  # a list of (name, dict) we can pass directly to dict() (or [])
  hvparams = dict(opts.hvparams)
  for one_hv_params in hvparams.values():
    utils.ForceDictType(one_hv_params, constants.HVS_PARAMETER_TYPES)

  beparams = opts.beparams
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  nicparams = opts.nicparams
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  ndparams = opts.ndparams
  if ndparams is not None:
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  def _ParseOptionalPool(value):
    """Parses a UID pool option, passing C{None} through untouched."""
    if value is None:
      return None
    return uidpool.ParseUidPool(value)

  uid_pool = _ParseOptionalPool(opts.uid_pool)
  add_uids = _ParseOptionalPool(opts.add_uids)
  remove_uids = _ParseOptionalPool(opts.remove_uids)

  # The converted values below are written back onto "opts", which is also
  # passed along to SubmitOpCode further down.
  if opts.reserved_lvs is not None:
    if opts.reserved_lvs == "":
      opts.reserved_lvs = []
    else:
      opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")

  if opts.master_netmask is not None:
    try:
      opts.master_netmask = int(opts.master_netmask)
    except ValueError:
      ToStderr("The --master-netmask option expects an int parameter.")
      return 1

  op = opcodes.OpClusterSetParams(
    vg_name=vg_name,
    drbd_helper=drbd_helper,
    enabled_hypervisors=hvlist,
    hvparams=hvparams,
    os_hvp=None,
    beparams=beparams,
    nicparams=nicparams,
    ndparams=ndparams,
    candidate_pool_size=opts.candidate_pool_size,
    maintain_node_health=opts.maintain_node_health,
    uid_pool=uid_pool,
    add_uids=add_uids,
    remove_uids=remove_uids,
    default_iallocator=opts.default_iallocator,
    prealloc_wipe_disks=opts.prealloc_wipe_disks,
    master_netdev=opts.master_netdev,
    master_netmask=opts.master_netmask,
    reserved_lvs=opts.reserved_lvs)
  SubmitOpCode(op, opts=opts)
  return 0
968

    
969

    
970
def QueueOps(opts, args):
  """Queue operations.

  Supports three subcommands: C{drain} and C{undrain} set or clear the queue
  drain flag, C{info} prints its current state.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not one of the above

  """
  command = args[0]
  client = GetClient()

  if command == "info":
    (flag_value, ) = (client.QueryConfigValues(["drain_flag"])[0], )
    if flag_value:
      state_text = "set"
    else:
      state_text = "unset"
    ToStdout("The drain flag is %s" % state_text)
    return 0

  if command in ("drain", "undrain"):
    # The flag is True for "drain" and False for "undrain"
    client.SetQueueDrainFlag(command == "drain")
    return 0

  raise errors.OpPrereqError("Command '%s' is not valid." % command,
                             errors.ECODE_INVAL)
997

    
998

    
999
def _ShowWatcherPause(until):
  """Prints whether the watcher is currently paused.

  @param until: timestamp until which the watcher is paused, or C{None};
      a timestamp that already lies in the past is reported as "not paused"

  """
  if until is not None and not until < time.time():
    ToStdout("The watcher is paused until %s.", time.ctime(until))
    return

  ToStdout("The watcher is not paused.")
1004

    
1005

    
1006
def WatcherOps(opts, args):
  """Watcher operations.

  Supports the subcommands C{continue} (clear the pause), C{pause <timespec>}
  (pause for the given duration, counted from now) and C{info} (show the
  current pause state).

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the subcommand, followed by the duration for C{pause}
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is unknown or C{pause} is
      given without a duration

  """
  command = args[0]
  client = GetClient()

  if command == "continue":
    client.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")
    return 0

  if command == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)

    # The duration is relative; the client call takes an absolute timestamp
    pause_until = time.time() + ParseTimespec(args[1])
    _ShowWatcherPause(client.SetWatcherPause(pause_until))
    return 0

  if command == "info":
    values = client.QueryConfigValues(["watcher_pause"])
    _ShowWatcherPause(values[0])
    return 0

  raise errors.OpPrereqError("Command '%s' is not valid." % command,
                             errors.ECODE_INVAL)
1039

    
1040

    
1041
def _OobPower(opts, node_list, power):
  """Puts the node in the list to desired power state.

  Submits a single out-of-band command opcode for all nodes and then walks
  the per-node results, reporting every node whose status is not
  C{constants.RS_NORMAL}.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on
  @param power: True if they should be powered on, False otherwise
  @return: The success of the operation (none failed)

  """
  if power:
    oob_command = constants.OOB_POWER_ON
  else:
    oob_command = constants.OOB_POWER_OFF

  op = opcodes.OpOobCommand(node_names=node_list,
                            command=oob_command,
                            ignore_status=True,
                            timeout=opts.oob_timeout,
                            power_delay=opts.power_delay)

  all_ok = True
  # Each result entry is a pair of pairs; only the node name and the status
  # of the data part are of interest here
  for ((_, node_name), (data_status, _)) in SubmitOpCode(op, opts=opts):
    if data_status == constants.RS_NORMAL:
      continue

    assert data_status != constants.RS_UNAVAIL
    all_ok = False
    ToStderr("There was a problem changing power for %s, please investigate",
             node_name)

  return all_ok
1076

    
1077

    
1078
def _InstanceStart(opts, inst_list, start):
  """Puts the instances in the list to desired state.

  One job per instance is queued through a L{JobExecutor}; the function then
  collects all results and prints a summary.

  @param opts: The command line options selected by the user
  @param inst_list: The list of instances to operate on
  @param start: True if they should be started, False for shutdown
  @return: The success of the operation (none failed)

  """
  if start:
    make_op = opcodes.OpInstanceStartup
    (what, done, doing) = ("startup", "started", "starting")
  else:
    # Shutdown opcodes additionally carry the user-selected timeout
    make_op = compat.partial(opcodes.OpInstanceShutdown,
                             timeout=opts.shutdown_timeout)
    (what, done, doing) = ("shutdown", "stopped", "stopping")

  jex = JobExecutor(opts=opts)

  for inst in inst_list:
    ToStdout("Submit %s of instance %s", what, inst)
    jex.QueueJob(inst, make_op(instance_name=inst))

  results = jex.GetResults()

  failures = 0
  for (job_ok, _) in results:
    if not job_ok:
      failures += 1

  if failures:
    ToStderr("There were errors while %s instances:\n"
             "%d error(s) out of %d instance(s)", doing, failures,
             len(results))
    return False

  ToStdout("All instances have been %s successfully", done)
  return True
1114

    
1115

    
1116
class _RunWhenNodesReachableHelper:
  """Helper class to make shared internal state sharing easier.

  Instances are callable (see L{__call__}) and additionally offer L{Wait},
  which probes the nodes still considered down.

  @ivar success: Indicates if all action_cb calls were successful
  @ivar down: Set of nodes not yet seen as reachable
  @ivar up: Set of nodes that answered the TCP ping

  """
  def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
               _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
    """Init the object.

    @param node_list: The list of nodes to be reachable
    @param action_cb: Callback called when a new host is reachable
    @type node2ip: dict
    @param node2ip: Node to ip mapping
    @param port: The port to use for the TCP ping
    @param feedback_fn: The function used for feedback
    @param _ping_fn: Function to check reachabilty (for unittest use only)
    @param _sleep_fn: Function to sleep (for unittest use only)

    """
    # Collaborators
    self.action_cb = action_cb
    self.feedback_fn = feedback_fn
    self._ping_fn = _ping_fn
    self._sleep_fn = _sleep_fn

    # Network parameters
    self.node2ip = node2ip
    self.port = port

    # Mutable state: every node starts out as "down"
    self.down = set(node_list)
    self.up = set()
    self.success = True

  def __call__(self):
    """When called we run action_cb.

    The callback receives the set of nodes currently known to be up; a false
    return value from it permanently clears L{success}.

    @raises utils.RetryAgain: When there are still down nodes
    @return: L{success}, once no node is left in the down set

    """
    callback_ok = self.action_cb(self.up)
    if not callback_ok:
      self.success = False

    if self.down:
      raise utils.RetryAgain()

    return self.success

  def Wait(self, secs):
    """Checks if a host is up or waits remaining seconds.

    The nodes still down are pinged one after the other; as soon as one of
    them answers, it is moved to the up set and the method returns without
    sleeping. If none answers, the method sleeps for whatever is left of
    C{secs} after the time already spent pinging.

    @param secs: The secs remaining

    """
    started_at = time.time()

    for node in self.down:
      reachable = self._ping_fn(self.node2ip[node], self.port,
                                timeout=_EPO_PING_TIMEOUT,
                                live_port_needed=True)
      if not reachable:
        continue

      self.feedback_fn("Node %s became available" % node)
      self.up.add(node)
      self.down.difference_update(self.up)
      # If we have a node available there is the possibility to run the
      # action callback successfully, therefore we don't wait and return
      # (returning right away also means the set modified above is not
      # iterated any further)
      return

    remaining = started_at + secs - time.time()
    self._sleep_fn(max(0.0, remaining))
1178

    
1179

    
1180
def _RunWhenNodesReachable(node_list, action_cb, interval):
  """Run action_cb when nodes become reachable.

  Resolves every node to an IP address of the cluster's primary IP family and
  then retries, through L{utils.Retry}, a L{_RunWhenNodesReachableHelper}
  until no node is left unreachable or C{_EPO_REACHABLE_TIMEOUT} expires.

  @param node_list: The list of nodes to be reachable
  @param action_cb: Callback called when a new host is reachable
  @param interval: The earliest time to retry
  @return: The result of the retried helper, or C{False} if the timeout
      expired while nodes were still unreachable

  """
  client = GetClient()
  cluster_info = client.QueryClusterInfo()
  if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
    family = netutils.IPAddress.family
  else:
    family = netutils.IP6Address.family

  node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
                 for node in node_list)

  port = netutils.GetDaemonPort(constants.NODED)
  helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port,
                                        ToStdout)

  try:
    return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
                       wait_fn=helper.Wait)
  except utils.RetryTimeout:
    # One node per line; the separator needs its own newline, otherwise all
    # nodes after the first end up on the same line. Sorted so the output
    # does not depend on set iteration order.
    ToStderr("Time exceeded while waiting for nodes to become reachable"
             " again:\n  - %s", "\n  - ".join(sorted(helper.down)))
    return False
1209

    
1210

    
1211
def _MaybeInstanceStartup(opts, inst_map, nodes_online,
                          _instance_start_fn=_InstanceStart):
  """Start the instances conditional based on node_states.

  An instance is started once none of its nodes is missing from
  C{nodes_online}. Instances handed to the start function are removed from
  C{inst_map}, so the mapping is modified in place.

  @param opts: The command line options selected by the user
  @param inst_map: A dict of inst -> nodes mapping
  @param nodes_online: The nodes online (subtracted from each instance's
      node set)
  @param _instance_start_fn: Callback to start instances (unittest use only)
  @return: Success of the operation on all instances

  """
  # All nodes the instance lives on are back online
  ready = [inst for (inst, nodes) in inst_map.items()
           if not (nodes - nodes_online)]

  if not ready:
    return True

  for inst in ready:
    del inst_map[inst]

  return _instance_start_fn(opts, ready, True)
1235

    
1236

    
1237
def _EpoOn(opts, full_node_list, node_list, inst_map):
  """Does the actual power on.

  Powers on the nodes supporting out-of-band management and then waits for
  all nodes to become reachable, starting instances as their nodes come
  back.

  @param opts: The command line options selected by the user
  @param full_node_list: All nodes to operate on (includes nodes not supporting
                         OOB)
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  # The third argument selects the power state: True requests a power-on.
  # A failure here is only reported; the reachability wait below decides
  # the exit status.
  if node_list and not _OobPower(opts, node_list, True):
    ToStderr("Not all nodes seem to get back up, investigate and start"
             " manually if needed")

  # Wait for the nodes to be back up; the callback works on a copy of the
  # instance map, as it removes the instances it has started
  action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))

  ToStdout("Waiting until all nodes are available again")
  if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
    ToStderr("Please investigate and start stopped instances manually")
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1261

    
1262

    
1263
def _EpoOff(opts, node_list, inst_map):
  """Does the actual power off.

  Instances are shut down first; the nodes are only powered off if that
  succeeded.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  instances = inst_map.keys()
  instances_stopped = _InstanceStart(opts, instances, False)
  if not instances_stopped:
    ToStderr("Please investigate and stop instances manually before continuing")
    return constants.EXIT_FAILURE

  # With no node to power off, stopping the instances was all there is to do
  if node_list and not _OobPower(opts, node_list, False):
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1283

    
1284

    
1285
def Epo(opts, args):
  """EPO operations.

  Queries the selected nodes, works out which of them have to change their
  power state and which instances live on them, asks for confirmation
  (unless C{--force} was given) and then hands over to L{_EpoOn} or
  L{_EpoOff}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the nodes (or, with C{--groups}, node groups) to operate on;
      must be empty when C{--all} is given
  @rtype: int
  @return: the desired exit code

  """
  if opts.groups and opts.show_all:
    ToStderr("Only one of --groups or --all are allowed")
    return constants.EXIT_FAILURE
  elif args and opts.show_all:
    ToStderr("Arguments in combination with --all are not allowed")
    return constants.EXIT_FAILURE

  client = GetClient()

  if opts.groups:
    # NOTE(review): this chains the rows returned by the group query; whether
    # that yields node names or per-group node lists depends on the shape of
    # the QueryGroups result, which is not visible here -- confirm.
    node_query_list = itertools.chain(*client.QueryGroups(names=args,
                                                          fields=["node_list"],
                                                          use_locking=False))
  else:
    node_query_list = args

  result = client.QueryNodes(names=node_query_list,
                             fields=["name", "master", "pinst_list",
                                     "sinst_list", "powered", "offline"],
                             use_locking=False)

  # Node names as returned by the query. These are collected in a list of
  # their own rather than written back into node_query_list by index, as the
  # latter is an iterator with --groups and empty with --all or without
  # arguments.
  all_nodes = [row[0] for row in result]

  node_list = []
  inst_map = {}
  for (node, master, pinsts, sinsts, powered, offline) in result:
    if not offline:
      for inst in (pinsts + sinsts):
        if inst in inst_map:
          if not master:
            inst_map[inst].add(node)
        elif master:
          # The master is never recorded as a node an instance depends on
          inst_map[inst] = set()
        else:
          inst_map[inst] = set([node])

    if master and opts.on:
      # We ignore the master for turning on the machines, in fact we are
      # already operating on the master at this point :)
      continue
    elif master and not opts.show_all:
      ToStderr("%s is the master node, please do a master-failover to another"
               " node not affected by the EPO or use --all if you intend to"
               " shutdown the whole cluster", node)
      return constants.EXIT_FAILURE
    elif powered is None:
      ToStdout("Node %s does not support out-of-band handling, it can not be"
               " handled in a fully automated manner", node)
    elif powered == opts.on:
      ToStdout("Node %s is already in desired power state, skipping", node)
    elif not offline or (offline and powered):
      node_list.append(node)

  if not opts.force and not ConfirmOperation(all_nodes, "nodes", "epo"):
    return constants.EXIT_FAILURE

  if opts.on:
    return _EpoOn(opts, all_nodes, node_list, inst_map)
  else:
    return _EpoOff(opts, node_list, inst_map)
1356

    
1357

    
1358
# Command table handed to GenericMain by Main(). Every value is a 5-tuple:
# (handler function, argument specification, list of options,
#  usage string, description).
commands = {
  "init": (
    InitCluster,
    [ArgHost(min=1, max=1)],
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
     HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
     NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT,
     NOMODIFY_SSH_SETUP_OPT, SECONDARY_IP_OPT, VG_NAME_OPT,
     MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, DRBD_HELPER_OPT, NODRBD_STORAGE_OPT,
     DEFAULT_IALLOCATOR_OPT, PRIMARY_IP_VERSION_OPT, PREALLOC_WIPE_DISKS_OPT,
     NODE_PARAMS_OPT, GLOBAL_SHARED_FILEDIR_OPT],
    "[opts...] <cluster_name>",
    "Initialises a new cluster configuration"),
  "destroy": (
    DestroyCluster,
    ARGS_NONE,
    [YES_DOIT_OPT],
    "",
    "Destroy cluster"),
  "rename": (
    RenameCluster,
    [ArgHost(min=1, max=1)],
    [FORCE_OPT, DRY_RUN_OPT],
    "<new_name>",
    "Renames the cluster"),
  "redist-conf": (
    RedistributeConfig,
    ARGS_NONE,
    [SUBMIT_OPT, DRY_RUN_OPT, PRIORITY_OPT],
    "",
    "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster"),
  "verify": (
    VerifyCluster,
    ARGS_NONE,
    [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
     DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT],
    "",
    "Does a check on the cluster configuration"),
  "verify-disks": (
    VerifyDisks,
    ARGS_NONE,
    [PRIORITY_OPT],
    "",
    "Does a check on the cluster disk status"),
  "repair-disk-sizes": (
    RepairDiskSizes,
    ARGS_MANY_INSTANCES,
    [DRY_RUN_OPT, PRIORITY_OPT],
    "",
    "Updates mismatches in recorded disk sizes"),
  "master-failover": (
    MasterFailover,
    ARGS_NONE,
    [NOVOTING_OPT],
    "",
    "Makes the current node the master"),
  "master-ping": (
    MasterPing,
    ARGS_NONE,
    [],
    "",
    "Checks if the master is alive"),
  "version": (
    ShowClusterVersion,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster version"),
  "getmaster": (
    ShowClusterMaster,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster master"),
  "copyfile": (
    ClusterCopyFile,
    [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
    "[-n node...] <filename>",
    "Copies a file to all (or only some) nodes"),
  "command": (
    RunClusterCommand,
    [ArgCommand(min=1)],
    [NODE_LIST_OPT, NODEGROUP_OPT],
    "[-n node...] <command>",
    "Runs a command on all (or only some) nodes"),
  "info": (
    ShowClusterConfig,
    ARGS_NONE,
    [ROMAN_OPT],
    "[--roman]",
    "Show cluster configuration"),
  "list-tags": (
    ListTags,
    ARGS_NONE,
    [],
    "",
    "List the tags of the cluster"),
  "add-tags": (
    AddTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT],
    "tag...",
    "Add tags to the cluster"),
  "remove-tags": (
    RemoveTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT],
    "tag...",
    "Remove tags from the cluster"),
  "search-tags": (
    SearchTags,
    [ArgUnknown(min=1, max=1)],
    [PRIORITY_OPT],
    "",
    "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)"),
  "queue": (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [],
    "drain|undrain|info",
    "Change queue properties"),
  "watcher": (
    WatcherOps,
    [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
     ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
    [],
    "{pause <timespec>|continue|info}",
    "Change watcher properties"),
  "modify": (
    SetClusterParams,
    ARGS_NONE,
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT, MASTER_NETDEV_OPT,
     MASTER_NETMASK_OPT, NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, VG_NAME_OPT,
     MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, ADD_UIDS_OPT, REMOVE_UIDS_OPT,
     DRBD_HELPER_OPT, NODRBD_STORAGE_OPT, DEFAULT_IALLOCATOR_OPT,
     RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT, PREALLOC_WIPE_DISKS_OPT,
     NODE_PARAMS_OPT],
    "[opts...]",
    "Alters the parameters of the cluster"),
  "renew-crypto": (
    RenewCrypto,
    ARGS_NONE,
    [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
     NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
     NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
     NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT],
    "[opts...]",
    "Renews cluster certificates, keys and secrets"),
  "epo": (
    Epo,
    [ArgUnknown()],
    [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
     SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT],
    "[opts...] [args]",
    "Performs an emergency power-off on given args"),
  "activate-master-ip": (
    ActivateMasterIp,
    ARGS_NONE,
    [],
    "",
    "Activates the master IP"),
  "deactivate-master-ip": (
    DeactivateMasterIp,
    ARGS_NONE,
    [CONFIRM_OPT],
    "",
    "Deactivates the master IP"),
  }
1467

    
1468

    
1469
#: dictionary with aliases for commands (alias name -> name of an entry in
#: the commands table)
aliases = dict(masterfailover="master-failover")
1473

    
1474

    
1475
def Main():
  """Runs the command line tool on top of the command and alias tables.

  @return: the value returned by C{GenericMain}

  """
  # All tag commands of this tool operate on cluster tags
  overrides = {"tag_type": constants.TAG_CLUSTER}
  return GenericMain(commands, override=overrides, aliases=aliases)