Statistics
| Branch: | Tag: | Revision:

root / lib / client / gnt_cluster.py @ b2e233a5

History | View | Annotate | Download (47.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Cluster related commands"""
22

    
23
# pylint: disable=W0401,W0613,W0614,C0103
24
# W0401: Wildcard import ganeti.cli
25
# W0613: Unused argument, since all functions follow the same API
26
# W0614: Unused import %s from wildcard import (since we need cli)
27
# C0103: Invalid name gnt-cluster
28

    
29
import os.path
30
import time
31
import OpenSSL
32
import itertools
33

    
34
from ganeti.cli import *
35
from ganeti import opcodes
36
from ganeti import constants
37
from ganeti import errors
38
from ganeti import utils
39
from ganeti import bootstrap
40
from ganeti import ssh
41
from ganeti import objects
42
from ganeti import uidpool
43
from ganeti import compat
44
from ganeti import netutils
45

    
46

    
47
# Flag requesting recovery from an EPO
ON_OPT = cli_option("--on", dest="on", action="store_true", default=False,
                    help="Recover from an EPO")

# Flag making the positional arguments refer to node groups
GROUPS_OPT = cli_option("--groups", dest="groups", action="store_true",
                        default=False,
                        help="Arguments are node groups instead of nodes")

# EPO timing values, all expressed in seconds
_EPO_PING_INTERVAL = 30 # pause between pings
_EPO_PING_TIMEOUT = 1
_EPO_REACHABLE_TIMEOUT = 15 * 60 # i.e. 15 minutes
58

    
59

    
60
@UsesRPC
61
def InitCluster(opts, args):
62
  """Initialize the cluster.
63

64
  @param opts: the command line options selected by the user
65
  @type args: list
66
  @param args: should contain only one element, the desired
67
      cluster name
68
  @rtype: int
69
  @return: the desired exit code
70

71
  """
72
  if not opts.lvm_storage and opts.vg_name:
73
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
74
    return 1
75

    
76
  vg_name = opts.vg_name
77
  if opts.lvm_storage and not opts.vg_name:
78
    vg_name = constants.DEFAULT_VG
79

    
80
  if not opts.drbd_storage and opts.drbd_helper:
81
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
82
    return 1
83

    
84
  drbd_helper = opts.drbd_helper
85
  if opts.drbd_storage and not opts.drbd_helper:
86
    drbd_helper = constants.DEFAULT_DRBD_HELPER
87

    
88
  master_netdev = opts.master_netdev
89
  if master_netdev is None:
90
    master_netdev = constants.DEFAULT_BRIDGE
91

    
92
  hvlist = opts.enabled_hypervisors
93
  if hvlist is None:
94
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
95
  hvlist = hvlist.split(",")
96

    
97
  hvparams = dict(opts.hvparams)
98
  beparams = opts.beparams
99
  nicparams = opts.nicparams
100

    
101
  # prepare beparams dict
102
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
103
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
104

    
105
  # prepare nicparams dict
106
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
107
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
108

    
109
  # prepare ndparams dict
110
  if opts.ndparams is None:
111
    ndparams = dict(constants.NDC_DEFAULTS)
112
  else:
113
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
114
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
115

    
116
  # prepare hvparams dict
117
  for hv in constants.HYPER_TYPES:
118
    if hv not in hvparams:
119
      hvparams[hv] = {}
120
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
121
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)
122

    
123
  if opts.candidate_pool_size is None:
124
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT
125

    
126
  if opts.mac_prefix is None:
127
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX
128

    
129
  uid_pool = opts.uid_pool
130
  if uid_pool is not None:
131
    uid_pool = uidpool.ParseUidPool(uid_pool)
132

    
133
  if opts.prealloc_wipe_disks is None:
134
    opts.prealloc_wipe_disks = False
135

    
136
  external_ip_setup_script = opts.use_external_mip_script
137
  if external_ip_setup_script is None:
138
    external_ip_setup_script = False
139

    
140
  try:
141
    primary_ip_version = int(opts.primary_ip_version)
142
  except (ValueError, TypeError), err:
143
    ToStderr("Invalid primary ip version value: %s" % str(err))
144
    return 1
145

    
146
  master_netmask = opts.master_netmask
147
  try:
148
    if master_netmask is not None:
149
      master_netmask = int(master_netmask)
150
  except (ValueError, TypeError), err:
151
    ToStderr("Invalid master netmask value: %s" % str(err))
152
    return 1
153

    
154
  bootstrap.InitCluster(cluster_name=args[0],
155
                        secondary_ip=opts.secondary_ip,
156
                        vg_name=vg_name,
157
                        mac_prefix=opts.mac_prefix,
158
                        master_netmask=master_netmask,
159
                        master_netdev=master_netdev,
160
                        file_storage_dir=opts.file_storage_dir,
161
                        shared_file_storage_dir=opts.shared_file_storage_dir,
162
                        enabled_hypervisors=hvlist,
163
                        hvparams=hvparams,
164
                        beparams=beparams,
165
                        nicparams=nicparams,
166
                        ndparams=ndparams,
167
                        candidate_pool_size=opts.candidate_pool_size,
168
                        modify_etc_hosts=opts.modify_etc_hosts,
169
                        modify_ssh_setup=opts.modify_ssh_setup,
170
                        maintain_node_health=opts.maintain_node_health,
171
                        drbd_helper=drbd_helper,
172
                        uid_pool=uid_pool,
173
                        default_iallocator=opts.default_iallocator,
174
                        primary_ip_version=primary_ip_version,
175
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
176
                        use_external_mip_script=external_ip_setup_script,
177
                        )
178
  op = opcodes.OpClusterPostInit()
179
  SubmitOpCode(op, opts=opts)
180
  return 0
181

    
182

    
183
@UsesRPC
def DestroyCluster(opts, args):
  """Tear down the cluster.

  Nothing is done unless the user explicitly confirmed the operation
  on the command line.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.yes_do_it:
    master = SubmitOpCode(opcodes.OpClusterDestroy(), opts=opts)
    # Getting here means the opcode didn't fail, so the remaining step
    # is shutting down all the daemons
    bootstrap.FinalizeClusterDestroy(master)
    return 0

  ToStderr("Destroying a cluster is irreversible. If you really want"
           " destroy this cluster, supply the --yes-do-it option.")
  return 1
205

    
206

    
207
def RenameCluster(opts, args):
  """Give the cluster a new name.

  Unless forced, the user is asked for confirmation first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  # Exactly one value is expected back for the single queried field
  [old_name] = client.QueryConfigValues(["cluster_name"])
  new_name = args[0]

  if not opts.force:
    question = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (old_name, new_name)
    confirmed = AskUser(question)
    if not confirmed:
      return 1

  rename_op = opcodes.OpClusterRename(name=new_name)
  final_name = SubmitOpCode(rename_op, opts=opts, cl=client)

  if final_name:
    ToStdout("Cluster renamed from '%s' to '%s'", old_name, final_name)

  return 0
238

    
239

    
240
def ActivateMasterIp(opts, args):
  """Activates the master IP.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterActivateMasterIp()
  # Pass the options along, as the other commands in this module do when
  # submitting an opcode
  SubmitOpCode(op, opts=opts)
  return 0
247

    
248

    
249
def DeactivateMasterIp(opts, args):
  """Deactivates the master IP.

  Unless the operation was confirmed through the options, the user is
  asked interactively before anything is done.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the desired exit code

  """
  if not opts.confirm:
    usertext = ("This will disable the master IP. All the open connections to"
                " the master IP will be closed. To reach the master you will"
                " need to use its node IP."
                " Continue?")
    if not AskUser(usertext):
      return 1

  op = opcodes.OpClusterDeactivateMasterIp()
  # Pass the options along, as the other commands in this module do when
  # submitting an opcode
  SubmitOpCode(op, opts=opts)
  return 0
264

    
265

    
266
def RedistributeConfig(opts, args):
  """Force a push of the cluster configuration.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  SubmitOrSend(opcodes.OpClusterRedistConf(), opts)
  return 0
279

    
280

    
281
def ShowClusterVersion(opts, args):
  """Print the version information of the ganeti software.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  # Output format and the cluster info field shown with it, in print order
  version_lines = [
    ("Software version: %s", "software_version"),
    ("Internode protocol: %s", "protocol_version"),
    ("Configuration format: %s", "config_version"),
    ("OS api version: %s", "os_api_version"),
    ("Export interface: %s", "export_version"),
    ]

  for (fmt, field) in version_lines:
    ToStdout(fmt, info[field])

  return 0
299

    
300

    
301
def ShowClusterMaster(opts, args):
  """Print the name of the master node.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  ToStdout(bootstrap.GetMaster())
  return 0
314

    
315

    
316
def _PrintGroupedParams(paramsdict, level=1, roman=False):
  """Print Grouped parameters (be, nic, disk) by group.

  Entries are printed sorted by name; values which are dictionaries
  themselves are printed recursively, one indentation level deeper.

  @type paramsdict: dict of dicts
  @param paramsdict: {group: {param: value, ...}, ...}
  @type level: int
  @param level: Level of indentation
  @type roman: bool
  @param roman: whether integer values are passed through
      L{compat.TryToRoman} before being printed

  """
  indent = "  " * level
  for item, val in sorted(paramsdict.items()):
    if isinstance(val, dict):
      ToStdout("%s- %s:", indent, item)
      _PrintGroupedParams(val, level=level + 1, roman=roman)
    elif roman and isinstance(val, int) and not isinstance(val, bool):
      # bool is a subclass of int, so it has to be excluded explicitly;
      # boolean parameters are flags and must be printed unchanged
      ToStdout("%s  %s: %s", indent, item, compat.TryToRoman(val))
    else:
      ToStdout("%s  %s: %s", indent, item, val)
334

    
335

    
336
def ShowClusterConfig(opts, args):
  """Print the cluster-wide information and settings.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  ToStdout("Cluster name: %s", info["name"])
  ToStdout("Cluster UUID: %s", info["uuid"])

  ToStdout("Creation time: %s", utils.FormatTime(info["ctime"]))
  ToStdout("Modification time: %s", utils.FormatTime(info["mtime"]))

  ToStdout("Master node: %s", info["master"])

  ToStdout("Architecture (this node): %s (%s)",
           info["architecture"][0], info["architecture"][1])

  tags = "(none)"
  if info["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(info["tags"]))
  ToStdout("Tags: %s", tags)

  ToStdout("Default hypervisor: %s", info["default_hypervisor"])
  ToStdout("Enabled hypervisors: %s",
           utils.CommaJoin(info["enabled_hypervisors"]))

  # Parameter groups which are printed without the roman option
  plain_groups = [
    ("Hypervisor parameters:", "hvparams"),
    ("OS-specific hypervisor parameters:", "os_hvp"),
    ("OS parameters:", "osparams"),
    ]
  for (title, field) in plain_groups:
    ToStdout(title)
    _PrintGroupedParams(info[field])

  ToStdout("Hidden OSes: %s", utils.CommaJoin(info["hidden_os"]))
  ToStdout("Blacklisted OSes: %s", utils.CommaJoin(info["blacklisted_os"]))

  ToStdout("Cluster parameters:")
  pool_size = compat.TryToRoman(info["candidate_pool_size"],
                                convert=opts.roman_integers)
  ToStdout("  - candidate pool size: %s", pool_size)
  ToStdout("  - master netdev: %s", info["master_netdev"])
  ToStdout("  - master netmask: %s", info["master_netmask"])
  ToStdout("  - use external master IP address setup script: %s",
           info["use_external_mip_script"])
  ToStdout("  - lvm volume group: %s", info["volume_group_name"])

  reserved_lvs = "(none)"
  if info["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(info["reserved_lvs"])
  ToStdout("  - lvm reserved volumes: %s", reserved_lvs)

  ToStdout("  - drbd usermode helper: %s", info["drbd_usermode_helper"])
  ToStdout("  - file storage path: %s", info["file_storage_dir"])
  ToStdout("  - shared file storage path: %s",
           info["shared_file_storage_dir"])
  ToStdout("  - maintenance of node health: %s",
           info["maintain_node_health"])

  uid_pool_text = uidpool.FormatUidPool(info["uid_pool"],
                                        roman=opts.roman_integers)
  ToStdout("  - uid pool: %s", uid_pool_text)

  ToStdout("  - default instance allocator: %s", info["default_iallocator"])
  ToStdout("  - primary ip version: %d", info["primary_ip_version"])
  ToStdout("  - preallocation wipe disks: %s", info["prealloc_wipe_disks"])
  ToStdout("  - OS search path: %s", utils.CommaJoin(constants.OS_SEARCH_PATH))

  # Parameter groups which honour the roman option
  roman_groups = [
    ("Default node parameters:", "ndparams"),
    ("Default instance parameters:", "beparams"),
    ("Default nic parameters:", "nicparams"),
    ]
  for (title, field) in roman_groups:
    ToStdout(title)
    _PrintGroupedParams(info[field], roman=opts.roman_integers)

  return 0
421

    
422

    
423
def ClusterCopyFile(opts, args):
  """Copy a file to the selected online nodes, excluding the master.

  Copy failures are reported on the standard error but do not change
  the exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code

  """
  path = args[0]
  if not os.path.exists(path):
    raise errors.OpPrereqError("No such filename '%s'" % path,
                               errors.ECODE_INVAL)

  client = GetClient()
  cluster_name = client.QueryConfigValues(["cluster_name"])[0]

  targets = GetOnlineNodes(nodes=opts.nodes, cl=client, filter_master=True,
                           secondary_ips=opts.use_replication_network,
                           nodegroup=opts.nodegroup)

  runner = ssh.SshRunner(cluster_name=cluster_name)
  for target in targets:
    if runner.CopyFileToNode(target, path):
      continue
    ToStderr("Copy of file %s to node %s failed", path, target)

  return 0
453

    
454

    
455
def RunClusterCommand(opts, args):
  """Run a command as root on the selected online nodes.

  The output and the exit code of the command are printed for each node;
  the master node, if selected, is handled last.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  command = " ".join(args)

  targets = GetOnlineNodes(nodes=opts.nodes, cl=client,
                           nodegroup=opts.nodegroup)

  (cluster_name, master_node) = \
    client.QueryConfigValues(["cluster_name", "master_node"])

  runner = ssh.SshRunner(cluster_name=cluster_name)

  # Move the master node, when present, to the end of the list
  if master_node in targets:
    targets.remove(master_node)
    targets.append(master_node)

  for node_name in targets:
    outcome = runner.Run(node_name, "root", command)
    ToStdout("------------------------------------------------")
    ToStdout("node: %s", node_name)
    ToStdout("%s", outcome.output)
    ToStdout("return code = %s", outcome.exit_code)

  return 0
489

    
490

    
491
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  The verification opcode returns the IDs of further jobs; these are
  waited for and the exit code reflects whether all of them succeeded
  and returned a true result.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []

  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  results = jex.GetResults()

  # Count the failures with a plain loop, which also works when there
  # are no results at all (both counters then simply stay at zero)
  bad_jobs = 0
  bad_results = 0
  for (job_success, op_results) in results:
    if not job_success:
      bad_jobs += 1
    # A good result consists of exactly one opcode result which is true
    if not (len(op_results) == 1 and op_results[0]):
      bad_results += 1

  if bad_jobs == 0 and bad_results == 0:
    rcode = constants.EXIT_SUCCESS
  else:
    rcode = constants.EXIT_FAILURE
    if bad_jobs > 0:
      ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  return rcode
542

    
543

    
544
def VerifyDisks(opts, args):
545
  """Verify integrity of cluster disks.
546

547
  @param opts: the command line options selected by the user
548
  @type args: list
549
  @param args: should be an empty list
550
  @rtype: int
551
  @return: the desired exit code
552

553
  """
554
  cl = GetClient()
555

    
556
  op = opcodes.OpClusterVerifyDisks()
557

    
558
  result = SubmitOpCode(op, cl=cl, opts=opts)
559

    
560
  # Keep track of submitted jobs
561
  jex = JobExecutor(cl=cl, opts=opts)
562

    
563
  for (status, job_id) in result[constants.JOB_IDS_KEY]:
564
    jex.AddJobId(None, status, job_id)
565

    
566
  retcode = constants.EXIT_SUCCESS
567

    
568
  for (status, result) in jex.GetResults():
569
    if not status:
570
      ToStdout("Job failed: %s", result)
571
      continue
572

    
573
    ((bad_nodes, instances, missing), ) = result
574

    
575
    for node, text in bad_nodes.items():
576
      ToStdout("Error gathering data on node %s: %s",
577
               node, utils.SafeEncode(text[-400:]))
578
      retcode = constants.EXIT_FAILURE
579
      ToStdout("You need to fix these nodes first before fixing instances")
580

    
581
    for iname in instances:
582
      if iname in missing:
583
        continue
584
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
585
      try:
586
        ToStdout("Activating disks for instance '%s'", iname)
587
        SubmitOpCode(op, opts=opts, cl=cl)
588
      except errors.GenericError, err:
589
        nret, msg = FormatError(err)
590
        retcode |= nret
591
        ToStderr("Error activating disks for instance %s: %s", iname, msg)
592

    
593
    if missing:
594
      for iname, ival in missing.iteritems():
595
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
596
        if all_missing:
597
          ToStdout("Instance %s cannot be verified as it lives on"
598
                   " broken nodes", iname)
599
        else:
600
          ToStdout("Instance %s has missing logical volumes:", iname)
601
          ival.sort()
602
          for node, vol in ival:
603
            if node in bad_nodes:
604
              ToStdout("\tbroken node %s /dev/%s", node, vol)
605
            else:
606
              ToStdout("\t%s /dev/%s", node, vol)
607

    
608
      ToStdout("You need to replace or recreate disks for all the above"
609
               " instances if this message persists after fixing broken nodes.")
610
      retcode = constants.EXIT_FAILURE
611

    
612
  return retcode
613

    
614

    
615
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # Return an explicit exit code, as documented and as done by the
  # other commands
  return 0
627

    
628

    
629
@UsesRPC
def MasterFailover(opts, args):
  """Make the current node the new master.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master. When voting is disabled the user has to confirm the operation
  first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.no_voting:
    question = ("This will perform the failover even if most other nodes"
                " are down, or if this node is outdated. This is dangerous"
                " as it can lead to a non-consistent cluster. Check the"
                " gnt-cluster(8) man page before proceeding. Continue?")
    confirmed = AskUser(question)
    if not confirmed:
      return 1

  return bootstrap.MasterFailover(no_voting=opts.no_voting)
653

    
654

    
655
def MasterPing(opts, args):
  """Check whether the master answers a cluster info query.

  Any error while connecting or querying counts as "not alive".

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  try:
    GetClient().QueryClusterInfo()
  except Exception: # pylint: disable=W0703
    return 1

  return 0
671

    
672

    
673
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  The matches are printed sorted, one "path tag" pair per line.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code; 1 if the search returned nothing

  """
  op = opcodes.OpTagsSearch(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  if not result:
    return 1

  for path, tag in sorted(result):
    ToStdout("%s %s", path, tag)

  # Return an explicit exit code, as documented and as done by the
  # other commands
  return 0
691

    
692

    
693
def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
694
  """Reads and verifies an X509 certificate.
695

696
  @type cert_filename: string
697
  @param cert_filename: the path of the file containing the certificate to
698
                        verify encoded in PEM format
699
  @type verify_private_key: bool
700
  @param verify_private_key: whether to verify the private key in addition to
701
                             the public certificate
702
  @rtype: string
703
  @return: a string containing the PEM-encoded certificate.
704

705
  """
706
  try:
707
    pem = utils.ReadFile(cert_filename)
708
  except IOError, err:
709
    raise errors.X509CertError(cert_filename,
710
                               "Unable to read certificate: %s" % str(err))
711

    
712
  try:
713
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
714
  except Exception, err:
715
    raise errors.X509CertError(cert_filename,
716
                               "Unable to load certificate: %s" % str(err))
717

    
718
  if verify_private_key:
719
    try:
720
      OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
721
    except Exception, err:
722
      raise errors.X509CertError(cert_filename,
723
                                 "Unable to load private key: %s" % str(err))
724

    
725
  return pem
726

    
727

    
728
def _RenewCrypto(new_cluster_cert, new_rapi_cert, #pylint: disable=R0911
729
                 rapi_cert_filename, new_spice_cert, spice_cert_filename,
730
                 spice_cacert_filename, new_confd_hmac_key, new_cds,
731
                 cds_filename, force):
732
  """Renews cluster certificates, keys and secrets.
733

734
  @type new_cluster_cert: bool
735
  @param new_cluster_cert: Whether to generate a new cluster certificate
736
  @type new_rapi_cert: bool
737
  @param new_rapi_cert: Whether to generate a new RAPI certificate
738
  @type rapi_cert_filename: string
739
  @param rapi_cert_filename: Path to file containing new RAPI certificate
740
  @type new_spice_cert: bool
741
  @param new_spice_cert: Whether to generate a new SPICE certificate
742
  @type spice_cert_filename: string
743
  @param spice_cert_filename: Path to file containing new SPICE certificate
744
  @type spice_cacert_filename: string
745
  @param spice_cacert_filename: Path to file containing the certificate of the
746
                                CA that signed the SPICE certificate
747
  @type new_confd_hmac_key: bool
748
  @param new_confd_hmac_key: Whether to generate a new HMAC key
749
  @type new_cds: bool
750
  @param new_cds: Whether to generate a new cluster domain secret
751
  @type cds_filename: string
752
  @param cds_filename: Path to file containing new cluster domain secret
753
  @type force: bool
754
  @param force: Whether to ask user for confirmation
755

756
  """
757
  if new_rapi_cert and rapi_cert_filename:
758
    ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
759
             " options can be specified at the same time.")
760
    return 1
761

    
762
  if new_cds and cds_filename:
763
    ToStderr("Only one of the --new-cluster-domain-secret and"
764
             " --cluster-domain-secret options can be specified at"
765
             " the same time.")
766
    return 1
767

    
768
  if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
769
    ToStderr("When using --new-spice-certificate, the --spice-certificate"
770
             " and --spice-ca-certificate must not be used.")
771
    return 1
772

    
773
  if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
774
    ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
775
             " specified.")
776
    return 1
777

    
778
  rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
779
  try:
780
    if rapi_cert_filename:
781
      rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
782
    if spice_cert_filename:
783
      spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
784
      spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
785
  except errors.X509CertError, err:
786
    ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
787
    return 1
788

    
789
  if cds_filename:
790
    try:
791
      cds = utils.ReadFile(cds_filename)
792
    except Exception, err: # pylint: disable=W0703
793
      ToStderr("Can't load new cluster domain secret from %s: %s" %
794
               (cds_filename, str(err)))
795
      return 1
796
  else:
797
    cds = None
798

    
799
  if not force:
800
    usertext = ("This requires all daemons on all nodes to be restarted and"
801
                " may take some time. Continue?")
802
    if not AskUser(usertext):
803
      return 1
804

    
805
  def _RenewCryptoInner(ctx):
806
    ctx.feedback_fn("Updating certificates and keys")
807
    bootstrap.GenerateClusterCrypto(new_cluster_cert,
808
                                    new_rapi_cert,
809
                                    new_spice_cert,
810
                                    new_confd_hmac_key,
811
                                    new_cds,
812
                                    rapi_cert_pem=rapi_cert_pem,
813
                                    spice_cert_pem=spice_cert_pem,
814
                                    spice_cacert_pem=spice_cacert_pem,
815
                                    cds=cds)
816

    
817
    files_to_copy = []
818

    
819
    if new_cluster_cert:
820
      files_to_copy.append(constants.NODED_CERT_FILE)
821

    
822
    if new_rapi_cert or rapi_cert_pem:
823
      files_to_copy.append(constants.RAPI_CERT_FILE)
824

    
825
    if new_spice_cert or spice_cert_pem:
826
      files_to_copy.append(constants.SPICE_CERT_FILE)
827
      files_to_copy.append(constants.SPICE_CACERT_FILE)
828

    
829
    if new_confd_hmac_key:
830
      files_to_copy.append(constants.CONFD_HMAC_KEY)
831

    
832
    if new_cds or cds:
833
      files_to_copy.append(constants.CLUSTER_DOMAIN_SECRET_FILE)
834

    
835
    if files_to_copy:
836
      for node_name in ctx.nonmaster_nodes:
837
        ctx.feedback_fn("Copying %s to %s" %
838
                        (", ".join(files_to_copy), node_name))
839
        for file_name in files_to_copy:
840
          ctx.ssh.CopyFileToNode(node_name, file_name)
841

    
842
  RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
843

    
844
  ToStdout("All requested certificates and keys have been replaced."
845
           " Running \"gnt-cluster verify\" now is recommended.")
846

    
847
  return 0
848

    
849

    
850
def RenewCrypto(opts, args):
  """Replace cluster certificates, keys and secrets.

  Command line entry point, forwarding the relevant options to
  L{_RenewCrypto}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the desired exit code

  """
  return _RenewCrypto(new_cluster_cert=opts.new_cluster_cert,
                      new_rapi_cert=opts.new_rapi_cert,
                      rapi_cert_filename=opts.rapi_cert,
                      new_spice_cert=opts.new_spice_cert,
                      spice_cert_filename=opts.spice_cert,
                      spice_cacert_filename=opts.spice_cacert,
                      new_confd_hmac_key=opts.new_confd_hmac_key,
                      new_cds=opts.new_cluster_domain_secret,
                      cds_filename=opts.cluster_domain_secret,
                      force=opts.force)
864

    
865

    
866
def SetClusterParams(opts, args):
  """Modify the cluster.

  Checks that at least one modification was requested, rejects conflicting
  storage options, converts the raw option values and finally submits an
  L{opcodes.OpClusterSetParams} opcode.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  # Disabling LVM or DRBD storage counts as a requested change, too
  anything_requested = (
    not opts.lvm_storage or opts.vg_name or
    not opts.drbd_storage or opts.drbd_helper or
    opts.enabled_hypervisors or opts.hvparams or
    opts.beparams or opts.nicparams or opts.ndparams or
    opts.candidate_pool_size is not None or
    opts.uid_pool is not None or
    opts.maintain_node_health is not None or
    opts.add_uids is not None or
    opts.remove_uids is not None or
    opts.default_iallocator is not None or
    opts.reserved_lvs is not None or
    opts.master_netdev is not None or
    opts.master_netmask is not None or
    opts.use_external_mip_script is not None or
    opts.prealloc_wipe_disks is not None
    )
  if not anything_requested:
    ToStderr("Please give at least one of the parameters.")
    return 1

  # An empty volume group name is what gets submitted when LVM storage is
  # disabled
  if opts.lvm_storage:
    vg_name = opts.vg_name
  elif opts.vg_name:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1
  else:
    vg_name = ""

  # Same scheme for the DRBD usermode helper
  if opts.drbd_storage:
    drbd_helper = opts.drbd_helper
  elif opts.drbd_helper:
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
    return 1
  else:
    drbd_helper = ""

  enabled_hvs = opts.enabled_hypervisors
  if enabled_hvs is not None:
    enabled_hvs = enabled_hvs.split(",")

  # opts.hvparams is a list of (name, dict) pairs (or an empty list), which
  # dict() accepts directly
  hvparams = dict(opts.hvparams)
  for per_hv_params in hvparams.values():
    utils.ForceDictType(per_hv_params, constants.HVS_PARAMETER_TYPES)

  utils.ForceDictType(opts.beparams, constants.BES_PARAMETER_COMPAT)
  utils.ForceDictType(opts.nicparams, constants.NICS_PARAMETER_TYPES)

  if opts.ndparams is not None:
    utils.ForceDictType(opts.ndparams, constants.NDS_PARAMETER_TYPES)

  def _ParsePool(value):
    """Parses a UID pool specification, passing None through unchanged.

    """
    if value is None:
      return None
    return uidpool.ParseUidPool(value)

  uid_pool = _ParsePool(opts.uid_pool)
  add_uids = _ParsePool(opts.add_uids)
  remove_uids = _ParsePool(opts.remove_uids)

  # The converted values are stored back on opts
  if opts.reserved_lvs == "":
    opts.reserved_lvs = []
  elif opts.reserved_lvs is not None:
    opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")

  if opts.master_netmask is not None:
    try:
      opts.master_netmask = int(opts.master_netmask)
    except ValueError:
      ToStderr("The --master-netmask option expects an int parameter.")
      return 1

  op = opcodes.OpClusterSetParams(
    vg_name=vg_name,
    drbd_helper=drbd_helper,
    enabled_hypervisors=enabled_hvs,
    hvparams=hvparams,
    os_hvp=None,
    beparams=opts.beparams,
    nicparams=opts.nicparams,
    ndparams=opts.ndparams,
    candidate_pool_size=opts.candidate_pool_size,
    maintain_node_health=opts.maintain_node_health,
    uid_pool=uid_pool,
    add_uids=add_uids,
    remove_uids=remove_uids,
    default_iallocator=opts.default_iallocator,
    prealloc_wipe_disks=opts.prealloc_wipe_disks,
    master_netdev=opts.master_netdev,
    master_netmask=opts.master_netmask,
    reserved_lvs=opts.reserved_lvs,
    use_external_mip_script=opts.use_external_mip_script,
    )
  SubmitOpCode(op, opts=opts)
  return 0
980

    
981

    
982
def QueueOps(opts, args):
  """Queue operations.

  Supports setting and clearing the queue drain flag ("drain"/"undrain")
  and printing its current state ("info").

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code

  """
  command = args[0]
  client = GetClient()

  if command in ("drain", "undrain"):
    client.SetQueueDrainFlag(command == "drain")
    return 0

  if command == "info":
    result = client.QueryConfigValues(["drain_flag"])
    if result[0]:
      state = "set"
    else:
      state = "unset"
    ToStdout("The drain flag is %s" % state)
    return 0

  raise errors.OpPrereqError("Command '%s' is not valid." % command,
                             errors.ECODE_INVAL)
1009

    
1010

    
1011
def _ShowWatcherPause(until):
  """Prints whether the watcher is paused and, if so, until when.

  @param until: timestamp the pause ends at, or None; a timestamp lying in
      the past is reported as "not paused"

  """
  if until is None or until < time.time():
    ToStdout("The watcher is not paused.")
    return

  ToStdout("The watcher is paused until %s.", time.ctime(until))
1016

    
1017

    
1018
def WatcherOps(opts, args):
  """Watcher operations.

  Handles the "continue", "pause" and "info" subcommands.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the subcommand, followed by the pause duration in case of
      "pause"
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is unknown or "pause" is
      given without a duration

  """
  command = args[0]
  client = GetClient()

  if command == "continue":
    client.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")
    return 0

  if command == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)

    # The duration is relative, the watcher pause is an absolute timestamp
    pause_until = time.time() + ParseTimespec(args[1])
    _ShowWatcherPause(client.SetWatcherPause(pause_until))
    return 0

  if command == "info":
    result = client.QueryConfigValues(["watcher_pause"])
    _ShowWatcherPause(result[0])
    return 0

  raise errors.OpPrereqError("Command '%s' is not valid." % command,
                             errors.ECODE_INVAL)
1051

    
1052

    
1053
def _OobPower(opts, node_list, power):
  """Switches the given nodes on or off via an out-of-band command.

  A message is written to stderr for every node whose result status is not
  L{constants.RS_NORMAL}.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on
  @param power: True if they should be powered on, False otherwise
  @return: The success of the operation (none failed)

  """
  if power:
    oob_command = constants.OOB_POWER_ON
  else:
    oob_command = constants.OOB_POWER_OFF

  result = SubmitOpCode(opcodes.OpOobCommand(node_names=node_list,
                                             command=oob_command,
                                             ignore_status=True,
                                             timeout=opts.oob_timeout,
                                             power_delay=opts.power_delay),
                        opts=opts)

  failures = 0
  # Each entry is a pair of (status, value) pairs: one for the node name,
  # one for the command result
  for ((_, node_name), (data_status, _)) in result:
    if data_status == constants.RS_NORMAL:
      continue

    assert data_status != constants.RS_UNAVAIL
    failures += 1
    ToStderr("There was a problem changing power for %s, please investigate",
             node_name)

  return failures == 0
1088

    
1089

    
1090
def _InstanceStart(opts, inst_list, start):
  """Starts up or shuts down all instances in the list.

  One job per instance is queued; the function then waits for all results
  and reports the overall outcome.

  @param opts: The command line options selected by the user
  @param inst_list: The list of instances to operate on
  @param start: True if they should be started, False for shutdown
  @return: The success of the operation (none failed)

  """
  if start:
    make_op = opcodes.OpInstanceStartup
    (verb, done, doing) = ("startup", "started", "starting")
  else:
    make_op = compat.partial(opcodes.OpInstanceShutdown,
                             timeout=opts.shutdown_timeout)
    (verb, done, doing) = ("shutdown", "stopped", "stopping")

  jex = JobExecutor(opts=opts)

  for inst in inst_list:
    ToStdout("Submit %s of instance %s", verb, inst)
    jex.QueueJob(inst, make_op(instance_name=inst))

  results = jex.GetResults()
  failed = sum(1 for (success, _) in results if not success)

  if failed:
    ToStderr("There were errors while %s instances:\n"
             "%d error(s) out of %d instance(s)", doing, failed,
             len(results))
    return False

  ToStdout("All instances have been %s successfully", done)
  return True
1126

    
1127

    
1128
class _RunWhenNodesReachableHelper:
  """Keeps track of which nodes are reachable and runs a callback on them.

  Instances are callable (one attempt of running the action callback) and
  provide L{Wait} for probing the nodes which are still down.

  @ivar success: Indicates if all action_cb calls were successful

  """
  def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
               _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
    """Init the object.

    @param node_list: The list of nodes to be reachable
    @param action_cb: Callback called when a new host is reachable
    @type node2ip: dict
    @param node2ip: Node to ip mapping
    @param port: The port to use for the TCP ping
    @param feedback_fn: The function used for feedback
    @param _ping_fn: Function to check reachabilty (for unittest use only)
    @param _sleep_fn: Function to sleep (for unittest use only)

    """
    # Collaborators
    self.action_cb = action_cb
    self.feedback_fn = feedback_fn
    self._ping_fn = _ping_fn
    self._sleep_fn = _sleep_fn

    # Connection details
    self.node2ip = node2ip
    self.port = port

    # State: every node starts out as "down"
    self.down = set(node_list)
    self.up = set()
    self.success = True

  def __call__(self):
    """Runs action_cb on the nodes currently known to be up.

    @return: the accumulated success state once no node is down anymore
    @raises utils.RetryAgain: When there are still down nodes

    """
    if not self.action_cb(self.up):
      self.success = False

    if not self.down:
      return self.success

    raise utils.RetryAgain()

  def Wait(self, secs):
    """Checks if a host is up or waits remaining seconds.

    @param secs: The secs remaining

    """
    start = time.time()

    for node in self.down:
      reachable = self._ping_fn(self.node2ip[node], self.port,
                                timeout=_EPO_PING_TIMEOUT,
                                live_port_needed=True)
      if not reachable:
        continue

      self.feedback_fn("Node %s became available" % node)
      self.up.add(node)
      self.down -= self.up
      # With a newly available node the action callback has a chance to do
      # something useful, hence return right away instead of sleeping
      return

    # Nothing came up; sleep for whatever is left of the requested time
    self._sleep_fn(max(0.0, start + secs - time.time()))
1190

    
1191

    
1192
def _RunWhenNodesReachable(node_list, action_cb, interval):
  """Run action_cb when nodes become reachable.

  The node names are resolved to IP addresses of the cluster's primary IP
  family and the node daemon port is then probed until all nodes answer or
  L{_EPO_REACHABLE_TIMEOUT} is exceeded.

  @param node_list: The list of nodes to be reachable
  @param action_cb: Callback called when a new host is reachable
  @param interval: The earliest time to retry
  @return: The result of the retried helper call; False if the nodes did not
      all become reachable in time

  """
  client = GetClient()
  cluster_info = client.QueryClusterInfo()
  if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
    family = netutils.IPAddress.family
  else:
    family = netutils.IP6Address.family

  node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
                 for node in node_list)

  port = netutils.GetDaemonPort(constants.NODED)
  helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port,
                                        ToStdout)

  try:
    return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
                       wait_fn=helper.Wait)
  except utils.RetryTimeout:
    # One bullet per line; the separator needs its own newline, otherwise
    # all node names end up on a single line
    ToStderr("Time exceeded while waiting for nodes to become reachable"
             " again:\n  - %s", "\n  - ".join(helper.down))
    return False
1221

    
1222

    
1223
def _MaybeInstanceStartup(opts, inst_map, nodes_online,
                          _instance_start_fn=_InstanceStart):
  """Starts those instances whose nodes are all back online.

  Instances selected for startup are removed from C{inst_map}, so they are
  not considered again on a later call with the same mapping.

  @param opts: The command line options selected by the user
  @param inst_map: A dict of inst -> nodes mapping
  @param nodes_online: The set of nodes online
  @param _instance_start_fn: Callback to start instances (unittest use only)
  @return: Success of the operation on all instances

  """
  # An instance is ready once its node set is covered by the online nodes
  ready = [inst for (inst, nodes) in inst_map.items()
           if nodes <= nodes_online]

  for inst in ready:
    del inst_map[inst]

  if not ready:
    return True

  return _instance_start_fn(opts, ready, True)
1247

    
1248

    
1249
def _EpoOn(opts, full_node_list, node_list, inst_map):
  """Does the actual power on.

  Powers on the nodes supporting out-of-band handling, then waits for all
  nodes to become reachable and starts instances as their nodes come up.

  @param opts: The command line options selected by the user
  @param full_node_list: All nodes to operate on (includes nodes not supporting
                         OOB)
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  # power=True: this is the power-on path, the nodes must be switched on
  if node_list and not _OobPower(opts, node_list, True):
    # Not fatal; nodes might still come up (or be started manually), so we
    # carry on waiting for them below
    ToStderr("Not all nodes seem to get back up, investigate and start"
             " manually if needed")

  # Wait for the nodes to be back up; the callback works on a copy of the
  # mapping as it removes the instances it has started
  action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))

  ToStdout("Waiting until all nodes are available again")
  if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
    ToStderr("Please investigate and start stopped instances manually")
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1273

    
1274

    
1275
def _EpoOff(opts, node_list, inst_map):
  """Does the actual power off.

  All instances are shut down first; only if that succeeds are the nodes
  powered off.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  instances = inst_map.keys()
  if not _InstanceStart(opts, instances, False):
    ToStderr("Please investigate and stop instances manually before continuing")
    return constants.EXIT_FAILURE

  # Without nodes to power off there is nothing left that could fail
  if node_list and not _OobPower(opts, node_list, False):
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1295

    
1296

    
1297
def Epo(opts, args):
  """EPO operations.

  Determines the affected nodes and instances and then either shuts
  everything down (default) or brings it back up (C{--on}).

  @param opts: the command line options selected by the user
  @type args: list
  @param args: names of the nodes (or of the node groups if C{--groups} is
      given) to operate on; must be empty in combination with C{--all}
  @rtype: int
  @return: the desired exit code

  """
  if opts.groups and opts.show_all:
    ToStderr("Only one of --groups or --all are allowed")
    return constants.EXIT_FAILURE
  elif args and opts.show_all:
    ToStderr("Arguments in combination with --all are not allowed")
    return constants.EXIT_FAILURE

  client = GetClient()

  if opts.groups:
    # Materialised as a list: an itertools.chain object can be consumed only
    # once and supports neither indexing nor item assignment
    node_query_list = list(
      itertools.chain(*client.QueryGroups(names=args,
                                          fields=["node_list"],
                                          use_locking=False)))
  else:
    node_query_list = args

  result = client.QueryNodes(names=node_query_list,
                             fields=["name", "master", "pinst_list",
                                     "sinst_list", "powered", "offline"],
                             use_locking=False)

  # Names as returned by the query. Unlike the names given by the user this
  # list is also populated when --all was used (i.e. no names were given).
  all_nodes = [row[0] for row in result]

  node_list = []
  inst_map = {}
  for (node, master, pinsts, sinsts, powered, offline) in result:
    if not offline:
      # Record for every instance the non-master nodes it depends on; the
      # master never shows up in the set
      for inst in (pinsts + sinsts):
        if inst in inst_map:
          if not master:
            inst_map[inst].add(node)
        elif master:
          inst_map[inst] = set()
        else:
          inst_map[inst] = set([node])

    if master and opts.on:
      # We ignore the master for turning on the machines, in fact we are
      # already operating on the master at this point :)
      continue
    elif master and not opts.show_all:
      ToStderr("%s is the master node, please do a master-failover to another"
               " node not affected by the EPO or use --all if you intend to"
               " shutdown the whole cluster", node)
      return constants.EXIT_FAILURE
    elif powered is None:
      ToStdout("Node %s does not support out-of-band handling, it can not be"
               " handled in a fully automated manner", node)
    elif powered == opts.on:
      ToStdout("Node %s is already in desired power state, skipping", node)
    elif not offline or powered:
      node_list.append(node)

  if not opts.force and not ConfirmOperation(all_nodes, "nodes", "epo"):
    return constants.EXIT_FAILURE

  if opts.on:
    return _EpoOn(opts, all_nodes, node_list, inst_map)
  else:
    return _EpoOff(opts, node_list, inst_map)
1368

    
1369

    
1370
# Command table of gnt-cluster. Every entry maps the command name to a tuple
# of (handler function, argument definitions, accepted options, usage string,
# description).
commands = {
  "init": (
    InitCluster,
    [ArgHost(min=1, max=1)],
    [
      BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
      HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
      NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT,
      NOMODIFY_SSH_SETUP_OPT, SECONDARY_IP_OPT, VG_NAME_OPT,
      MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, DRBD_HELPER_OPT,
      NODRBD_STORAGE_OPT, DEFAULT_IALLOCATOR_OPT, PRIMARY_IP_VERSION_OPT,
      PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, GLOBAL_SHARED_FILEDIR_OPT,
      USE_EXTERNAL_MIP_SCRIPT,
    ],
    "[opts...] <cluster_name>",
    "Initialises a new cluster configuration"),
  "destroy": (
    DestroyCluster,
    ARGS_NONE,
    [YES_DOIT_OPT],
    "",
    "Destroy cluster"),
  "rename": (
    RenameCluster,
    [ArgHost(min=1, max=1)],
    [FORCE_OPT, DRY_RUN_OPT],
    "<new_name>",
    "Renames the cluster"),
  "redist-conf": (
    RedistributeConfig,
    ARGS_NONE,
    [SUBMIT_OPT, DRY_RUN_OPT, PRIORITY_OPT],
    "",
    "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster"),
  "verify": (
    VerifyCluster,
    ARGS_NONE,
    [
      VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
      DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT,
    ],
    "",
    "Does a check on the cluster configuration"),
  "verify-disks": (
    VerifyDisks,
    ARGS_NONE,
    [PRIORITY_OPT],
    "",
    "Does a check on the cluster disk status"),
  "repair-disk-sizes": (
    RepairDiskSizes,
    ARGS_MANY_INSTANCES,
    [DRY_RUN_OPT, PRIORITY_OPT],
    "[instance...]",
    "Updates mismatches in recorded disk sizes"),
  "master-failover": (
    MasterFailover,
    ARGS_NONE,
    [NOVOTING_OPT],
    "",
    "Makes the current node the master"),
  "master-ping": (
    MasterPing,
    ARGS_NONE,
    [],
    "",
    "Checks if the master is alive"),
  "version": (
    ShowClusterVersion,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster version"),
  "getmaster": (
    ShowClusterMaster,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster master"),
  "copyfile": (
    ClusterCopyFile,
    [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
    "[-n node...] <filename>",
    "Copies a file to all (or only some) nodes"),
  "command": (
    RunClusterCommand,
    [ArgCommand(min=1)],
    [NODE_LIST_OPT, NODEGROUP_OPT],
    "[-n node...] <command>",
    "Runs a command on all (or only some) nodes"),
  "info": (
    ShowClusterConfig,
    ARGS_NONE,
    [ROMAN_OPT],
    "[--roman]",
    "Show cluster configuration"),
  "list-tags": (
    ListTags,
    ARGS_NONE,
    [],
    "",
    "List the tags of the cluster"),
  "add-tags": (
    AddTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT],
    "tag...",
    "Add tags to the cluster"),
  "remove-tags": (
    RemoveTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT],
    "tag...",
    "Remove tags from the cluster"),
  "search-tags": (
    SearchTags,
    [ArgUnknown(min=1, max=1)],
    [PRIORITY_OPT],
    "",
    "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)"),
  "queue": (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [],
    "drain|undrain|info",
    "Change queue properties"),
  "watcher": (
    WatcherOps,
    [
      ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
      ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"]),
    ],
    [],
    "{pause <timespec>|continue|info}",
    "Change watcher properties"),
  "modify": (
    SetClusterParams,
    ARGS_NONE,
    [
      BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT,
      MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT,
      NOLVM_STORAGE_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT,
      ADD_UIDS_OPT, REMOVE_UIDS_OPT, DRBD_HELPER_OPT, NODRBD_STORAGE_OPT,
      DEFAULT_IALLOCATOR_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT,
      PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
    ],
    "[opts...]",
    "Alters the parameters of the cluster"),
  "renew-crypto": (
    RenewCrypto,
    ARGS_NONE,
    [
      NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
      NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
      NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
      NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT,
    ],
    "[opts...]",
    "Renews cluster certificates, keys and secrets"),
  "epo": (
    Epo,
    [ArgUnknown()],
    [
      FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
      SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT,
    ],
    "[opts...] [args]",
    "Performs an emergency power-off on given args"),
  "activate-master-ip": (
    ActivateMasterIp,
    ARGS_NONE,
    [],
    "",
    "Activates the master IP"),
  "deactivate-master-ip": (
    DeactivateMasterIp,
    ARGS_NONE,
    [CONFIRM_OPT],
    "",
    "Deactivates the master IP"),
  }
1479

    
1480

    
1481
#: dictionary mapping alternative command names to the real command
aliases = dict(masterfailover="master-failover")
1485

    
1486

    
1487
def Main():
  """Runs the gnt-cluster command line interface.

  @return: the value returned by L{GenericMain}

  """
  # Tag commands of this script operate on the cluster object
  overrides = {"tag_type": constants.TAG_CLUSTER}
  return GenericMain(commands, override=overrides, aliases=aliases)