Statistics
| Branch: | Tag: | Revision:

root / lib / client / gnt_cluster.py @ 2da9f556

History | View | Annotate | Download (49.1 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Cluster related commands"""
22

    
23
# pylint: disable=W0401,W0613,W0614,C0103
24
# W0401: Wildcard import ganeti.cli
25
# W0613: Unused argument, since all functions follow the same API
26
# W0614: Unused import %s from wildcard import (since we need cli)
27
# C0103: Invalid name gnt-cluster
28

    
29
import os.path
30
import time
31
import OpenSSL
32
import itertools
33

    
34
from ganeti.cli import *
35
from ganeti import opcodes
36
from ganeti import constants
37
from ganeti import errors
38
from ganeti import utils
39
from ganeti import bootstrap
40
from ganeti import ssh
41
from ganeti import objects
42
from ganeti import uidpool
43
from ganeti import compat
44
from ganeti import netutils
45

    
46

    
47
# Switch "--on": according to its help text, used to recover from an EPO
ON_OPT = cli_option("--on", dest="on", action="store_true", default=False,
                    help="Recover from an EPO")

# Switch "--groups": positional arguments then name node groups, not nodes
GROUPS_OPT = cli_option("--groups", dest="groups", action="store_true",
                        default=False,
                        help="Arguments are node groups instead of nodes")

# Timing values, all expressed in seconds
_EPO_PING_INTERVAL = 30 # pause between two pings
_EPO_PING_TIMEOUT = 1
_EPO_REACHABLE_TIMEOUT = 15 * 60 # i.e. 15 minutes
58

    
59

    
60
@UsesRPC
61
def InitCluster(opts, args):
62
  """Initialize the cluster.
63

64
  @param opts: the command line options selected by the user
65
  @type args: list
66
  @param args: should contain only one element, the desired
67
      cluster name
68
  @rtype: int
69
  @return: the desired exit code
70

71
  """
72
  if not opts.lvm_storage and opts.vg_name:
73
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
74
    return 1
75

    
76
  vg_name = opts.vg_name
77
  if opts.lvm_storage and not opts.vg_name:
78
    vg_name = constants.DEFAULT_VG
79

    
80
  if not opts.drbd_storage and opts.drbd_helper:
81
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
82
    return 1
83

    
84
  drbd_helper = opts.drbd_helper
85
  if opts.drbd_storage and not opts.drbd_helper:
86
    drbd_helper = constants.DEFAULT_DRBD_HELPER
87

    
88
  master_netdev = opts.master_netdev
89
  if master_netdev is None:
90
    master_netdev = constants.DEFAULT_BRIDGE
91

    
92
  hvlist = opts.enabled_hypervisors
93
  if hvlist is None:
94
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
95
  hvlist = hvlist.split(",")
96

    
97
  hvparams = dict(opts.hvparams)
98
  beparams = opts.beparams
99
  nicparams = opts.nicparams
100

    
101
  diskparams = dict(opts.diskparams)
102

    
103
  # check the disk template types here, as we cannot rely on the type check done
104
  # by the opcode parameter types
105
  diskparams_keys = set(diskparams.keys())
106
  if not (diskparams_keys <= constants.DISK_TEMPLATES):
107
    unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
108
    ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
109
    return 1
110

    
111
  # prepare beparams dict
112
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
113
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
114

    
115
  # prepare nicparams dict
116
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
117
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
118

    
119
  # prepare ndparams dict
120
  if opts.ndparams is None:
121
    ndparams = dict(constants.NDC_DEFAULTS)
122
  else:
123
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
124
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
125

    
126
  # prepare hvparams dict
127
  for hv in constants.HYPER_TYPES:
128
    if hv not in hvparams:
129
      hvparams[hv] = {}
130
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
131
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)
132

    
133
  # prepare diskparams dict
134
  for templ in constants.DISK_TEMPLATES:
135
    if templ not in diskparams:
136
      diskparams[templ] = {}
137
    diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
138
                                         diskparams[templ])
139
    utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)
140

    
141
  if opts.candidate_pool_size is None:
142
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT
143

    
144
  if opts.mac_prefix is None:
145
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX
146

    
147
  uid_pool = opts.uid_pool
148
  if uid_pool is not None:
149
    uid_pool = uidpool.ParseUidPool(uid_pool)
150

    
151
  if opts.prealloc_wipe_disks is None:
152
    opts.prealloc_wipe_disks = False
153

    
154
  external_ip_setup_script = opts.use_external_mip_script
155
  if external_ip_setup_script is None:
156
    external_ip_setup_script = False
157

    
158
  try:
159
    primary_ip_version = int(opts.primary_ip_version)
160
  except (ValueError, TypeError), err:
161
    ToStderr("Invalid primary ip version value: %s" % str(err))
162
    return 1
163

    
164
  master_netmask = opts.master_netmask
165
  try:
166
    if master_netmask is not None:
167
      master_netmask = int(master_netmask)
168
  except (ValueError, TypeError), err:
169
    ToStderr("Invalid master netmask value: %s" % str(err))
170
    return 1
171

    
172
  bootstrap.InitCluster(cluster_name=args[0],
173
                        secondary_ip=opts.secondary_ip,
174
                        vg_name=vg_name,
175
                        mac_prefix=opts.mac_prefix,
176
                        master_netmask=master_netmask,
177
                        master_netdev=master_netdev,
178
                        file_storage_dir=opts.file_storage_dir,
179
                        shared_file_storage_dir=opts.shared_file_storage_dir,
180
                        enabled_hypervisors=hvlist,
181
                        hvparams=hvparams,
182
                        beparams=beparams,
183
                        nicparams=nicparams,
184
                        ndparams=ndparams,
185
                        diskparams=diskparams,
186
                        candidate_pool_size=opts.candidate_pool_size,
187
                        modify_etc_hosts=opts.modify_etc_hosts,
188
                        modify_ssh_setup=opts.modify_ssh_setup,
189
                        maintain_node_health=opts.maintain_node_health,
190
                        drbd_helper=drbd_helper,
191
                        uid_pool=uid_pool,
192
                        default_iallocator=opts.default_iallocator,
193
                        primary_ip_version=primary_ip_version,
194
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
195
                        use_external_mip_script=external_ip_setup_script,
196
                        )
197
  op = opcodes.OpClusterPostInit()
198
  SubmitOpCode(op, opts=opts)
199
  return 0
200

    
201

    
202
@UsesRPC
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Nothing is done unless the C{yes_do_it} option is set.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.yes_do_it:
    master = SubmitOpCode(opcodes.OpClusterDestroy(), opts=opts)
    # Getting here means the opcode did not fail, hence the daemons can
    # be shut down now
    bootstrap.FinalizeClusterDestroy(master)
    return 0

  ToStderr("Destroying a cluster is irreversible. If you really want"
           " destroy this cluster, supply the --yes-do-it option.")
  return 1
224

    
225

    
226
def RenameCluster(opts, args):
  """Rename the cluster.

  Unless the C{force} option is set, the user is asked for confirmation
  before the rename opcode is submitted.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  (old_name, ) = client.QueryConfigValues(["cluster_name"])
  new_name = args[0]

  if not opts.force:
    question = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (old_name, new_name)
    confirmed = AskUser(question)
    if not confirmed:
      return 1

  rename_op = opcodes.OpClusterRename(name=new_name)
  reported_name = SubmitOpCode(rename_op, opts=opts, cl=client)

  # Only report the rename if the opcode returned a non-empty result
  if reported_name:
    ToStdout("Cluster renamed from '%s' to '%s'", old_name, reported_name)

  return 0
257

    
258

    
259
def ActivateMasterIp(opts, args):
  """Activates the master IP.

  Submits an L{opcodes.OpClusterActivateMasterIp} opcode; neither C{opts}
  nor C{args} is looked at.

  @rtype: int
  @return: always 0

  """
  SubmitOpCode(opcodes.OpClusterActivateMasterIp())
  return 0
266

    
267

    
268
def DeactivateMasterIp(opts, args):
  """Deactivates the master IP.

  Unless the C{confirm} option is set, the user is asked for confirmation
  first.

  @param opts: the command line options selected by the user
  @rtype: int
  @return: 1 if the user declined, 0 otherwise

  """
  if not opts.confirm:
    question = ("This will disable the master IP. All the open connections to"
                " the master IP will be closed. To reach the master you will"
                " need to use its node IP."
                " Continue?")
    confirmed = AskUser(question)
    if not confirmed:
      return 1

  SubmitOpCode(opcodes.OpClusterDeactivateMasterIp())
  return 0
283

    
284

    
285
def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  Hands an L{opcodes.OpClusterRedistConf} opcode to L{SubmitOrSend}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  SubmitOrSend(opcodes.OpClusterRedistConf(), opts)
  return 0
298

    
299

    
300
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  The values are taken from the result of a cluster info query.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  # (output format, key in the query result), in output order
  version_lines = [
    ("Software version: %s", "software_version"),
    ("Internode protocol: %s", "protocol_version"),
    ("Configuration format: %s", "config_version"),
    ("OS api version: %s", "os_api_version"),
    ("Export interface: %s", "export_version"),
    ]

  for (fmt, key) in version_lines:
    ToStdout(fmt, info[key])

  return 0
318

    
319

    
320
def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  The value printed is whatever L{bootstrap.GetMaster} returns.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  ToStdout(bootstrap.GetMaster())
  return 0
333

    
334

    
335
def _PrintGroupedParams(paramsdict, level=1, roman=False):
  """Print Grouped parameters (be, nic, disk) by group.

  Entries are printed sorted by key; values that are dictionaries
  themselves are printed recursively, one level deeper.

  @type paramsdict: dict of dicts
  @param paramsdict: {group: {param: value, ...}, ...}
  @type level: int
  @param level: Level of indention
  @type roman: bool
  @param roman: if true, integer values are passed through
      L{compat.TryToRoman} before being printed

  """
  prefix = level * "  "

  for (name, value) in sorted(paramsdict.items()):
    if isinstance(value, dict):
      # A nested group: print its header, then descend
      ToStdout("%s- %s:", prefix, name)
      _PrintGroupedParams(value, level=level + 1, roman=roman)
      continue

    if roman and isinstance(value, int):
      value = compat.TryToRoman(value)

    ToStdout("%s  %s: %s", prefix, name, value)
353

    
354

    
355
def ShowClusterConfig(opts, args):
  """Shows cluster information.

  Queries the cluster info once and prints it to the standard output,
  section by section.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  for (fmt, key) in [("Cluster name: %s", "name"),
                     ("Cluster UUID: %s", "uuid")]:
    ToStdout(fmt, info[key])

  for (fmt, key) in [("Creation time: %s", "ctime"),
                     ("Modification time: %s", "mtime")]:
    ToStdout(fmt, utils.FormatTime(info[key]))

  ToStdout("Master node: %s", info["master"])

  arch = info["architecture"]
  ToStdout("Architecture (this node): %s (%s)", arch[0], arch[1])

  tags = "(none)"
  if info["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(info["tags"]))
  ToStdout("Tags: %s", tags)

  ToStdout("Default hypervisor: %s", info["default_hypervisor"])
  ToStdout("Enabled hypervisors: %s",
           utils.CommaJoin(info["enabled_hypervisors"]))

  # These sections are printed without roman numeral conversion
  for (title, key) in [("Hypervisor parameters:", "hvparams"),
                       ("OS-specific hypervisor parameters:", "os_hvp"),
                       ("OS parameters:", "osparams")]:
    ToStdout(title)
    _PrintGroupedParams(info[key])

  for (fmt, key) in [("Hidden OSes: %s", "hidden_os"),
                     ("Blacklisted OSes: %s", "blacklisted_os")]:
    ToStdout(fmt, utils.CommaJoin(info[key]))

  ToStdout("Cluster parameters:")
  pool_size = compat.TryToRoman(info["candidate_pool_size"],
                                convert=opts.roman_integers)
  ToStdout("  - candidate pool size: %s", pool_size)

  for (fmt, key) in [
    ("  - master netdev: %s", "master_netdev"),
    ("  - master netmask: %s", "master_netmask"),
    ("  - use external master IP address setup script: %s",
     "use_external_mip_script"),
    ("  - lvm volume group: %s", "volume_group_name"),
    ]:
    ToStdout(fmt, info[key])

  reserved_lvs = "(none)"
  if info["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(info["reserved_lvs"])
  ToStdout("  - lvm reserved volumes: %s", reserved_lvs)

  for (fmt, key) in [
    ("  - drbd usermode helper: %s", "drbd_usermode_helper"),
    ("  - file storage path: %s", "file_storage_dir"),
    ("  - shared file storage path: %s", "shared_file_storage_dir"),
    ("  - maintenance of node health: %s", "maintain_node_health"),
    ]:
    ToStdout(fmt, info[key])

  uid_pool_text = uidpool.FormatUidPool(info["uid_pool"],
                                        roman=opts.roman_integers)
  ToStdout("  - uid pool: %s", uid_pool_text)

  ToStdout("  - default instance allocator: %s", info["default_iallocator"])
  ToStdout("  - primary ip version: %d", info["primary_ip_version"])
  ToStdout("  - preallocation wipe disks: %s", info["prealloc_wipe_disks"])
  ToStdout("  - OS search path: %s", utils.CommaJoin(constants.OS_SEARCH_PATH))

  # The default parameter sections honour the roman integers option
  for (title, key) in [("Default node parameters:", "ndparams"),
                       ("Default instance parameters:", "beparams"),
                       ("Default nic parameters:", "nicparams")]:
    ToStdout(title)
    _PrintGroupedParams(info[key], roman=opts.roman_integers)

  return 0
440

    
441

    
442
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  The target nodes are the online nodes selected through the C{nodes} and
  C{nodegroup} options, with the master filtered out. A failed copy is
  reported on the standard error but does not change the exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the given path does not exist

  """
  filename = args[0]
  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename,
                               errors.ECODE_INVAL)

  client = GetClient()

  cluster_name = client.QueryConfigValues(["cluster_name"])[0]

  target_nodes = GetOnlineNodes(nodes=opts.nodes, cl=client,
                                filter_master=True,
                                secondary_ips=opts.use_replication_network,
                                nodegroup=opts.nodegroup)

  runner = ssh.SshRunner(cluster_name=cluster_name)
  for node in target_nodes:
    copied = runner.CopyFileToNode(node, filename)
    if copied:
      continue
    ToStderr("Copy of file %s to node %s failed", filename, node)

  return 0
472

    
473

    
474
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  The arguments are joined with single spaces into one command string,
  which is run as user "root" on every selected online node; for each
  node the output and the exit code are printed.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  command = " ".join(args)

  nodes = GetOnlineNodes(nodes=opts.nodes, cl=client,
                         nodegroup=opts.nodegroup)

  (cluster_name, master_node) = \
    client.QueryConfigValues(["cluster_name", "master_node"])

  runner = ssh.SshRunner(cluster_name=cluster_name)

  # The master node, if selected, must come last
  if master_node in nodes:
    nodes.remove(master_node)
    nodes.append(master_node)

  separator = "------------------------------------------------"
  for node_name in nodes:
    cmd_result = runner.Run(node_name, "root", command)
    ToStdout(separator)
    ToStdout("node: %s", node_name)
    ToStdout("%s", cmd_result.output)
    ToStdout("return code = %s", cmd_result.exit_code)

  return 0
508

    
509

    
510
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  An L{opcodes.OpClusterVerify} opcode is submitted; its result lists
  further jobs, which are tracked through a L{JobExecutor}. The exit code
  reflects whether all of those jobs succeeded and each returned exactly
  one true opcode result.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []

  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  # Count failed jobs and unsuccessful results with a plain loop; unlike
  # unpacking the output of nested map()/zip() calls, this also works if
  # the list of results is empty (both counters simply stay at zero)
  bad_jobs = 0
  bad_results = 0
  for (job_success, op_results) in jex.GetResults():
    if not job_success:
      bad_jobs += 1
    # A good result consists of exactly one opcode result, which is true
    if not (len(op_results) == 1 and op_results[0]):
      bad_results += 1

  if bad_jobs == 0 and bad_results == 0:
    rcode = constants.EXIT_SUCCESS
  else:
    rcode = constants.EXIT_FAILURE
    if bad_jobs > 0:
      ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  return rcode
561

    
562

    
563
def VerifyDisks(opts, args):
564
  """Verify integrity of cluster disks.
565

566
  @param opts: the command line options selected by the user
567
  @type args: list
568
  @param args: should be an empty list
569
  @rtype: int
570
  @return: the desired exit code
571

572
  """
573
  cl = GetClient()
574

    
575
  op = opcodes.OpClusterVerifyDisks()
576

    
577
  result = SubmitOpCode(op, cl=cl, opts=opts)
578

    
579
  # Keep track of submitted jobs
580
  jex = JobExecutor(cl=cl, opts=opts)
581

    
582
  for (status, job_id) in result[constants.JOB_IDS_KEY]:
583
    jex.AddJobId(None, status, job_id)
584

    
585
  retcode = constants.EXIT_SUCCESS
586

    
587
  for (status, result) in jex.GetResults():
588
    if not status:
589
      ToStdout("Job failed: %s", result)
590
      continue
591

    
592
    ((bad_nodes, instances, missing), ) = result
593

    
594
    for node, text in bad_nodes.items():
595
      ToStdout("Error gathering data on node %s: %s",
596
               node, utils.SafeEncode(text[-400:]))
597
      retcode = constants.EXIT_FAILURE
598
      ToStdout("You need to fix these nodes first before fixing instances")
599

    
600
    for iname in instances:
601
      if iname in missing:
602
        continue
603
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
604
      try:
605
        ToStdout("Activating disks for instance '%s'", iname)
606
        SubmitOpCode(op, opts=opts, cl=cl)
607
      except errors.GenericError, err:
608
        nret, msg = FormatError(err)
609
        retcode |= nret
610
        ToStderr("Error activating disks for instance %s: %s", iname, msg)
611

    
612
    if missing:
613
      for iname, ival in missing.iteritems():
614
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
615
        if all_missing:
616
          ToStdout("Instance %s cannot be verified as it lives on"
617
                   " broken nodes", iname)
618
        else:
619
          ToStdout("Instance %s has missing logical volumes:", iname)
620
          ival.sort()
621
          for node, vol in ival:
622
            if node in bad_nodes:
623
              ToStdout("\tbroken node %s /dev/%s", node, vol)
624
            else:
625
              ToStdout("\t%s /dev/%s", node, vol)
626

    
627
      ToStdout("You need to replace or recreate disks for all the above"
628
               " instances if this message persists after fixing broken nodes.")
629
      retcode = constants.EXIT_FAILURE
630

    
631
  return retcode
632

    
633

    
634
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  Submits an L{opcodes.OpClusterRepairDiskSizes} opcode for the given
  instances.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # Return the exit code explicitly, as documented above and as done by
  # the other commands in this file, instead of falling off the end
  return 0
646

    
647

    
648
@UsesRPC
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  If the C{no_voting} option is set, the user is asked for confirmation
  first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.no_voting:
    question = ("This will perform the failover even if most other nodes"
                " are down, or if this node is outdated. This is dangerous"
                " as it can lead to a non-consistent cluster. Check the"
                " gnt-cluster(8) man page before proceeding. Continue?")
    confirmed = AskUser(question)
    if not confirmed:
      return 1

  return bootstrap.MasterFailover(no_voting=opts.no_voting)
672

    
673

    
674
def MasterPing(opts, args):
  """Checks if the master is alive.

  The master counts as alive if a client can be obtained and a cluster
  info query through it raises no exception.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  try:
    GetClient().QueryClusterInfo()
  except Exception: # pylint: disable=W0703
    # Any failure whatsoever is reported through the exit code only
    return 1

  return 0
690

    
691

    
692
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  The matches returned by the L{opcodes.OpTagsSearch} opcode are printed
  in sorted order, one "path tag" pair per line.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code (1 if the search returned nothing,
      0 otherwise)

  """
  op = opcodes.OpTagsSearch(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  if not result:
    return 1

  for (path, tag) in sorted(result):
    ToStdout("%s %s", path, tag)

  # Return the exit code explicitly, as documented above, instead of
  # falling off the end of the function
  return 0
710

    
711

    
712
def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
713
  """Reads and verifies an X509 certificate.
714

715
  @type cert_filename: string
716
  @param cert_filename: the path of the file containing the certificate to
717
                        verify encoded in PEM format
718
  @type verify_private_key: bool
719
  @param verify_private_key: whether to verify the private key in addition to
720
                             the public certificate
721
  @rtype: string
722
  @return: a string containing the PEM-encoded certificate.
723

724
  """
725
  try:
726
    pem = utils.ReadFile(cert_filename)
727
  except IOError, err:
728
    raise errors.X509CertError(cert_filename,
729
                               "Unable to read certificate: %s" % str(err))
730

    
731
  try:
732
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
733
  except Exception, err:
734
    raise errors.X509CertError(cert_filename,
735
                               "Unable to load certificate: %s" % str(err))
736

    
737
  if verify_private_key:
738
    try:
739
      OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
740
    except Exception, err:
741
      raise errors.X509CertError(cert_filename,
742
                                 "Unable to load private key: %s" % str(err))
743

    
744
  return pem
745

    
746

    
747
def _RenewCrypto(new_cluster_cert, new_rapi_cert, #pylint: disable=R0911
748
                 rapi_cert_filename, new_spice_cert, spice_cert_filename,
749
                 spice_cacert_filename, new_confd_hmac_key, new_cds,
750
                 cds_filename, force):
751
  """Renews cluster certificates, keys and secrets.
752

753
  @type new_cluster_cert: bool
754
  @param new_cluster_cert: Whether to generate a new cluster certificate
755
  @type new_rapi_cert: bool
756
  @param new_rapi_cert: Whether to generate a new RAPI certificate
757
  @type rapi_cert_filename: string
758
  @param rapi_cert_filename: Path to file containing new RAPI certificate
759
  @type new_spice_cert: bool
760
  @param new_spice_cert: Whether to generate a new SPICE certificate
761
  @type spice_cert_filename: string
762
  @param spice_cert_filename: Path to file containing new SPICE certificate
763
  @type spice_cacert_filename: string
764
  @param spice_cacert_filename: Path to file containing the certificate of the
765
                                CA that signed the SPICE certificate
766
  @type new_confd_hmac_key: bool
767
  @param new_confd_hmac_key: Whether to generate a new HMAC key
768
  @type new_cds: bool
769
  @param new_cds: Whether to generate a new cluster domain secret
770
  @type cds_filename: string
771
  @param cds_filename: Path to file containing new cluster domain secret
772
  @type force: bool
773
  @param force: Whether to ask user for confirmation
774

775
  """
776
  if new_rapi_cert and rapi_cert_filename:
777
    ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
778
             " options can be specified at the same time.")
779
    return 1
780

    
781
  if new_cds and cds_filename:
782
    ToStderr("Only one of the --new-cluster-domain-secret and"
783
             " --cluster-domain-secret options can be specified at"
784
             " the same time.")
785
    return 1
786

    
787
  if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
788
    ToStderr("When using --new-spice-certificate, the --spice-certificate"
789
             " and --spice-ca-certificate must not be used.")
790
    return 1
791

    
792
  if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
793
    ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
794
             " specified.")
795
    return 1
796

    
797
  rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
798
  try:
799
    if rapi_cert_filename:
800
      rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
801
    if spice_cert_filename:
802
      spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
803
      spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
804
  except errors.X509CertError, err:
805
    ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
806
    return 1
807

    
808
  if cds_filename:
809
    try:
810
      cds = utils.ReadFile(cds_filename)
811
    except Exception, err: # pylint: disable=W0703
812
      ToStderr("Can't load new cluster domain secret from %s: %s" %
813
               (cds_filename, str(err)))
814
      return 1
815
  else:
816
    cds = None
817

    
818
  if not force:
819
    usertext = ("This requires all daemons on all nodes to be restarted and"
820
                " may take some time. Continue?")
821
    if not AskUser(usertext):
822
      return 1
823

    
824
  def _RenewCryptoInner(ctx):
825
    ctx.feedback_fn("Updating certificates and keys")
826
    bootstrap.GenerateClusterCrypto(new_cluster_cert,
827
                                    new_rapi_cert,
828
                                    new_spice_cert,
829
                                    new_confd_hmac_key,
830
                                    new_cds,
831
                                    rapi_cert_pem=rapi_cert_pem,
832
                                    spice_cert_pem=spice_cert_pem,
833
                                    spice_cacert_pem=spice_cacert_pem,
834
                                    cds=cds)
835

    
836
    files_to_copy = []
837

    
838
    if new_cluster_cert:
839
      files_to_copy.append(constants.NODED_CERT_FILE)
840

    
841
    if new_rapi_cert or rapi_cert_pem:
842
      files_to_copy.append(constants.RAPI_CERT_FILE)
843

    
844
    if new_spice_cert or spice_cert_pem:
845
      files_to_copy.append(constants.SPICE_CERT_FILE)
846
      files_to_copy.append(constants.SPICE_CACERT_FILE)
847

    
848
    if new_confd_hmac_key:
849
      files_to_copy.append(constants.CONFD_HMAC_KEY)
850

    
851
    if new_cds or cds:
852
      files_to_copy.append(constants.CLUSTER_DOMAIN_SECRET_FILE)
853

    
854
    if files_to_copy:
855
      for node_name in ctx.nonmaster_nodes:
856
        ctx.feedback_fn("Copying %s to %s" %
857
                        (", ".join(files_to_copy), node_name))
858
        for file_name in files_to_copy:
859
          ctx.ssh.CopyFileToNode(node_name, file_name)
860

    
861
  RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
862

    
863
  ToStdout("All requested certificates and keys have been replaced."
864
           " Running \"gnt-cluster verify\" now is recommended.")
865

    
866
  return 0
867

    
868

    
869
def RenewCrypto(opts, args):
  """Renews cluster certificates, keys and secrets.

  Command-line wrapper: unpacks the relevant options and forwards them, in
  positional order, to L{_RenewCrypto}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @return: whatever L{_RenewCrypto} returns (used as the exit code)

  """
  # The order below is the positional order expected by _RenewCrypto
  renew_args = (
    opts.new_cluster_cert,
    opts.new_rapi_cert,
    opts.rapi_cert,
    opts.new_spice_cert,
    opts.spice_cert,
    opts.spice_cacert,
    opts.new_confd_hmac_key,
    opts.new_cluster_domain_secret,
    opts.cluster_domain_secret,
    opts.force,
    )

  return _RenewCrypto(*renew_args)
883

    
884

    
885
def SetClusterParams(opts, args):
  """Modify the cluster.

  Validates and normalizes the command line options, builds an
  L{opcodes.OpClusterSetParams} opcode out of them and submits it.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  # Refuse to submit a job when not a single modification was requested
  if not (not opts.lvm_storage or opts.vg_name or
          not opts.drbd_storage or opts.drbd_helper or
          opts.enabled_hypervisors or opts.hvparams or
          opts.beparams or opts.nicparams or
          opts.ndparams or opts.diskparams or
          opts.candidate_pool_size is not None or
          opts.uid_pool is not None or
          opts.maintain_node_health is not None or
          opts.add_uids is not None or
          opts.remove_uids is not None or
          opts.default_iallocator is not None or
          opts.reserved_lvs is not None or
          opts.master_netdev is not None or
          opts.master_netmask is not None or
          opts.use_external_mip_script is not None or
          opts.prealloc_wipe_disks is not None or
          opts.hv_state or
          opts.disk_state):
    ToStderr("Please give at least one of the parameters.")
    return 1

  vg_name = opts.vg_name
  if not opts.lvm_storage and opts.vg_name:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  # With LVM storage disabled an empty volume group name is submitted
  if not opts.lvm_storage:
    vg_name = ""

  drbd_helper = opts.drbd_helper
  if not opts.drbd_storage and opts.drbd_helper:
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
    return 1

  # With DRBD storage disabled an empty helper is submitted
  if not opts.drbd_storage:
    drbd_helper = ""

  hvlist = opts.enabled_hypervisors
  if hvlist is not None:
    hvlist = hvlist.split(",")

  # a list of (name, dict) we can pass directly to dict() (or [])
  hvparams = dict(opts.hvparams)
  for hv_params in hvparams.values():
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

  diskparams = dict(opts.diskparams)

  # The disk parameter types have to be enforced on the disk parameters
  # themselves (not on the hypervisor parameters)
  for dt_params in diskparams.values():
    utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  beparams = opts.beparams
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)

  nicparams = opts.nicparams
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  ndparams = opts.ndparams
  if ndparams is not None:
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  mnh = opts.maintain_node_health

  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  add_uids = opts.add_uids
  if add_uids is not None:
    add_uids = uidpool.ParseUidPool(add_uids)

  remove_uids = opts.remove_uids
  if remove_uids is not None:
    remove_uids = uidpool.ParseUidPool(remove_uids)

  # An empty string means an empty list of reserved LVs, whereas None (option
  # not given) is passed through unchanged
  if opts.reserved_lvs is not None:
    if opts.reserved_lvs == "":
      opts.reserved_lvs = []
    else:
      opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")

  if opts.master_netmask is not None:
    try:
      opts.master_netmask = int(opts.master_netmask)
    except ValueError:
      ToStderr("The --master-netmask option expects an int parameter.")
      return 1

  ext_ip_script = opts.use_external_mip_script

  if opts.disk_state:
    disk_state = utils.FlatToDict(opts.disk_state)
  else:
    disk_state = {}

  hv_state = dict(opts.hv_state)

  op = opcodes.OpClusterSetParams(vg_name=vg_name,
                                  drbd_helper=drbd_helper,
                                  enabled_hypervisors=hvlist,
                                  hvparams=hvparams,
                                  os_hvp=None,
                                  beparams=beparams,
                                  nicparams=nicparams,
                                  ndparams=ndparams,
                                  diskparams=diskparams,
                                  candidate_pool_size=opts.candidate_pool_size,
                                  maintain_node_health=mnh,
                                  uid_pool=uid_pool,
                                  add_uids=add_uids,
                                  remove_uids=remove_uids,
                                  default_iallocator=opts.default_iallocator,
                                  prealloc_wipe_disks=opts.prealloc_wipe_disks,
                                  master_netdev=opts.master_netdev,
                                  master_netmask=opts.master_netmask,
                                  reserved_lvs=opts.reserved_lvs,
                                  use_external_mip_script=ext_ip_script,
                                  hv_state=hv_state,
                                  disk_state=disk_state,
                                  )
  SubmitOpCode(op, opts=opts)
  return 0
1017

    
1018

    
1019
def QueueOps(opts, args):
  """Queue operations.

  Supported subcommands are C{drain} and C{undrain}, which set or clear the
  queue drain flag, and C{info}, which prints the current flag.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not known

  """
  subcmd = args[0]
  cl = GetClient()

  if subcmd == "info":
    drained = cl.QueryConfigValues(["drain_flag"])[0]
    if drained:
      state = "set"
    else:
      state = "unset"
    ToStdout("The drain flag is %s" % state)

  elif subcmd in ("drain", "undrain"):
    # "drain" sets the flag, "undrain" clears it
    cl.SetQueueDrainFlag(subcmd == "drain")

  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % subcmd,
                               errors.ECODE_INVAL)

  return 0
1046

    
1047

    
1048
def _ShowWatcherPause(until):
  """Prints whether the watcher is currently paused.

  @param until: timestamp until which the watcher is paused; C{None} or a
      timestamp in the past both mean that it is not paused

  """
  if until is not None and not (until < time.time()):
    ToStdout("The watcher is paused until %s.", time.ctime(until))
    return

  ToStdout("The watcher is not paused.")
1053

    
1054

    
1055
def WatcherOps(opts, args):
  """Watcher operations.

  Supported subcommands are C{pause} (which needs a duration as second
  argument), C{continue} and C{info}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the subcommand, followed by the pause duration for C{pause}
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not known or the pause
      duration is missing

  """
  subcmd = args[0]
  cl = GetClient()

  if subcmd == "info":
    _ShowWatcherPause(cl.QueryConfigValues(["watcher_pause"])[0])

  elif subcmd == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)

    # The duration is relative, the pause end is an absolute timestamp
    pause_until = time.time() + ParseTimespec(args[1])
    _ShowWatcherPause(cl.SetWatcherPause(pause_until))

  elif subcmd == "continue":
    cl.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")

  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % subcmd,
                               errors.ECODE_INVAL)

  return 0
1088

    
1089

    
1090
def _OobPower(opts, node_list, power):
  """Puts the node in the list to desired power state.

  Submits a single out-of-band opcode for all nodes and reports every node
  whose result status is not L{constants.RS_NORMAL}.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on
  @param power: True if they should be powered on, False otherwise
  @rtype: bool
  @return: The success of the operation (none failed)

  """
  if not power:
    oob_command = constants.OOB_POWER_OFF
  else:
    oob_command = constants.OOB_POWER_ON

  op = opcodes.OpOobCommand(node_names=node_list,
                            command=oob_command,
                            ignore_status=True,
                            timeout=opts.oob_timeout,
                            power_delay=opts.power_delay)

  failed = 0
  # Every result entry is a ((_, node name), (status, _)) pair
  for ((_, node_name), (data_status, _)) in SubmitOpCode(op, opts=opts):
    if data_status == constants.RS_NORMAL:
      continue

    assert data_status != constants.RS_UNAVAIL
    failed += 1
    ToStderr("There was a problem changing power for %s, please investigate",
             node_name)

  return failed == 0
1125

    
1126

    
1127
def _InstanceStart(opts, inst_list, start):
  """Puts the instances in the list to desired state.

  One job per instance is queued; the function then waits for all of them
  and reports how many failed.

  @param opts: The command line options selected by the user
  @param inst_list: The list of instances to operate on
  @param start: True if they should be started, False for shutdown
  @rtype: bool
  @return: The success of the operation (none failed)

  """
  if not start:
    # Shutdown opcodes additionally carry the shutdown timeout
    opcls = compat.partial(opcodes.OpInstanceShutdown,
                           timeout=opts.shutdown_timeout)
    (text_submit, text_success, text_failed) = \
      ("shutdown", "stopped", "stopping")
  else:
    opcls = opcodes.OpInstanceStartup
    (text_submit, text_success, text_failed) = \
      ("startup", "started", "starting")

  jex = JobExecutor(opts=opts)

  for inst in inst_list:
    ToStdout("Submit %s of instance %s", text_submit, inst)
    jex.QueueJob(inst, opcls(instance_name=inst))

  results = jex.GetResults()
  bad_cnt = sum(1 for (success, _) in results if not success)

  if bad_cnt:
    ToStderr("There were errors while %s instances:\n"
             "%d error(s) out of %d instance(s)", text_failed, bad_cnt,
             len(results))
    return False

  ToStdout("All instances have been %s successfully", text_success)
  return True
1163

    
1164

    
1165
class _RunWhenNodesReachableHelper:
  """Helper class to make shared internal state sharing easier.

  The object tracks which nodes are still down and which ones came up; it is
  both the retried function (L{__call__}) and the provider of the wait
  function (L{Wait}).

  @ivar success: Indicates if all action_cb calls were successful

  """
  def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
               _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
    """Init the object.

    @param node_list: The list of nodes to be reachable
    @param action_cb: Callback called when a new host is reachable
    @type node2ip: dict
    @param node2ip: Node to ip mapping
    @param port: The port to use for the TCP ping
    @param feedback_fn: The function used for feedback
    @param _ping_fn: Function to check reachabilty (for unittest use only)
    @param _sleep_fn: Function to sleep (for unittest use only)

    """
    # Callbacks and connection parameters
    self.action_cb = action_cb
    self.feedback_fn = feedback_fn
    self.node2ip = node2ip
    self.port = port
    self._ping_fn = _ping_fn
    self._sleep_fn = _sleep_fn

    # Mutable state: initially every node is considered down
    self.down = set(node_list)
    self.up = set()
    self.success = True

  def __call__(self):
    """When called we run action_cb.

    The callback receives the set of nodes which are up; a false result is
    remembered in L{success}.

    @return: L{success} once no node is down anymore
    @raises utils.RetryAgain: When there are still down nodes

    """
    if not self.action_cb(self.up):
      self.success = False

    if not self.down:
      return self.success

    raise utils.RetryAgain()

  def Wait(self, secs):
    """Checks if a host is up or waits remaining seconds.

    @param secs: The secs remaining

    """
    begin = time.time()

    for name in self.down:
      reachable = self._ping_fn(self.node2ip[name], self.port,
                                timeout=_EPO_PING_TIMEOUT,
                                live_port_needed=True)
      if not reachable:
        continue

      self.feedback_fn("Node %s became available" % name)
      self.up.add(name)
      self.down -= self.up
      # If we have a node available there is the possibility to run the
      # action callback successfully, therefore we don't wait and return
      return

    # Nobody came up; sleep for whatever is left of the requested time
    self._sleep_fn(max(0.0, begin + secs - time.time()))
1227

    
1228

    
1229
def _RunWhenNodesReachable(node_list, action_cb, interval):
  """Run action_cb when nodes become reachable.

  The node names are resolved using the address family matching the
  cluster's primary IP version, then a L{_RunWhenNodesReachableHelper} is
  retried until no node is down anymore or L{_EPO_REACHABLE_TIMEOUT} expires.

  @param node_list: The list of nodes to be reachable
  @param action_cb: Callback called when a new host is reachable
  @param interval: The earliest time to retry
  @return: The result of the helper (success of all action_cb calls), or
      False if the timeout expired while nodes were still down

  """
  client = GetClient()
  cluster_info = client.QueryClusterInfo()
  if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
    family = netutils.IPAddress.family
  else:
    family = netutils.IP6Address.family

  node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
                 for node in node_list)

  port = netutils.GetDaemonPort(constants.NODED)
  helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port,
                                        ToStdout)

  try:
    return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
                       wait_fn=helper.Wait)
  except utils.RetryTimeout:
    # The separator needs its own newline, otherwise all nodes after the
    # first one end up on the same line of the bullet list
    ToStderr("Time exceeded while waiting for nodes to become reachable"
             " again:\n  - %s", "\n  - ".join(helper.down))
    return False
1258

    
1259

    
1260
def _MaybeInstanceStartup(opts, inst_map, nodes_online,
                          _instance_start_fn=_InstanceStart):
  """Start the instances conditional based on node_states.

  Instances whose nodes are all online are removed from C{inst_map} (which is
  modified in place) and started.

  @param opts: The command line options selected by the user
  @param inst_map: A dict of inst -> nodes mapping
  @param nodes_online: The nodes which are online; it is subtracted from the
      node set of every instance
  @param _instance_start_fn: Callback to start instances (unittest use only)
  @return: Success of the operation on all instances

  """
  # An instance is ready once none of its nodes is missing from nodes_online
  ready = [inst for (inst, nodes) in inst_map.items()
           if not (nodes - nodes_online)]

  for inst in ready:
    del inst_map[inst]

  if not ready:
    return True

  return _instance_start_fn(opts, ready, True)
1284

    
1285

    
1286
def _EpoOn(opts, full_node_list, node_list, inst_map):
  """Does the actual power on.

  The OOB-capable nodes are powered on first; afterwards the function waits
  for all nodes to become reachable and starts instances as soon as the nodes
  they live on are back.

  @param opts: The command line options selected by the user
  @param full_node_list: All nodes to operate on (includes nodes not supporting
                         OOB)
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  # This is the power-on path, hence the nodes must be powered *on* (True);
  # a failure here is not fatal as nodes can still be started manually
  if node_list and not _OobPower(opts, node_list, True):
    ToStderr("Not all nodes seem to get back up, investigate and start"
             " manually if needed")

  # Wait for the nodes to be back up; the callback works on a copy of the
  # instance map as it removes the instances it has started
  action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))

  ToStdout("Waiting until all nodes are available again")
  if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
    ToStderr("Please investigate and start stopped instances manually")
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1310

    
1311

    
1312
def _EpoOff(opts, node_list, inst_map):
  """Does the actual power off.

  All instances are shut down first; the nodes are only powered off if that
  succeeded.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  instances_stopped = _InstanceStart(opts, inst_map.keys(), False)
  if not instances_stopped:
    ToStderr("Please investigate and stop instances manually before continuing")
    return constants.EXIT_FAILURE

  # Without nodes to power off there is nothing left to do
  if not node_list or _OobPower(opts, node_list, False):
    return constants.EXIT_SUCCESS

  return constants.EXIT_FAILURE
1332

    
1333

    
1334
def Epo(opts, args):
  """EPO operations.

  Queries the affected nodes, works out which of them need an out-of-band
  power change and which instances live on them, asks for confirmation
  (unless forced) and then runs the power-on or power-off procedure.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the node names (or the node group names if C{--groups} was
      given); must be empty in combination with C{--all}
  @rtype: int
  @return: the desired exit code

  """
  if opts.groups and opts.show_all:
    ToStderr("Only one of --groups or --all are allowed")
    return constants.EXIT_FAILURE
  elif args and opts.show_all:
    ToStderr("Arguments in combination with --all are not allowed")
    return constants.EXIT_FAILURE

  client = GetClient()

  if opts.groups:
    # NOTE(review): this is a lazy itertools.chain object over the rows
    # returned by QueryGroups, not a list - confirm that QueryNodes accepts it
    node_query_list = itertools.chain(*client.QueryGroups(names=args,
                                                          fields=["node_list"],
                                                          use_locking=False))
  else:
    node_query_list = args

  result = client.QueryNodes(names=node_query_list,
                             fields=["name", "master", "pinst_list",
                                     "sinst_list", "powered", "offline"],
                             use_locking=False)

  # Names of all queried nodes, as returned by the query. They are collected
  # in a new list rather than written back into node_query_list: the latter
  # is not subscriptable with --groups (itertools.chain) and is an empty list
  # when no names were given (e.g. with --all).
  all_nodes = [row[0] for row in result]

  node_list = []
  inst_map = {}
  for (node, master, pinsts, sinsts, powered, offline) in result:
    if not offline:
      for inst in (pinsts + sinsts):
        if inst in inst_map:
          if not master:
            inst_map[inst].add(node)
        elif master:
          # The master is never recorded as a node of an instance
          inst_map[inst] = set()
        else:
          inst_map[inst] = set([node])

    if master and opts.on:
      # We ignore the master for turning on the machines, in fact we are
      # already operating on the master at this point :)
      continue
    elif master and not opts.show_all:
      ToStderr("%s is the master node, please do a master-failover to another"
               " node not affected by the EPO or use --all if you intend to"
               " shutdown the whole cluster", node)
      return constants.EXIT_FAILURE
    elif powered is None:
      ToStdout("Node %s does not support out-of-band handling, it can not be"
               " handled in a fully automated manner", node)
    elif powered == opts.on:
      ToStdout("Node %s is already in desired power state, skipping", node)
    elif not offline or (offline and powered):
      node_list.append(node)

  if not opts.force and not ConfirmOperation(all_nodes, "nodes", "epo"):
    return constants.EXIT_FAILURE

  if opts.on:
    return _EpoOn(opts, all_nodes, node_list, inst_map)
  else:
    return _EpoOff(opts, node_list, inst_map)
1405

    
1406

    
1407
# Table of the sub-commands, passed to GenericMain by Main(). Every value is
# a five-element tuple; judging by its contents the layout is: handler
# function, positional argument definitions, list of options, usage string
# and a short description.
commands = {
  "init": (
    InitCluster,
    [ArgHost(min=1, max=1)],
    [
      BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
      HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
      NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT,
      NOMODIFY_SSH_SETUP_OPT, SECONDARY_IP_OPT, VG_NAME_OPT,
      MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, DRBD_HELPER_OPT,
      NODRBD_STORAGE_OPT, DEFAULT_IALLOCATOR_OPT, PRIMARY_IP_VERSION_OPT,
      PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, GLOBAL_SHARED_FILEDIR_OPT,
      USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT,
    ],
    "[opts...] <cluster_name>",
    "Initialises a new cluster configuration",
    ),
  "destroy": (
    DestroyCluster,
    ARGS_NONE,
    [YES_DOIT_OPT],
    "",
    "Destroy cluster",
    ),
  "rename": (
    RenameCluster,
    [ArgHost(min=1, max=1)],
    [FORCE_OPT, DRY_RUN_OPT],
    "<new_name>",
    "Renames the cluster",
    ),
  "redist-conf": (
    RedistributeConfig,
    ARGS_NONE,
    [SUBMIT_OPT, DRY_RUN_OPT, PRIORITY_OPT],
    "",
    "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster",
    ),
  "verify": (
    VerifyCluster,
    ARGS_NONE,
    [
      VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
      DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT,
    ],
    "",
    "Does a check on the cluster configuration",
    ),
  "verify-disks": (
    VerifyDisks,
    ARGS_NONE,
    [PRIORITY_OPT],
    "",
    "Does a check on the cluster disk status",
    ),
  "repair-disk-sizes": (
    RepairDiskSizes,
    ARGS_MANY_INSTANCES,
    [DRY_RUN_OPT, PRIORITY_OPT],
    "[instance...]",
    "Updates mismatches in recorded disk sizes",
    ),
  "master-failover": (
    MasterFailover,
    ARGS_NONE,
    [NOVOTING_OPT],
    "",
    "Makes the current node the master",
    ),
  "master-ping": (
    MasterPing,
    ARGS_NONE,
    [],
    "",
    "Checks if the master is alive",
    ),
  "version": (
    ShowClusterVersion,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster version",
    ),
  "getmaster": (
    ShowClusterMaster,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster master",
    ),
  "copyfile": (
    ClusterCopyFile,
    [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
    "[-n node...] <filename>",
    "Copies a file to all (or only some) nodes",
    ),
  "command": (
    RunClusterCommand,
    [ArgCommand(min=1)],
    [NODE_LIST_OPT, NODEGROUP_OPT],
    "[-n node...] <command>",
    "Runs a command on all (or only some) nodes",
    ),
  "info": (
    ShowClusterConfig,
    ARGS_NONE,
    [ROMAN_OPT],
    "[--roman]",
    "Show cluster configuration",
    ),
  "list-tags": (
    ListTags,
    ARGS_NONE,
    [],
    "",
    "List the tags of the cluster",
    ),
  "add-tags": (
    AddTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT],
    "tag...",
    "Add tags to the cluster",
    ),
  "remove-tags": (
    RemoveTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT],
    "tag...",
    "Remove tags from the cluster",
    ),
  "search-tags": (
    SearchTags,
    [ArgUnknown(min=1, max=1)],
    [PRIORITY_OPT],
    "",
    "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)",
    ),
  "queue": (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [],
    "drain|undrain|info",
    "Change queue properties",
    ),
  "watcher": (
    WatcherOps,
    [
      ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
      ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"]),
    ],
    [],
    "{pause <timespec>|continue|info}",
    "Change watcher properties",
    ),
  "modify": (
    SetClusterParams,
    ARGS_NONE,
    [
      BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT,
      MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT,
      NOLVM_STORAGE_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT,
      ADD_UIDS_OPT, REMOVE_UIDS_OPT, DRBD_HELPER_OPT, NODRBD_STORAGE_OPT,
      DEFAULT_IALLOCATOR_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT,
      PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
      DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT,
    ],
    "[opts...]",
    "Alters the parameters of the cluster",
    ),
  "renew-crypto": (
    RenewCrypto,
    ARGS_NONE,
    [
      NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
      NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
      NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
      NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT,
    ],
    "[opts...]",
    "Renews cluster certificates, keys and secrets",
    ),
  "epo": (
    Epo,
    [ArgUnknown()],
    [
      FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
      SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT,
    ],
    "[opts...] [args]",
    "Performs an emergency power-off on given args",
    ),
  "activate-master-ip": (
    ActivateMasterIp,
    ARGS_NONE,
    [],
    "",
    "Activates the master IP",
    ),
  "deactivate-master-ip": (
    DeactivateMasterIp,
    ARGS_NONE,
    [CONFIRM_OPT],
    "",
    "Deactivates the master IP",
    ),
  }
1518

    
1519

    
1520
#: dictionary with aliases for commands (alias name -> real command name)
aliases = dict(masterfailover="master-failover")
1524

    
1525

    
1526
def Main():
  """Runs the command selected on the command line.

  Hands the L{commands} table and the L{aliases} over to C{GenericMain},
  overriding C{tag_type} with the cluster tag type.

  @return: the value returned by C{GenericMain}

  """
  overrides = {"tag_type": constants.TAG_CLUSTER}
  return GenericMain(commands, override=overrides, aliases=aliases)