Statistics
| Branch: | Tag: | Revision:

root / lib / client / gnt_cluster.py @ bc5d0215

History | View | Annotate | Download (48.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Cluster related commands"""
22

    
23
# pylint: disable=W0401,W0613,W0614,C0103
24
# W0401: Wildcard import ganeti.cli
25
# W0613: Unused argument, since all functions follow the same API
26
# W0614: Unused import %s from wildcard import (since we need cli)
27
# C0103: Invalid name gnt-cluster
28

    
29
import os.path
30
import time
31
import OpenSSL
32
import itertools
33

    
34
from ganeti.cli import *
35
from ganeti import opcodes
36
from ganeti import constants
37
from ganeti import errors
38
from ganeti import utils
39
from ganeti import bootstrap
40
from ganeti import ssh
41
from ganeti import objects
42
from ganeti import uidpool
43
from ganeti import compat
44
from ganeti import netutils
45

    
46

    
47
# "--on" switch: boolean flag, off unless given on the command line
ON_OPT = cli_option("--on", dest="on", action="store_true", default=False,
                    help="Recover from an EPO")

# "--groups" switch: boolean flag, off unless given on the command line
GROUPS_OPT = cli_option("--groups", dest="groups", action="store_true",
                        default=False,
                        help="Arguments are node groups instead of nodes")

# EPO timing settings, all expressed in seconds
_EPO_PING_INTERVAL = 30 # pause between two pings
_EPO_PING_TIMEOUT = 1 # one second
_EPO_REACHABLE_TIMEOUT = 60 * 15 # a quarter of an hour
58

    
59

    
60
@UsesRPC
61
def InitCluster(opts, args):
62
  """Initialize the cluster.
63

64
  @param opts: the command line options selected by the user
65
  @type args: list
66
  @param args: should contain only one element, the desired
67
      cluster name
68
  @rtype: int
69
  @return: the desired exit code
70

71
  """
72
  if not opts.lvm_storage and opts.vg_name:
73
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
74
    return 1
75

    
76
  vg_name = opts.vg_name
77
  if opts.lvm_storage and not opts.vg_name:
78
    vg_name = constants.DEFAULT_VG
79

    
80
  if not opts.drbd_storage and opts.drbd_helper:
81
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
82
    return 1
83

    
84
  drbd_helper = opts.drbd_helper
85
  if opts.drbd_storage and not opts.drbd_helper:
86
    drbd_helper = constants.DEFAULT_DRBD_HELPER
87

    
88
  master_netdev = opts.master_netdev
89
  if master_netdev is None:
90
    master_netdev = constants.DEFAULT_BRIDGE
91

    
92
  hvlist = opts.enabled_hypervisors
93
  if hvlist is None:
94
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
95
  hvlist = hvlist.split(",")
96

    
97
  hvparams = dict(opts.hvparams)
98
  beparams = opts.beparams
99
  nicparams = opts.nicparams
100

    
101
  diskparams = dict(opts.diskparams)
102

    
103
  # check the disk template types here, as we cannot rely on the type check done
104
  # by the opcode parameter types
105
  diskparams_keys = set(diskparams.keys())
106
  if not (diskparams_keys <= constants.DISK_TEMPLATES):
107
    unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
108
    ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
109
    return 1
110

    
111
  # prepare beparams dict
112
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
113
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
114

    
115
  # prepare nicparams dict
116
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
117
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
118

    
119
  # prepare ndparams dict
120
  if opts.ndparams is None:
121
    ndparams = dict(constants.NDC_DEFAULTS)
122
  else:
123
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
124
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
125

    
126
  # prepare hvparams dict
127
  for hv in constants.HYPER_TYPES:
128
    if hv not in hvparams:
129
      hvparams[hv] = {}
130
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
131
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)
132

    
133
  # prepare diskparams dict
134
  for templ in constants.DISK_TEMPLATES:
135
    if templ not in diskparams:
136
      diskparams[templ] = {}
137
    diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
138
                                         diskparams[templ])
139
    utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)
140

    
141
  if opts.candidate_pool_size is None:
142
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT
143

    
144
  if opts.mac_prefix is None:
145
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX
146

    
147
  uid_pool = opts.uid_pool
148
  if uid_pool is not None:
149
    uid_pool = uidpool.ParseUidPool(uid_pool)
150

    
151
  if opts.prealloc_wipe_disks is None:
152
    opts.prealloc_wipe_disks = False
153

    
154
  external_ip_setup_script = opts.use_external_mip_script
155
  if external_ip_setup_script is None:
156
    external_ip_setup_script = False
157

    
158
  try:
159
    primary_ip_version = int(opts.primary_ip_version)
160
  except (ValueError, TypeError), err:
161
    ToStderr("Invalid primary ip version value: %s" % str(err))
162
    return 1
163

    
164
  master_netmask = opts.master_netmask
165
  try:
166
    if master_netmask is not None:
167
      master_netmask = int(master_netmask)
168
  except (ValueError, TypeError), err:
169
    ToStderr("Invalid master netmask value: %s" % str(err))
170
    return 1
171

    
172
  bootstrap.InitCluster(cluster_name=args[0],
173
                        secondary_ip=opts.secondary_ip,
174
                        vg_name=vg_name,
175
                        mac_prefix=opts.mac_prefix,
176
                        master_netmask=master_netmask,
177
                        master_netdev=master_netdev,
178
                        file_storage_dir=opts.file_storage_dir,
179
                        shared_file_storage_dir=opts.shared_file_storage_dir,
180
                        enabled_hypervisors=hvlist,
181
                        hvparams=hvparams,
182
                        beparams=beparams,
183
                        nicparams=nicparams,
184
                        ndparams=ndparams,
185
                        diskparams=diskparams,
186
                        candidate_pool_size=opts.candidate_pool_size,
187
                        modify_etc_hosts=opts.modify_etc_hosts,
188
                        modify_ssh_setup=opts.modify_ssh_setup,
189
                        maintain_node_health=opts.maintain_node_health,
190
                        drbd_helper=drbd_helper,
191
                        uid_pool=uid_pool,
192
                        default_iallocator=opts.default_iallocator,
193
                        primary_ip_version=primary_ip_version,
194
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
195
                        use_external_mip_script=external_ip_setup_script,
196
                        )
197
  op = opcodes.OpClusterPostInit()
198
  SubmitOpCode(op, opts=opts)
199
  return 0
200

    
201

    
202
@UsesRPC
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Nothing is done unless the user confirmed the operation through the
  C{--yes-do-it} option; in that case a message is written to standard
  error and a non-zero code is returned.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if not opts.yes_do_it:
    ToStderr("Destroying a cluster is irreversible. If you really want to"
             " destroy this cluster, supply the --yes-do-it option.")
    return 1

  op = opcodes.OpClusterDestroy()
  master = SubmitOpCode(op, opts=opts)
  # if we reached this, the opcode didn't fail; we can proceed to
  # shutdown all the daemons (the opcode result is what the finalizer
  # needs)
  bootstrap.FinalizeClusterDestroy(master)
  return 0
224

    
225

    
226
def RenameCluster(opts, args):
  """Give the cluster a new name.

  Unless C{opts.force} is set the user has to confirm the operation
  first; declining it ends the command with a non-zero code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  # exactly one value is expected back for the one field requested
  (old_name, ) = client.QueryConfigValues(["cluster_name"])
  new_name = args[0]

  if not opts.force:
    question = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (old_name, new_name)
    if not AskUser(question):
      return 1

  outcome = SubmitOpCode(opcodes.OpClusterRename(name=new_name),
                         opts=opts, cl=client)

  # the opcode result is reported as the name now in effect
  if outcome:
    ToStdout("Cluster renamed from '%s' to '%s'", old_name, outcome)

  return 0
257

    
258

    
259
def ActivateMasterIp(opts, args):
  """Activates the master IP.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterActivateMasterIp()
  # hand the options over like the other commands in this module do, so
  # that whatever SubmitOpCode derives from them applies here as well
  SubmitOpCode(op, opts=opts)
  return 0
266

    
267

    
268
def DeactivateMasterIp(opts, args):
  """Deactivates the master IP.

  Unless C{opts.confirm} is set the user is asked for confirmation
  first; declining it ends the command with a non-zero code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the desired exit code

  """
  if not opts.confirm:
    usertext = ("This will disable the master IP. All the open connections to"
                " the master IP will be closed. To reach the master you will"
                " need to use its node IP."
                " Continue?")
    if not AskUser(usertext):
      return 1

  op = opcodes.OpClusterDeactivateMasterIp()
  # hand the options over like the other commands in this module do, so
  # that whatever SubmitOpCode derives from them applies here as well
  SubmitOpCode(op, opts=opts)
  return 0
283

    
284

    
285
def RedistributeConfig(opts, args):
  """Force a push of the cluster configuration.

  A single L{opcodes.OpClusterRedistConf} opcode is handed to
  C{SubmitOrSend} together with the command line options.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  SubmitOrSend(opcodes.OpClusterRedistConf(), opts)
  return 0
298

    
299

    
300
def ShowClusterVersion(opts, args):
  """Print the version information reported by the cluster.

  One line is written to standard output for each version field taken
  from the cluster information query.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  # (output format, key in the query result), in display order
  version_lines = [
    ("Software version: %s", "software_version"),
    ("Internode protocol: %s", "protocol_version"),
    ("Configuration format: %s", "config_version"),
    ("OS api version: %s", "os_api_version"),
    ("Export interface: %s", "export_version"),
    ]

  for (fmt, key) in version_lines:
    ToStdout(fmt, info[key])

  return 0
318

    
319

    
320
def ShowClusterMaster(opts, args):
  """Print the value returned by L{bootstrap.GetMaster}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  ToStdout(bootstrap.GetMaster())
  return 0
333

    
334

    
335
def _PrintGroupedParams(paramsdict, level=1, roman=False):
  """Print Grouped parameters (be, nic, disk) by group.

  Entries are written in sorted key order. A value that is itself a
  dictionary is printed as a group header followed by its content one
  level deeper; any other value is printed on a single line.

  @type paramsdict: dict of dicts
  @param paramsdict: {group: {param: value, ...}, ...}
  @type level: int
  @param level: Level of indention
  @type roman: bool
  @param roman: whether integer values are passed through
      L{compat.TryToRoman} before being printed

  """
  indent = "  " * level
  for item, val in sorted(paramsdict.items()):
    if isinstance(val, dict):
      ToStdout("%s- %s:", indent, item)
      _PrintGroupedParams(val, level=level + 1, roman=roman)
    elif roman and isinstance(val, int) and not isinstance(val, bool):
      # bool is a subclass of int; True/False are flags, not numbers, and
      # must be printed unchanged like every other non-integer value
      ToStdout("%s  %s: %s", indent, item, compat.TryToRoman(val))
    else:
      ToStdout("%s  %s: %s", indent, item, val)
353

    
354

    
355
def ShowClusterConfig(opts, args):
  """Print the cluster configuration to standard output.

  The data comes from a single cluster information query and is printed
  section by section: identification, hypervisors, OS settings, cluster
  wide parameters and finally the default node, instance and NIC
  parameters.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  for (fmt, key) in [("Cluster name: %s", "name"),
                     ("Cluster UUID: %s", "uuid")]:
    ToStdout(fmt, info[key])

  for (fmt, key) in [("Creation time: %s", "ctime"),
                     ("Modification time: %s", "mtime")]:
    ToStdout(fmt, utils.FormatTime(info[key]))

  ToStdout("Master node: %s", info["master"])

  # the architecture entry holds (at least) two elements
  arch = info["architecture"]
  ToStdout("Architecture (this node): %s (%s)", arch[0], arch[1])

  tags = "(none)"
  if info["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(info["tags"]))
  ToStdout("Tags: %s", tags)

  ToStdout("Default hypervisor: %s", info["default_hypervisor"])
  ToStdout("Enabled hypervisors: %s",
           utils.CommaJoin(info["enabled_hypervisors"]))

  # grouped sections printed without Roman numeral conversion
  for (title, key) in [("Hypervisor parameters:", "hvparams"),
                       ("OS-specific hypervisor parameters:", "os_hvp"),
                       ("OS parameters:", "osparams")]:
    ToStdout(title)
    _PrintGroupedParams(info[key])

  ToStdout("Hidden OSes: %s", utils.CommaJoin(info["hidden_os"]))
  ToStdout("Blacklisted OSes: %s", utils.CommaJoin(info["blacklisted_os"]))

  ToStdout("Cluster parameters:")
  roman = opts.roman_integers
  ToStdout("  - candidate pool size: %s",
           compat.TryToRoman(info["candidate_pool_size"], convert=roman))

  for (fmt, key) in [
    ("  - master netdev: %s", "master_netdev"),
    ("  - master netmask: %s", "master_netmask"),
    ("  - use external master IP address setup script: %s",
     "use_external_mip_script"),
    ("  - lvm volume group: %s", "volume_group_name"),
    ]:
    ToStdout(fmt, info[key])

  reserved_lvs = "(none)"
  if info["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(info["reserved_lvs"])
  ToStdout("  - lvm reserved volumes: %s", reserved_lvs)

  for (fmt, key) in [
    ("  - drbd usermode helper: %s", "drbd_usermode_helper"),
    ("  - file storage path: %s", "file_storage_dir"),
    ("  - shared file storage path: %s", "shared_file_storage_dir"),
    ("  - maintenance of node health: %s", "maintain_node_health"),
    ]:
    ToStdout(fmt, info[key])

  ToStdout("  - uid pool: %s",
           uidpool.FormatUidPool(info["uid_pool"], roman=roman))

  for (fmt, key) in [
    ("  - default instance allocator: %s", "default_iallocator"),
    ("  - primary ip version: %d", "primary_ip_version"),
    ("  - preallocation wipe disks: %s", "prealloc_wipe_disks"),
    ]:
    ToStdout(fmt, info[key])

  ToStdout("  - OS search path: %s", utils.CommaJoin(constants.OS_SEARCH_PATH))

  # grouped sections honouring the Roman numeral option
  for (title, key) in [("Default node parameters:", "ndparams"),
                       ("Default instance parameters:", "beparams"),
                       ("Default nic parameters:", "nicparams")]:
    ToStdout(title)
    _PrintGroupedParams(info[key], roman=roman)

  return 0
440

    
441

    
442
def ClusterCopyFile(opts, args):
  """Copy a local file to a set of nodes.

  The target nodes come from C{GetOnlineNodes}, restricted by the node
  and node group options and with the master filtered out. A failed copy
  is reported on standard error but does not change the exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the file does not exist

  """
  path = args[0]
  if not os.path.exists(path):
    raise errors.OpPrereqError("No such filename '%s'" % path,
                               errors.ECODE_INVAL)

  client = GetClient()
  cluster_name = client.QueryConfigValues(["cluster_name"])[0]

  targets = GetOnlineNodes(nodes=opts.nodes, cl=client, filter_master=True,
                           secondary_ips=opts.use_replication_network,
                           nodegroup=opts.nodegroup)

  runner = ssh.SshRunner(cluster_name=cluster_name)
  for target in targets:
    if runner.CopyFileToNode(target, path):
      continue
    ToStderr("Copy of file %s to node %s failed", path, target)

  return 0
472

    
473

    
474
def RunClusterCommand(opts, args):
  """Run a command over SSH, as root, on a set of nodes.

  The nodes are visited one after the other; for each of them the node
  name, the command output and the exit code are printed. The exit codes
  of the remote command do not influence the value returned here.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  # the words are simply joined with blanks, no quoting is applied
  command = " ".join(args)

  targets = GetOnlineNodes(nodes=opts.nodes, cl=client,
                           nodegroup=opts.nodegroup)

  (cluster_name, master_node) = \
    client.QueryConfigValues(["cluster_name", "master_node"])

  runner = ssh.SshRunner(cluster_name=cluster_name)

  # Make sure master node is at list end
  if master_node in targets:
    targets.remove(master_node)
    targets.append(master_node)

  for target in targets:
    outcome = runner.Run(target, "root", command)
    ToStdout("------------------------------------------------")
    ToStdout("node: %s", target)
    ToStdout("%s", outcome.output)
    ToStdout("return code = %s", outcome.exit_code)

  return 0
508

    
509

    
510
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  An L{opcodes.OpClusterVerify} opcode is submitted; its result lists
  further jobs, which are followed until they finish. The command
  succeeds only if every one of those jobs succeeded and each of them
  returned exactly one, true, opcode result.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []

  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  # Count failures with a plain loop; unlike unpacking the output of
  # zip(*...), this also copes with an empty list of job results
  bad_jobs = 0
  bad_results = 0
  for (job_success, op_results) in jex.GetResults():
    if not job_success:
      bad_jobs += 1
    # a job counts as verified only if it has exactly one opcode result
    # and that result is true
    if not (len(op_results) == 1 and op_results[0]):
      bad_results += 1

  if bad_jobs == 0 and bad_results == 0:
    rcode = constants.EXIT_SUCCESS
  else:
    rcode = constants.EXIT_FAILURE
    if bad_jobs > 0:
      ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  return rcode
561

    
562

    
563
def VerifyDisks(opts, args):
564
  """Verify integrity of cluster disks.
565

566
  @param opts: the command line options selected by the user
567
  @type args: list
568
  @param args: should be an empty list
569
  @rtype: int
570
  @return: the desired exit code
571

572
  """
573
  cl = GetClient()
574

    
575
  op = opcodes.OpClusterVerifyDisks()
576

    
577
  result = SubmitOpCode(op, cl=cl, opts=opts)
578

    
579
  # Keep track of submitted jobs
580
  jex = JobExecutor(cl=cl, opts=opts)
581

    
582
  for (status, job_id) in result[constants.JOB_IDS_KEY]:
583
    jex.AddJobId(None, status, job_id)
584

    
585
  retcode = constants.EXIT_SUCCESS
586

    
587
  for (status, result) in jex.GetResults():
588
    if not status:
589
      ToStdout("Job failed: %s", result)
590
      continue
591

    
592
    ((bad_nodes, instances, missing), ) = result
593

    
594
    for node, text in bad_nodes.items():
595
      ToStdout("Error gathering data on node %s: %s",
596
               node, utils.SafeEncode(text[-400:]))
597
      retcode = constants.EXIT_FAILURE
598
      ToStdout("You need to fix these nodes first before fixing instances")
599

    
600
    for iname in instances:
601
      if iname in missing:
602
        continue
603
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
604
      try:
605
        ToStdout("Activating disks for instance '%s'", iname)
606
        SubmitOpCode(op, opts=opts, cl=cl)
607
      except errors.GenericError, err:
608
        nret, msg = FormatError(err)
609
        retcode |= nret
610
        ToStderr("Error activating disks for instance %s: %s", iname, msg)
611

    
612
    if missing:
613
      for iname, ival in missing.iteritems():
614
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
615
        if all_missing:
616
          ToStdout("Instance %s cannot be verified as it lives on"
617
                   " broken nodes", iname)
618
        else:
619
          ToStdout("Instance %s has missing logical volumes:", iname)
620
          ival.sort()
621
          for node, vol in ival:
622
            if node in bad_nodes:
623
              ToStdout("\tbroken node %s /dev/%s", node, vol)
624
            else:
625
              ToStdout("\t%s /dev/%s", node, vol)
626

    
627
      ToStdout("You need to replace or recreate disks for all the above"
628
               " instances if this message persists after fixing broken nodes.")
629
      retcode = constants.EXIT_FAILURE
630

    
631
  return retcode
632

    
633

    
634
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  Submits a single L{opcodes.OpClusterRepairDiskSizes} opcode for the
  given instances.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # return an explicit exit code, as documented, instead of None
  return 0
646

    
647

    
648
@UsesRPC
def MasterFailover(opts, args):
  """Make the node running this command the new master.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master. When C{opts.no_voting} is set the user has to confirm the
  operation first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  warning = ("This will perform the failover even if most other nodes"
             " are down, or if this node is outdated. This is dangerous"
             " as it can lead to a non-consistent cluster. Check the"
             " gnt-cluster(8) man page before proceeding. Continue?")

  # the question is only asked when voting has been disabled
  if opts.no_voting and not AskUser(warning):
    return 1

  return bootstrap.MasterFailover(no_voting=opts.no_voting)
672

    
673

    
674
def MasterPing(opts, args):
  """Check whether the master answers a cluster information query.

  Any exception raised while creating the client or running the query
  is turned into a non-zero exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  try:
    GetClient().QueryClusterInfo()
  except Exception: # pylint: disable=W0703
    return 1

  return 0
690

    
691

    
692
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  Every match is printed as "path tag", in sorted order. An empty search
  result yields a non-zero exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpTagsSearch(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  if not result:
    return 1

  for path, tag in sorted(result):
    ToStdout("%s %s", path, tag)

  # return an explicit exit code, as documented, instead of None
  return 0
710

    
711

    
712
def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
713
  """Reads and verifies an X509 certificate.
714

715
  @type cert_filename: string
716
  @param cert_filename: the path of the file containing the certificate to
717
                        verify encoded in PEM format
718
  @type verify_private_key: bool
719
  @param verify_private_key: whether to verify the private key in addition to
720
                             the public certificate
721
  @rtype: string
722
  @return: a string containing the PEM-encoded certificate.
723

724
  """
725
  try:
726
    pem = utils.ReadFile(cert_filename)
727
  except IOError, err:
728
    raise errors.X509CertError(cert_filename,
729
                               "Unable to read certificate: %s" % str(err))
730

    
731
  try:
732
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
733
  except Exception, err:
734
    raise errors.X509CertError(cert_filename,
735
                               "Unable to load certificate: %s" % str(err))
736

    
737
  if verify_private_key:
738
    try:
739
      OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
740
    except Exception, err:
741
      raise errors.X509CertError(cert_filename,
742
                                 "Unable to load private key: %s" % str(err))
743

    
744
  return pem
745

    
746

    
747
def _RenewCrypto(new_cluster_cert, new_rapi_cert, #pylint: disable=R0911
748
                 rapi_cert_filename, new_spice_cert, spice_cert_filename,
749
                 spice_cacert_filename, new_confd_hmac_key, new_cds,
750
                 cds_filename, force):
751
  """Renews cluster certificates, keys and secrets.
752

753
  @type new_cluster_cert: bool
754
  @param new_cluster_cert: Whether to generate a new cluster certificate
755
  @type new_rapi_cert: bool
756
  @param new_rapi_cert: Whether to generate a new RAPI certificate
757
  @type rapi_cert_filename: string
758
  @param rapi_cert_filename: Path to file containing new RAPI certificate
759
  @type new_spice_cert: bool
760
  @param new_spice_cert: Whether to generate a new SPICE certificate
761
  @type spice_cert_filename: string
762
  @param spice_cert_filename: Path to file containing new SPICE certificate
763
  @type spice_cacert_filename: string
764
  @param spice_cacert_filename: Path to file containing the certificate of the
765
                                CA that signed the SPICE certificate
766
  @type new_confd_hmac_key: bool
767
  @param new_confd_hmac_key: Whether to generate a new HMAC key
768
  @type new_cds: bool
769
  @param new_cds: Whether to generate a new cluster domain secret
770
  @type cds_filename: string
771
  @param cds_filename: Path to file containing new cluster domain secret
772
  @type force: bool
773
  @param force: Whether to ask user for confirmation
774

775
  """
776
  if new_rapi_cert and rapi_cert_filename:
777
    ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
778
             " options can be specified at the same time.")
779
    return 1
780

    
781
  if new_cds and cds_filename:
782
    ToStderr("Only one of the --new-cluster-domain-secret and"
783
             " --cluster-domain-secret options can be specified at"
784
             " the same time.")
785
    return 1
786

    
787
  if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
788
    ToStderr("When using --new-spice-certificate, the --spice-certificate"
789
             " and --spice-ca-certificate must not be used.")
790
    return 1
791

    
792
  if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
793
    ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
794
             " specified.")
795
    return 1
796

    
797
  rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
798
  try:
799
    if rapi_cert_filename:
800
      rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
801
    if spice_cert_filename:
802
      spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
803
      spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
804
  except errors.X509CertError, err:
805
    ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
806
    return 1
807

    
808
  if cds_filename:
809
    try:
810
      cds = utils.ReadFile(cds_filename)
811
    except Exception, err: # pylint: disable=W0703
812
      ToStderr("Can't load new cluster domain secret from %s: %s" %
813
               (cds_filename, str(err)))
814
      return 1
815
  else:
816
    cds = None
817

    
818
  if not force:
819
    usertext = ("This requires all daemons on all nodes to be restarted and"
820
                " may take some time. Continue?")
821
    if not AskUser(usertext):
822
      return 1
823

    
824
  def _RenewCryptoInner(ctx):
825
    ctx.feedback_fn("Updating certificates and keys")
826
    bootstrap.GenerateClusterCrypto(new_cluster_cert,
827
                                    new_rapi_cert,
828
                                    new_spice_cert,
829
                                    new_confd_hmac_key,
830
                                    new_cds,
831
                                    rapi_cert_pem=rapi_cert_pem,
832
                                    spice_cert_pem=spice_cert_pem,
833
                                    spice_cacert_pem=spice_cacert_pem,
834
                                    cds=cds)
835

    
836
    files_to_copy = []
837

    
838
    if new_cluster_cert:
839
      files_to_copy.append(constants.NODED_CERT_FILE)
840

    
841
    if new_rapi_cert or rapi_cert_pem:
842
      files_to_copy.append(constants.RAPI_CERT_FILE)
843

    
844
    if new_spice_cert or spice_cert_pem:
845
      files_to_copy.append(constants.SPICE_CERT_FILE)
846
      files_to_copy.append(constants.SPICE_CACERT_FILE)
847

    
848
    if new_confd_hmac_key:
849
      files_to_copy.append(constants.CONFD_HMAC_KEY)
850

    
851
    if new_cds or cds:
852
      files_to_copy.append(constants.CLUSTER_DOMAIN_SECRET_FILE)
853

    
854
    if files_to_copy:
855
      for node_name in ctx.nonmaster_nodes:
856
        ctx.feedback_fn("Copying %s to %s" %
857
                        (", ".join(files_to_copy), node_name))
858
        for file_name in files_to_copy:
859
          ctx.ssh.CopyFileToNode(node_name, file_name)
860

    
861
  RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
862

    
863
  ToStdout("All requested certificates and keys have been replaced."
864
           " Running \"gnt-cluster verify\" now is recommended.")
865

    
866
  return 0
867

    
868

    
869
def RenewCrypto(opts, args):
  """Command entry point for renewing certificates, keys and secrets.

  Collects the relevant command line options and forwards them, in the
  positional order expected by L{_RenewCrypto}, to that function.

  @param opts: the command line options selected by the user
  @param args: unused
  @return: the value returned by L{_RenewCrypto}

  """
  renew_args = (
    opts.new_cluster_cert,
    opts.new_rapi_cert,
    opts.rapi_cert,
    opts.new_spice_cert,
    opts.spice_cert,
    opts.spice_cacert,
    opts.new_confd_hmac_key,
    opts.new_cluster_domain_secret,
    opts.cluster_domain_secret,
    opts.force,
    )
  return _RenewCrypto(*renew_args)
883

    
884

    
885
def SetClusterParams(opts, args):
  """Modify the cluster.

  Validates and normalises the command line options, then submits a single
  L{opcodes.OpClusterSetParams} opcode. Note that C{opts.reserved_lvs} and
  C{opts.master_netmask} are normalised in place on C{opts}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code (1 if the options are missing, conflicting
      or malformed, 0 once the opcode has been submitted)

  """
  if not (not opts.lvm_storage or opts.vg_name or
          not opts.drbd_storage or opts.drbd_helper or
          opts.enabled_hypervisors or opts.hvparams or
          opts.beparams or opts.nicparams or
          opts.ndparams or opts.diskparams or
          opts.candidate_pool_size is not None or
          opts.uid_pool is not None or
          opts.maintain_node_health is not None or
          opts.add_uids is not None or
          opts.remove_uids is not None or
          opts.default_iallocator is not None or
          opts.reserved_lvs is not None or
          opts.master_netdev is not None or
          opts.master_netmask is not None or
          opts.use_external_mip_script is not None or
          opts.prealloc_wipe_disks is not None):
    ToStderr("Please give at least one of the parameters.")
    return 1

  vg_name = opts.vg_name
  if not opts.lvm_storage and opts.vg_name:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  # Disabling LVM storage is expressed as an empty volume group name
  if not opts.lvm_storage:
    vg_name = ""

  drbd_helper = opts.drbd_helper
  if not opts.drbd_storage and opts.drbd_helper:
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
    return 1

  # Disabling DRBD storage is expressed as an empty helper
  if not opts.drbd_storage:
    drbd_helper = ""

  hvlist = opts.enabled_hypervisors
  if hvlist is not None:
    hvlist = hvlist.split(",")

  # a list of (name, dict) we can pass directly to dict() (or [])
  hvparams = dict(opts.hvparams)
  for hv_params in hvparams.values():
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

  diskparams = dict(opts.diskparams)

  # The disk parameters (not the hypervisor parameters) are the ones that
  # have to be checked against the disk template parameter types
  for dt_params in diskparams.values():
    utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  beparams = opts.beparams
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)

  nicparams = opts.nicparams
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  ndparams = opts.ndparams
  if ndparams is not None:
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  mnh = opts.maintain_node_health

  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  add_uids = opts.add_uids
  if add_uids is not None:
    add_uids = uidpool.ParseUidPool(add_uids)

  remove_uids = opts.remove_uids
  if remove_uids is not None:
    remove_uids = uidpool.ParseUidPool(remove_uids)

  # An explicitly given empty string means "no reserved LVs"
  if opts.reserved_lvs is not None:
    if opts.reserved_lvs == "":
      opts.reserved_lvs = []
    else:
      opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")

  if opts.master_netmask is not None:
    try:
      opts.master_netmask = int(opts.master_netmask)
    except ValueError:
      ToStderr("The --master-netmask option expects an int parameter.")
      return 1

  ext_ip_script = opts.use_external_mip_script

  op = opcodes.OpClusterSetParams(vg_name=vg_name,
                                  drbd_helper=drbd_helper,
                                  enabled_hypervisors=hvlist,
                                  hvparams=hvparams,
                                  os_hvp=None,
                                  beparams=beparams,
                                  nicparams=nicparams,
                                  ndparams=ndparams,
                                  diskparams=diskparams,
                                  candidate_pool_size=opts.candidate_pool_size,
                                  maintain_node_health=mnh,
                                  uid_pool=uid_pool,
                                  add_uids=add_uids,
                                  remove_uids=remove_uids,
                                  default_iallocator=opts.default_iallocator,
                                  prealloc_wipe_disks=opts.prealloc_wipe_disks,
                                  master_netdev=opts.master_netdev,
                                  master_netmask=opts.master_netmask,
                                  reserved_lvs=opts.reserved_lvs,
                                  use_external_mip_script=ext_ip_script,
                                  )
  SubmitOpCode(op, opts=opts)
  return 0
1006

    
1007

    
1008
def QueueOps(opts, args):
  """Handle the job queue sub-commands.

  "drain" and "undrain" set respectively clear the queue drain flag, while
  "info" prints the current state of that flag.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not known

  """
  command = args[0]
  client = GetClient()

  if command == "info":
    flags = client.QueryConfigValues(["drain_flag"])
    state = "unset"
    if flags[0]:
      state = "set"
    ToStdout("The drain flag is %s" % state)
  elif command in ("drain", "undrain"):
    # The flag is set for "drain" and cleared for "undrain"
    client.SetQueueDrainFlag(command == "drain")
  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % command,
                               errors.ECODE_INVAL)

  return 0
1035

    
1036

    
1037
def _ShowWatcherPause(until):
  """Print whether the watcher is currently paused.

  @param until: timestamp until which the watcher is paused, or None; a
      timestamp in the past is reported as "not paused"

  """
  if until is None or until < time.time():
    ToStdout("The watcher is not paused.")
    return

  ToStdout("The watcher is paused until %s.", time.ctime(until))
1042

    
1043

    
1044
def WatcherOps(opts, args):
  """Handle the watcher sub-commands.

  "pause <timespec>" pauses the watcher for the given duration, "continue"
  removes the pause and "info" shows the current pause state.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the subcommand, followed by the duration for "pause"
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not known or "pause"
      was given without a duration

  """
  command = args[0]
  client = GetClient()

  if command == "info":
    values = client.QueryConfigValues(["watcher_pause"])
    _ShowWatcherPause(values[0])

  elif command == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)

    # The pause is given as a duration, but set as an absolute end time
    pause_until = time.time() + ParseTimespec(args[1])
    _ShowWatcherPause(client.SetWatcherPause(pause_until))

  elif command == "continue":
    client.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")

  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % command,
                               errors.ECODE_INVAL)

  return 0
1077

    
1078

    
1079
def _OobPower(opts, node_list, power):
  """Switch the given nodes to the requested power state via OOB.

  A single out-of-band opcode is submitted for all nodes; every node whose
  result status is not C{constants.RS_NORMAL} is reported on stderr.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on
  @param power: True if they should be powered on, False otherwise
  @return: The success of the operation (none failed)

  """
  command = constants.OOB_POWER_OFF
  if power:
    command = constants.OOB_POWER_ON

  oob_op = opcodes.OpOobCommand(node_names=node_list,
                                command=command,
                                ignore_status=True,
                                timeout=opts.oob_timeout,
                                power_delay=opts.power_delay)

  failures = 0
  for ((_, node_name), (data_status, _)) in SubmitOpCode(oob_op, opts=opts):
    if data_status == constants.RS_NORMAL:
      continue

    assert data_status != constants.RS_UNAVAIL
    failures += 1
    ToStderr("There was a problem changing power for %s, please investigate",
             node_name)

  return failures == 0
1114

    
1115

    
1116
def _InstanceStart(opts, inst_list, start):
  """Start up or shut down all the given instances.

  One job per instance is queued and all results are collected before the
  overall outcome is reported.

  @param opts: The command line options selected by the user
  @param inst_list: The list of instances to operate on
  @param start: True if they should be started, False for shutdown
  @return: The success of the operation (none failed)

  """
  if start:
    make_op = opcodes.OpInstanceStartup
    (verb_submit, verb_done, verb_failing) = ("startup", "started", "starting")
  else:
    make_op = compat.partial(opcodes.OpInstanceShutdown,
                             timeout=opts.shutdown_timeout)
    (verb_submit, verb_done, verb_failing) = ("shutdown", "stopped", "stopping")

  executor = JobExecutor(opts=opts)

  for name in inst_list:
    ToStdout("Submit %s of instance %s", verb_submit, name)
    executor.QueueJob(name, make_op(instance_name=name))

  results = executor.GetResults()
  failed = sum(1 for (success, _) in results if not success)

  if failed:
    ToStderr("There were errors while %s instances:\n"
             "%d error(s) out of %d instance(s)", verb_failing, failed,
             len(results))
    return False

  ToStdout("All instances have been %s successfully", verb_done)
  return True
1152

    
1153

    
1154
class _RunWhenNodesReachableHelper:
  """Keeps the state shared between the retry and the wait callbacks.

  @ivar success: Indicates if all action_cb calls were successful

  """
  def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
               _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
    """Init the object.

    @param node_list: The list of nodes to be reachable
    @param action_cb: Callback called when a new host is reachable
    @type node2ip: dict
    @param node2ip: Node to ip mapping
    @param port: The port to use for the TCP ping
    @param feedback_fn: The function used for feedback
    @param _ping_fn: Function to check reachabilty (for unittest use only)
    @param _sleep_fn: Function to sleep (for unittest use only)

    """
    # Collaborators
    self.action_cb = action_cb
    self.feedback_fn = feedback_fn
    self._ping_fn = _ping_fn
    self._sleep_fn = _sleep_fn

    # Connection details
    self.node2ip = node2ip
    self.port = port

    # Mutable state: every node starts out as unreachable
    self.up = set()
    self.down = set(node_list)
    self.success = True

  def __call__(self):
    """Run action_cb on the nodes known to be up.

    @return: the accumulated success once no node is down anymore
    @raises utils.RetryAgain: When there are still down nodes

    """
    callback_ok = self.action_cb(self.up)
    if not callback_ok:
      # A single failed callback makes the whole run unsuccessful
      self.success = False

    if not self.down:
      return self.success

    raise utils.RetryAgain()

  def Wait(self, secs):
    """Checks if a host is up or waits remaining seconds.

    @param secs: The secs remaining

    """
    started = time.time()

    for name in self.down:
      reachable = self._ping_fn(self.node2ip[name], self.port,
                                timeout=_EPO_PING_TIMEOUT,
                                live_port_needed=True)
      if not reachable:
        continue

      self.feedback_fn("Node %s became available" % name)
      self.up.add(name)
      self.down.difference_update(self.up)
      # If we have a node available there is the possibility to run the
      # action callback successfully, therefore we don't wait and return
      return

    # Nothing came up: sleep whatever is left of the requested time
    remaining = started + secs - time.time()
    self._sleep_fn(max(0.0, remaining))
1216

    
1217

    
1218
def _RunWhenNodesReachable(node_list, action_cb, interval):
  """Run action_cb when nodes become reachable.

  The nodes are resolved using the cluster's primary IP version and then
  pinged on the node daemon port until all of them answer or
  C{_EPO_REACHABLE_TIMEOUT} expires.

  @param node_list: The list of nodes to be reachable
  @param action_cb: Callback called when a new host is reachable
  @param interval: The earliest time to retry
  @return: the result of the retry loop, or False on timeout

  """
  ip_version = GetClient().QueryClusterInfo()["primary_ip_version"]
  if ip_version == constants.IP4_VERSION:
    family = netutils.IPAddress.family
  else:
    family = netutils.IP6Address.family

  node2ip = {}
  for name in node_list:
    node2ip[name] = netutils.GetHostname(name, family=family).ip

  helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip,
                                        netutils.GetDaemonPort(constants.NODED),
                                        ToStdout)

  try:
    return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
                       wait_fn=helper.Wait)
  except utils.RetryTimeout:
    ToStderr("Time exceeded while waiting for nodes to become reachable"
             " again:\n  - %s", "  - ".join(helper.down))
    return False
1247

    
1248

    
1249
def _MaybeInstanceStartup(opts, inst_map, nodes_online,
                          _instance_start_fn=_InstanceStart):
  """Start those instances whose nodes are all back online.

  Instances selected for startup are removed from C{inst_map}, so they are
  not considered again on a later call.

  @param opts: The command line options selected by the user
  @param inst_map: A dict of inst -> nodes mapping
  @param nodes_online: The nodes online (must support set difference with
      the per-instance node sets)
  @param _instance_start_fn: Callback to start instances (unittest use only)
  @return: Success of the operation on all instances

  """
  # An instance is ready once none of its nodes is missing from nodes_online
  ready = [inst for (inst, nodes) in inst_map.items()
           if not (nodes - nodes_online)]

  if not ready:
    return True

  for inst in ready:
    del inst_map[inst]

  return _instance_start_fn(opts, ready, True)
1273

    
1274

    
1275
def _EpoOn(opts, full_node_list, node_list, inst_map):
  """Does the actual power on.

  Powers on the OOB-capable nodes, then waits for all nodes to become
  reachable again, starting each instance as soon as all of its nodes are
  back.

  @param opts: The command line options selected by the user
  @param full_node_list: All nodes to operate on (includes nodes not supporting
                         OOB)
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  # Request power *on* (True); passing False would send a power-off command
  # to the very nodes which are supposed to come back up
  if node_list and not _OobPower(opts, node_list, True):
    ToStderr("Not all nodes seem to get back up, investigate and start"
             " manually if needed")

  # Wait for the nodes to be back up; the callback works on a copy of the
  # instance map as it removes the instances it has started
  action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))

  ToStdout("Waiting until all nodes are available again")
  if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
    ToStderr("Please investigate and start stopped instances manually")
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1299

    
1300

    
1301
def _EpoOff(opts, node_list, inst_map):
  """Does the actual power off.

  All instances are shut down first; only if that succeeded are the nodes
  (if any) powered off.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  instances_stopped = _InstanceStart(opts, inst_map.keys(), False)
  if not instances_stopped:
    ToStderr("Please investigate and stop instances manually before continuing")
    return constants.EXIT_FAILURE

  # Without nodes to power off there is nothing left which could fail
  if node_list and not _OobPower(opts, node_list, False):
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1321

    
1322

    
1323
def Epo(opts, args):
  """EPO operations.

  Queries the affected nodes, works out which nodes need their power state
  changed and which instances live on them, and then hands over to
  L{_EpoOn} or L{_EpoOff}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the nodes (or, with --groups, the node groups) to operate on;
      must be empty when --all is used
  @rtype: int
  @return: the desired exit code

  """
  if opts.groups and opts.show_all:
    ToStderr("Only one of --groups or --all are allowed")
    return constants.EXIT_FAILURE
  elif args and opts.show_all:
    ToStderr("Arguments in combination with --all are not allowed")
    return constants.EXIT_FAILURE

  client = GetClient()

  if opts.groups:
    # Materialise the chain: a bare iterator can be consumed only once
    # NOTE(review): each group row holds a single "node_list" field, so this
    # may yield per-group lists rather than node names - confirm against the
    # QueryGroups result format
    node_query_list = list(
      itertools.chain(*client.QueryGroups(names=args,
                                          fields=["node_list"],
                                          use_locking=False)))
  else:
    node_query_list = args

  result = client.QueryNodes(names=node_query_list,
                             fields=["name", "master", "pinst_list",
                                     "sinst_list", "powered", "offline"],
                             use_locking=False)

  # Use the node names as returned by the query from here on: they are
  # normalized and, unlike the query list, also filled in when --all was
  # given (in which case the query list is empty)
  all_nodes = [row[0] for row in result]

  node_list = []
  inst_map = {}
  for (node, master, pinsts, sinsts, powered, offline) in result:
    if not offline:
      for inst in (pinsts + sinsts):
        if inst in inst_map:
          if not master:
            inst_map[inst].add(node)
        elif master:
          # The master is never waited for, hence it is left out of the set
          inst_map[inst] = set()
        else:
          inst_map[inst] = set([node])

    if master and opts.on:
      # We ignore the master for turning on the machines, in fact we are
      # already operating on the master at this point :)
      continue
    elif master and not opts.show_all:
      ToStderr("%s is the master node, please do a master-failover to another"
               " node not affected by the EPO or use --all if you intend to"
               " shutdown the whole cluster", node)
      return constants.EXIT_FAILURE
    elif powered is None:
      ToStdout("Node %s does not support out-of-band handling, it can not be"
               " handled in a fully automated manner", node)
    elif powered == opts.on:
      ToStdout("Node %s is already in desired power state, skipping", node)
    elif not offline or (offline and powered):
      node_list.append(node)

  if not opts.force and not ConfirmOperation(all_nodes, "nodes", "epo"):
    return constants.EXIT_FAILURE

  if opts.on:
    return _EpoOn(opts, all_nodes, node_list, inst_map)
  else:
    return _EpoOff(opts, node_list, inst_map)
1394

    
1395

    
1396
# Sub-command table handed to GenericMain. Each entry is a tuple of
# (handler function, argument definitions, options, usage, description).
commands = {
  "init": (
    InitCluster,
    [ArgHost(min=1, max=1)],
    [
      BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
      HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
      NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT,
      NOMODIFY_SSH_SETUP_OPT, SECONDARY_IP_OPT, VG_NAME_OPT,
      MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, DRBD_HELPER_OPT,
      NODRBD_STORAGE_OPT, DEFAULT_IALLOCATOR_OPT, PRIMARY_IP_VERSION_OPT,
      PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, GLOBAL_SHARED_FILEDIR_OPT,
      USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT,
    ],
    "[opts...] <cluster_name>",
    "Initialises a new cluster configuration"),
  "destroy": (
    DestroyCluster,
    ARGS_NONE,
    [YES_DOIT_OPT],
    "",
    "Destroy cluster"),
  "rename": (
    RenameCluster,
    [ArgHost(min=1, max=1)],
    [FORCE_OPT, DRY_RUN_OPT],
    "<new_name>",
    "Renames the cluster"),
  "redist-conf": (
    RedistributeConfig,
    ARGS_NONE,
    [SUBMIT_OPT, DRY_RUN_OPT, PRIORITY_OPT],
    "",
    "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster"),
  "verify": (
    VerifyCluster,
    ARGS_NONE,
    [
      VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
      DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT,
    ],
    "",
    "Does a check on the cluster configuration"),
  "verify-disks": (
    VerifyDisks,
    ARGS_NONE,
    [PRIORITY_OPT],
    "",
    "Does a check on the cluster disk status"),
  "repair-disk-sizes": (
    RepairDiskSizes,
    ARGS_MANY_INSTANCES,
    [DRY_RUN_OPT, PRIORITY_OPT],
    "[instance...]",
    "Updates mismatches in recorded disk sizes"),
  "master-failover": (
    MasterFailover,
    ARGS_NONE,
    [NOVOTING_OPT],
    "",
    "Makes the current node the master"),
  "master-ping": (
    MasterPing,
    ARGS_NONE,
    [],
    "",
    "Checks if the master is alive"),
  "version": (
    ShowClusterVersion,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster version"),
  "getmaster": (
    ShowClusterMaster,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster master"),
  "copyfile": (
    ClusterCopyFile,
    [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
    "[-n node...] <filename>",
    "Copies a file to all (or only some) nodes"),
  "command": (
    RunClusterCommand,
    [ArgCommand(min=1)],
    [NODE_LIST_OPT, NODEGROUP_OPT],
    "[-n node...] <command>",
    "Runs a command on all (or only some) nodes"),
  "info": (
    ShowClusterConfig,
    ARGS_NONE,
    [ROMAN_OPT],
    "[--roman]",
    "Show cluster configuration"),
  "list-tags": (
    ListTags,
    ARGS_NONE,
    [],
    "",
    "List the tags of the cluster"),
  "add-tags": (
    AddTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT],
    "tag...",
    "Add tags to the cluster"),
  "remove-tags": (
    RemoveTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT],
    "tag...",
    "Remove tags from the cluster"),
  "search-tags": (
    SearchTags,
    [ArgUnknown(min=1, max=1)],
    [PRIORITY_OPT],
    "",
    "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)"),
  "queue": (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [],
    "drain|undrain|info",
    "Change queue properties"),
  "watcher": (
    WatcherOps,
    [
      ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
      ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"]),
    ],
    [],
    "{pause <timespec>|continue|info}",
    "Change watcher properties"),
  "modify": (
    SetClusterParams,
    ARGS_NONE,
    [
      BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT,
      MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT,
      NOLVM_STORAGE_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT,
      ADD_UIDS_OPT, REMOVE_UIDS_OPT, DRBD_HELPER_OPT, NODRBD_STORAGE_OPT,
      DEFAULT_IALLOCATOR_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT,
      PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
      DISK_PARAMS_OPT,
    ],
    "[opts...]",
    "Alters the parameters of the cluster"),
  "renew-crypto": (
    RenewCrypto,
    ARGS_NONE,
    [
      NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
      NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
      NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
      NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT,
    ],
    "[opts...]",
    "Renews cluster certificates, keys and secrets"),
  "epo": (
    Epo,
    [ArgUnknown()],
    [
      FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
      SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT,
    ],
    "[opts...] [args]",
    "Performs an emergency power-off on given args"),
  "activate-master-ip": (
    ActivateMasterIp,
    ARGS_NONE,
    [],
    "",
    "Activates the master IP"),
  "deactivate-master-ip": (
    DeactivateMasterIp,
    ARGS_NONE,
    [CONFIRM_OPT],
    "",
    "Deactivates the master IP"),
  }
1506

    
1507

    
1508
#: alternative spellings accepted for sub-commands (alias -> command name)
aliases = {"masterfailover": "master-failover"}
1512

    
1513

    
1514
def Main():
  """Run the gnt-cluster command line interface.

  @return: the value returned by L{GenericMain}

  """
  overrides = {"tag_type": constants.TAG_CLUSTER}
  return GenericMain(commands, override=overrides, aliases=aliases)