Statistics
| Branch: | Tag: | Revision:

root / scripts / gnt-cluster @ 469f88e1

History | View | Annotate | Download (16.3 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
import sys
23
from optparse import make_option
24
import pprint
25
import os.path
26

    
27
from ganeti.cli import *
28
from ganeti import opcodes
29
from ganeti import constants
30
from ganeti import errors
31
from ganeti import utils
32
from ganeti import bootstrap
33
from ganeti import ssh
34
from ganeti import ssconf
35

    
36

    
37
def InitCluster(opts, args):
  """Bootstrap a brand-new cluster.

  Args:
    opts - class with options as members
    args - list of arguments, expected to be [clustername]

  """
  # --no-lvm-storage and an explicit volume group are mutually exclusive
  if opts.vg_name and not opts.lvm_storage:
    print ("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  # with lvm enabled, fall back to the default VG when none was named;
  # with lvm disabled the (falsy) option value is passed through untouched
  if opts.lvm_storage:
    volume_group = opts.vg_name or constants.DEFAULT_VG
  else:
    volume_group = opts.vg_name

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        hypervisor_type=opts.hypervisor_type,
                        vg_name=volume_group,
                        mac_prefix=opts.mac_prefix,
                        def_bridge=opts.def_bridge,
                        master_netdev=opts.master_netdev,
                        file_storage_dir=opts.file_storage_dir)
  return 0
62

    
63

    
64
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Args:
    opts - class with options as members

  Returns:
    0 on success, 1 when the --yes-do-it safety flag was not given

  """
  if not opts.yes_do_it:
    # fixed wording: the old message read "is irreversibly. If you really
    # want destroy", which was ungrammatical
    print ("Destroying a cluster is irreversible. If you really want to"
           " destroy this cluster, supply the --yes-do-it option.")
    return 1

  op = opcodes.OpDestroyCluster()
  master = SubmitOpCode(op)
  # if we reached this, the opcode didn't fail; we can proceed to
  # shutdown all the daemons
  bootstrap.FinalizeClusterDestroy(master)
  return 0
82

    
83

    
84
def RenameCluster(opts, args):
  """Rename the cluster.

  Args:
    opts - class with options as members, we use force only
    args - list of arguments, expected to be [new_name]

  """
  new_name = args[0]
  if not opts.force:
    # warn interactively: renaming drops the cluster IP from the master,
    # which can cut off a connection made through the cluster name
    warning = ("This will rename the cluster to '%s'. If you are connected"
               " over the network to the cluster name, the operation is very"
               " dangerous as the IP address will be removed from the node"
               " and the change may not go through. Continue?") % new_name
    if not AskUser(warning):
      return 1

  SubmitOpCode(opcodes.OpRenameCluster(name=new_name))
  return 0
104

    
105

    
106
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  Args:
    opts - class with options as members

  """
  result = SubmitOpCode(opcodes.OpQueryClusterInfo())
  # one "Label: value" line per version field, in a fixed order
  for label, key in (("Software version", "software_version"),
                     ("Internode protocol", "protocol_version"),
                     ("Configuration format", "config_version"),
                     ("OS api version", "os_api_version"),
                     ("Export interface", "export_version")):
    print ("%s: %s" % (label, result[key]))
  return 0
121

    
122

    
123
def ShowClusterMaster(opts, args):
124
  """Write name of master node to the standard output.
125

    
126
  Args:
127
    opts - class with options as members
128

    
129
  """
130
  print GetClient().QueryConfigValues(["master_node"])[0]
131
  return 0
132

    
133

    
134
def ShowClusterConfig(opts, args):
135
  """Shows cluster information.
136

    
137
  """
138
  op = opcodes.OpQueryClusterInfo()
139
  result = SubmitOpCode(op)
140

    
141
  print ("Cluster name: %s" % result["name"])
142

    
143
  print ("Master node: %s" % result["master"])
144

    
145
  print ("Architecture (this node): %s (%s)" %
146
         (result["architecture"][0], result["architecture"][1]))
147

    
148
  print ("Default hypervisor: %s" % result["hypervisor_type"])
149
  print ("Enabled hypervisors: %s" % ", ".join(result["enabled_hypervisors"]))
150

    
151
  print "Hypervisor parameters:"
152
  for hv_name, hv_dict in result["hvparams"].items():
153
    print "  - %s:" % hv_name
154
    for item, val in hv_dict.iteritems():
155
      print "      %s: %s" % (item, val)
156

    
157
  print "Cluster parameters:"
158
  for gr_name, gr_dict in result["beparams"].items():
159
    print "  - %s:" % gr_name
160
    for item, val in gr_dict.iteritems():
161
      print "      %s: %s" % (item, val)
162

    
163
  return 0
164

    
165

    
166
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  Args:
    opts - class with options as members
    args - list containing a single element, the file name
  Opts used:
    nodes - list containing the name of target nodes; if empty, all nodes

  """
  src = args[0]
  if not os.path.exists(src):
    raise errors.OpPrereqError("No such filename '%s'" % src)

  client = GetClient()

  local_name = utils.HostInfo().name

  cluster_name = client.QueryConfigValues(["cluster_name"])[0]

  query = opcodes.OpQueryNodes(output_fields=["name"], names=opts.nodes)
  targets = []
  for row in SubmitOpCode(query, cl=client):
    # skip ourselves; the file is already here
    if row[0] != local_name:
      targets.append(row[0])

  runner = ssh.SshRunner(cluster_name=cluster_name)
  for node in targets:
    # report failures but keep copying to the remaining nodes
    if not runner.CopyFileToNode(node, src):
      print >> sys.stderr, ("Copy of file %s to node %s failed" %
                            (src, node))

  return 0
196

    
197

    
198
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  Args:
    opts - class with options as members
    args - the command list as a list
  Opts used:
    nodes: list containing the name of target nodes; if empty, all nodes

  """
  client = GetClient()

  command = " ".join(args)
  query = opcodes.OpQueryNodes(output_fields=["name"], names=opts.nodes)
  node_list = [row[0] for row in SubmitOpCode(query, cl=client)]

  cluster_name, master_node = client.QueryConfigValues(["cluster_name",
                                                        "master_node"])

  runner = ssh.SshRunner(cluster_name=cluster_name)

  # run on the master last, so a disruptive command cannot cut us off
  # from the remaining nodes half-way through
  if master_node in node_list:
    node_list.remove(master_node)
    node_list.append(master_node)

  for name in node_list:
    result = runner.Run(name, "root", command)
    print ("------------------------------------------------")
    print ("node: %s" % name)
    print ("%s" % result.output)
    print ("return code = %s" % result.exit_code)

  return 0
232

    
233

    
234
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  Args:
    opts - class with options as members

  """
  to_skip = []
  if opts.skip_nplusone_mem:
    to_skip.append(constants.VERIFY_NPLUSONE_MEM)
  verified = SubmitOpCode(opcodes.OpVerifyCluster(skip_checks=to_skip))
  # the opcode result is truthy on success
  if verified:
    return 0
  return 1
249

    
250

    
251
def VerifyDisks(opts, args):
252
  """Verify integrity of cluster disks.
253

    
254
  Args:
255
    opts - class with options as members
256

    
257
  """
258
  op = opcodes.OpVerifyDisks()
259
  result = SubmitOpCode(op)
260
  if not isinstance(result, (list, tuple)) or len(result) != 4:
261
    raise errors.ProgrammerError("Unknown result type for OpVerifyDisks")
262

    
263
  nodes, nlvm, instances, missing = result
264

    
265
  if nodes:
266
    print "Nodes unreachable or with bad data:"
267
    for name in nodes:
268
      print "\t%s" % name
269
  retcode = constants.EXIT_SUCCESS
270

    
271
  if nlvm:
272
    for node, text in nlvm.iteritems():
273
      print ("Error on node %s: LVM error: %s" %
274
             (node, text[-400:].encode('string_escape')))
275
      retcode |= 1
276
      print "You need to fix these nodes first before fixing instances"
277

    
278
  if instances:
279
    for iname in instances:
280
      if iname in missing:
281
        continue
282
      op = opcodes.OpActivateInstanceDisks(instance_name=iname)
283
      try:
284
        print "Activating disks for instance '%s'" % iname
285
        SubmitOpCode(op)
286
      except errors.GenericError, err:
287
        nret, msg = FormatError(err)
288
        retcode |= nret
289
        print >> sys.stderr, ("Error activating disks for instance %s: %s" %
290
                              (iname, msg))
291

    
292
  if missing:
293
    for iname, ival in missing.iteritems():
294
      all_missing = utils.all(ival, lambda x: x[0] in nlvm)
295
      if all_missing:
296
        print ("Instance %s cannot be verified as it lives on"
297
               " broken nodes" % iname)
298
      else:
299
        print "Instance %s has missing logical volumes:" % iname
300
        ival.sort()
301
        for node, vol in ival:
302
          if node in nlvm:
303
            print ("\tbroken node %s /dev/xenvg/%s" % (node, vol))
304
          else:
305
            print ("\t%s /dev/xenvg/%s" % (node, vol))
306
    print ("You need to run replace_disks for all the above"
307
           " instances, if this message persist after fixing nodes.")
308
    retcode |= 1
309

    
310
  return retcode
311

    
312

    
313
def MasterFailover(opts, args):
  """Failover the master node.

  Run on a non-master node, this demotes the current master and
  promotes the local node to be the new master.

  """
  # all the work happens in the bootstrap module
  return bootstrap.MasterFailover()
322

    
323

    
324
def SearchTags(opts, args):
325
  """Searches the tags on all the cluster.
326

    
327
  """
328
  op = opcodes.OpSearchTags(pattern=args[0])
329
  result = SubmitOpCode(op)
330
  if not result:
331
    return 1
332
  result = list(result)
333
  result.sort()
334
  for path, tag in result:
335
    print "%s %s" % (path, tag)
336

    
337

    
338
def SetClusterParams(opts, args):
339
  """Modify the cluster.
340

    
341
  Args:
342
    opts - class with options as members
343

    
344
  """
345
  if not (not opts.lvm_storage or opts.vg_name):
346
    print "Please give at least one of the parameters."
347
    return 1
348

    
349
  vg_name = opts.vg_name
350
  if not opts.lvm_storage and opts.vg_name:
351
    print ("Options --no-lvm-storage and --vg-name conflict.")
352
    return 1
353

    
354
  op = opcodes.OpSetClusterParams(vg_name=opts.vg_name)
355
  SubmitOpCode(op)
356
  return 0
357

    
358

    
359
def QueueOps(opts, args):
360
  """Queue operations.
361

    
362
  """
363
  command = args[0]
364
  client = GetClient()
365
  if command in ("drain", "undrain"):
366
    drain_flag = command == "drain"
367
    client.SetQueueDrainFlag(drain_flag)
368
  elif command == "info":
369
    result = client.QueryConfigValues(["drain_flag"])
370
    print "The drain flag is",
371
    if result[0]:
372
      print "set"
373
    else:
374
      print "unset"
375
  return 0
376

    
377
# this is an option common to more than one command, so we declare
# it here and reuse it
# -n/--node may be repeated; an empty list (the default) means "all nodes"
node_option = make_option("-n", "--node", action="append", dest="nodes",
                          help="Node to copy to (if not given, all nodes),"
                               " can be given multiple times",
                          metavar="<node>", default=[])
383

    
384
# dispatch table consumed by GenericMain: command name ->
# (handler function, argument spec, option list, usage string, description)
commands = {
  'init': (InitCluster, ARGS_ONE,
           [DEBUG_OPT,
            make_option("-s", "--secondary-ip", dest="secondary_ip",
                        help="Specify the secondary ip for this node;"
                        " if given, the entire cluster must have secondary"
                        " addresses",
                        metavar="ADDRESS", default=None),
            make_option("-t", "--hypervisor-type", dest="hypervisor_type",
                        help="Specify the hypervisor type "
                        "(xen-pvm, kvm, fake, xen-hvm)",
                        metavar="TYPE", choices=["xen-pvm",
                                                 "kvm",
                                                 "fake",
                                                 "xen-hvm"],
                        default="xen-pvm",),
            make_option("-m", "--mac-prefix", dest="mac_prefix",
                        help="Specify the mac prefix for the instance IP"
                        " addresses, in the format XX:XX:XX",
                        metavar="PREFIX",
                        default="aa:00:00",),
            # default=None here lets InitCluster distinguish "no VG given"
            # from an explicit one (see the --no-lvm-storage interaction)
            make_option("-g", "--vg-name", dest="vg_name",
                        help="Specify the volume group name "
                        " (cluster-wide) for disk allocation [xenvg]",
                        metavar="VG",
                        default=None,),
            make_option("-b", "--bridge", dest="def_bridge",
                        help="Specify the default bridge name (cluster-wide)"
                          " to connect the instances to [%s]" %
                          constants.DEFAULT_BRIDGE,
                        metavar="BRIDGE",
                        default=constants.DEFAULT_BRIDGE,),
            make_option("--master-netdev", dest="master_netdev",
                        help="Specify the node interface (cluster-wide)"
                          " on which the master IP address will be added "
                          " [%s]" % constants.DEFAULT_BRIDGE,
                        metavar="NETDEV",
                        default=constants.DEFAULT_BRIDGE,),
            make_option("--file-storage-dir", dest="file_storage_dir",
                        help="Specify the default directory (cluster-wide)"
                             " for storing the file-based disks [%s]" %
                             constants.DEFAULT_FILE_STORAGE_DIR,
                        metavar="DIR",
                        default=constants.DEFAULT_FILE_STORAGE_DIR,),
            make_option("--no-lvm-storage", dest="lvm_storage",
                        help="No support for lvm based instances"
                             " (cluster-wide)",
                        action="store_false", default=True,),
            ],
           "[opts...] <cluster_name>",
           "Initialises a new cluster configuration"),
  'destroy': (DestroyCluster, ARGS_NONE,
              [DEBUG_OPT,
               make_option("--yes-do-it", dest="yes_do_it",
                           help="Destroy cluster",
                           action="store_true"),
              ],
              "", "Destroy cluster"),
  'rename': (RenameCluster, ARGS_ONE, [DEBUG_OPT, FORCE_OPT],
               "<new_name>",
               "Renames the cluster"),
  'verify': (VerifyCluster, ARGS_NONE, [DEBUG_OPT,
             make_option("--no-nplus1-mem", dest="skip_nplusone_mem",
                         help="Skip N+1 memory redundancy tests",
                         action="store_true",
                         default=False,),
             ],
             "", "Does a check on the cluster configuration"),
  'verify-disks': (VerifyDisks, ARGS_NONE, [DEBUG_OPT],
                   "", "Does a check on the cluster disk status"),
  'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT],
                     "", "Makes the current node the master"),
  'version': (ShowClusterVersion, ARGS_NONE, [DEBUG_OPT],
              "", "Shows the cluster version"),
  'getmaster': (ShowClusterMaster, ARGS_NONE, [DEBUG_OPT],
                "", "Shows the cluster master"),
  'copyfile': (ClusterCopyFile, ARGS_ONE, [DEBUG_OPT, node_option],
               "[-n node...] <filename>",
               "Copies a file to all (or only some) nodes"),
  'command': (RunClusterCommand, ARGS_ATLEAST(1), [DEBUG_OPT, node_option],
              "[-n node...] <command>",
              "Runs a command on all (or only some) nodes"),
  'info': (ShowClusterConfig, ARGS_NONE, [DEBUG_OPT],
                 "", "Show cluster configuration"),
  # the tag handlers come from ganeti.cli (star import above)
  'list-tags': (ListTags, ARGS_NONE,
                [DEBUG_OPT], "", "List the tags of the cluster"),
  'add-tags': (AddTags, ARGS_ANY, [DEBUG_OPT, TAG_SRC_OPT],
               "tag...", "Add tags to the cluster"),
  'remove-tags': (RemoveTags, ARGS_ANY, [DEBUG_OPT, TAG_SRC_OPT],
                  "tag...", "Remove tags from the cluster"),
  'search-tags': (SearchTags, ARGS_ONE,
                  [DEBUG_OPT], "", "Searches the tags on all objects on"
                  " the cluster for a given pattern (regex)"),
  'queue': (QueueOps, ARGS_ONE, [DEBUG_OPT],
            "drain|undrain|info", "Change queue properties"),
  'modify': (SetClusterParams, ARGS_NONE,
             [DEBUG_OPT,
              make_option("-g", "--vg-name", dest="vg_name",
                          help="Specify the volume group name "
                          " (cluster-wide) for disk allocation "
                          "and enable lvm based storage",
                          metavar="VG",),
              make_option("--no-lvm-storage", dest="lvm_storage",
                          help="Disable support for lvm based instances"
                               " (cluster-wide)",
                          action="store_false", default=True,),
              ],
             "[opts...]",
             "Alters the parameters of the cluster"),
  }
494

    
495
if __name__ == '__main__':
  # dispatch to the selected sub-command; the override makes all tag
  # operations in this script act on cluster-level tags
  sys.exit(GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER}))