root / scripts / gnt-cluster @ 4342e89b
History | View | Annotate | Download (19.3 kB)
1 |
#!/usr/bin/python |
---|---|
2 |
# |
3 |
|
4 |
# Copyright (C) 2006, 2007 Google Inc. |
5 |
# |
6 |
# This program is free software; you can redistribute it and/or modify |
7 |
# it under the terms of the GNU General Public License as published by |
8 |
# the Free Software Foundation; either version 2 of the License, or |
9 |
# (at your option) any later version. |
10 |
# |
11 |
# This program is distributed in the hope that it will be useful, but |
12 |
# WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
# General Public License for more details. |
15 |
# |
16 |
# You should have received a copy of the GNU General Public License |
17 |
# along with this program; if not, write to the Free Software |
18 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
19 |
# 02110-1301, USA. |
20 |
|
21 |
|
22 |
import sys |
23 |
from optparse import make_option |
24 |
import pprint |
25 |
import os.path |
26 |
|
27 |
from ganeti.cli import * |
28 |
from ganeti import opcodes |
29 |
from ganeti import constants |
30 |
from ganeti import errors |
31 |
from ganeti import utils |
32 |
from ganeti import bootstrap |
33 |
from ganeti import ssh |
34 |
from ganeti import ssconf |
35 |
|
36 |
|
37 |
def InitCluster(opts, args):
  """Initialize the cluster.

  Args:
    opts - class with options as members
    args - list of arguments, expected to be [clustername]

  Returns:
    0 on success, 1 on conflicting options or invalid parameters.

  """
  # --no-lvm-storage makes no sense together with an explicit volume group
  if not opts.lvm_storage and opts.vg_name:
    print ("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  vg_name = opts.vg_name
  if opts.lvm_storage and not opts.vg_name:
    # lvm enabled but no group named: fall back to the cluster-wide default
    vg_name = constants.DEFAULT_VG

  hvlist = opts.enabled_hypervisors
  if hvlist is not None:
    # the option value is a comma-separated string of hypervisor names
    hvlist = hvlist.split(",")
  else:
    # NOTE(review): the split() branch above yields a list -- confirm that
    # DEFAULT_ENABLED_HYPERVISOR is list-like too, not a single name string,
    # since hvlist is iterated below and passed to the opcode as-is
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR

  hvparams = opts.hvparams
  if hvparams:
    # a list of (name, dict) we can pass directly to dict()
    hvparams = dict(opts.hvparams)
  else:
    # otherwise init as empty dict
    hvparams = {}

  beparams = opts.beparams
  # check for invalid parameters
  for parameter in beparams:
    if parameter not in constants.BES_PARAMETERS:
      print "Invalid backend parameter: %s" % parameter
      return 1

  # prepare beparams dict: fill every unspecified parameter from the defaults
  for parameter in constants.BES_PARAMETERS:
    if parameter not in beparams:
      beparams[parameter] = constants.BEC_DEFAULTS[parameter]

  # type wrangling: option parsing delivers strings, the opcode wants numbers
  try:
    beparams[constants.BE_VCPUS] = int(beparams[constants.BE_VCPUS])
  except ValueError:
    print "%s must be an integer" % constants.BE_VCPUS
    return 1

  beparams[constants.BE_MEMORY] = utils.ParseUnit(beparams[constants.BE_MEMORY])

  # prepare hvparams dict: for every known hypervisor, fill in the
  # defaults for any parameter not given on the command line
  for hv in constants.HYPER_TYPES:
    if hv not in hvparams:
      hvparams[hv] = {}
    for parameter in constants.HVC_DEFAULTS[hv]:
      if parameter not in hvparams[hv]:
        hvparams[hv][parameter] = constants.HVC_DEFAULTS[hv][parameter]

  # reject unknown hypervisor names before submitting anything
  for hv in hvlist:
    if hv not in constants.HYPER_TYPES:
      print "invalid hypervisor: %s" % hv
      return 1

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
                        def_bridge=opts.def_bridge,
                        master_netdev=opts.master_netdev,
                        file_storage_dir=opts.file_storage_dir,
                        enabled_hypervisors=hvlist,
                        hvparams=hvparams,
                        beparams=beparams)
  return 0
112 |
|
113 |
|
114 |
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Args:
    opts - class with options as members; only the yes_do_it
           confirmation flag is read
    args - unused

  Returns:
    0 on success, 1 if the --yes-do-it safety flag was not given.

  """
  if not opts.yes_do_it:
    # message wording fixed: was "is irreversibly ... really want destroy"
    print ("Destroying a cluster is irreversible. If you really want to"
           " destroy this cluster, supply the --yes-do-it option.")
    return 1

  op = opcodes.OpDestroyCluster()
  master = SubmitOpCode(op)
  # if we reached this, the opcode didn't fail; we can proceed to
  # shutdown all the daemons
  bootstrap.FinalizeClusterDestroy(master)
  return 0
132 |
|
133 |
|
134 |
def RenameCluster(opts, args):
  """Rename the cluster.

  Args:
    opts - class with options as members, we use force only
    args - list of arguments, expected to be [new_name]

  Returns:
    0 on success, 1 if the user declined the confirmation prompt.

  """
  new_name = args[0]
  if not opts.force:
    # interactive safety net: renaming removes the old cluster IP, which
    # can cut off a session established through the cluster name
    usertext = ("This will rename the cluster to '%s'. If you are connected"
                " over the network to the cluster name, the operation is very"
                " dangerous as the IP address will be removed from the node"
                " and the change may not go through. Continue?") % new_name
    if not AskUser(usertext):
      return 1

  SubmitOpCode(opcodes.OpRenameCluster(name=new_name))
  return 0
154 |
|
155 |
|
156 |
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  Args:
    opts - class with options as members

  """
  info = SubmitOpCode(opcodes.OpQueryClusterInfo())
  # one "label: value" line per version component, in fixed order
  for label, key in (("Software version", "software_version"),
                     ("Internode protocol", "protocol_version"),
                     ("Configuration format", "config_version"),
                     ("OS api version", "os_api_version"),
                     ("Export interface", "export_version")):
    print ("%s: %s" % (label, info[key]))
  return 0
171 |
|
172 |
|
173 |
def ShowClusterMaster(opts, args): |
174 |
"""Write name of master node to the standard output. |
175 |
|
176 |
Args: |
177 |
opts - class with options as members |
178 |
|
179 |
""" |
180 |
print GetClient().QueryConfigValues(["master_node"])[0] |
181 |
return 0 |
182 |
|
183 |
|
184 |
def ShowClusterConfig(opts, args): |
185 |
"""Shows cluster information. |
186 |
|
187 |
""" |
188 |
op = opcodes.OpQueryClusterInfo() |
189 |
result = SubmitOpCode(op) |
190 |
|
191 |
print ("Cluster name: %s" % result["name"]) |
192 |
|
193 |
print ("Master node: %s" % result["master"]) |
194 |
|
195 |
print ("Architecture (this node): %s (%s)" % |
196 |
(result["architecture"][0], result["architecture"][1])) |
197 |
|
198 |
print ("Default hypervisor: %s" % result["hypervisor_type"]) |
199 |
print ("Enabled hypervisors: %s" % ", ".join(result["enabled_hypervisors"])) |
200 |
|
201 |
print "Hypervisor parameters:" |
202 |
for hv_name, hv_dict in result["hvparams"].items(): |
203 |
print " - %s:" % hv_name |
204 |
for item, val in hv_dict.iteritems(): |
205 |
print " %s: %s" % (item, val) |
206 |
|
207 |
print "Cluster parameters:" |
208 |
for gr_name, gr_dict in result["beparams"].items(): |
209 |
print " - %s:" % gr_name |
210 |
for item, val in gr_dict.iteritems(): |
211 |
print " %s: %s" % (item, val) |
212 |
|
213 |
return 0 |
214 |
|
215 |
|
216 |
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  Args:
    opts - class with options as members
    args - list containing a single element, the file name
  Opts used:
    nodes - list containing the name of target nodes; if empty, all nodes

  Raises:
    errors.OpPrereqError - if the given file does not exist locally.

  """
  filename = args[0]
  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename)

  cl = GetClient()

  # our own name is excluded from the target list below: the file is
  # already present on the master
  myname = utils.HostInfo().name

  cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

  node_query = opcodes.OpQueryNodes(output_fields=["name"], names=opts.nodes)
  targets = [row[0] for row in SubmitOpCode(node_query, cl=cl)
             if row[0] != myname]

  srun = ssh.SshRunner(cluster_name=cluster_name)
  for node in targets:
    # best-effort: report failures but keep copying to the other nodes
    if not srun.CopyFileToNode(node, filename):
      print >> sys.stderr, ("Copy of file %s to node %s failed" %
                            (filename, node))

  return 0
246 |
|
247 |
|
248 |
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  Args:
    opts - class with options as members
    args - the command list as a list
  Opts used:
    nodes: list containing the name of target nodes; if empty, all nodes

  """
  cl = GetClient()

  cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                    "master_node"])

  node_query = opcodes.OpQueryNodes(output_fields=["name"], names=opts.nodes)
  target_nodes = [row[0] for row in SubmitOpCode(node_query, cl=cl)]

  # run on the master last, so that a disruptive command (e.g. a reboot)
  # does not cut us off before the other nodes have been handled
  if master_node in target_nodes:
    target_nodes.remove(master_node)
    target_nodes.append(master_node)

  command = " ".join(args)
  srun = ssh.SshRunner(cluster_name=cluster_name)
  for node in target_nodes:
    cmd_result = srun.Run(node, "root", command)
    print ("------------------------------------------------")
    print ("node: %s" % node)
    print ("%s" % cmd_result.output)
    print ("return code = %s" % cmd_result.exit_code)

  return 0
282 |
|
283 |
|
284 |
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  Args:
    opts - class with options as members

  Returns:
    0 when the verification succeeded, 1 otherwise.

  """
  skip_checks = []
  # the N+1 memory check can be skipped explicitly
  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)
  success = SubmitOpCode(opcodes.OpVerifyCluster(skip_checks=skip_checks))
  if success:
    return 0
  return 1
299 |
|
300 |
|
301 |
def VerifyDisks(opts, args):
  """Verify integrity of cluster disks.

  Args:
    opts - class with options as members

  Returns:
    0 if everything is fine, a non-zero bitmask of detected problems
    otherwise.

  """
  op = opcodes.OpVerifyDisks()
  result = SubmitOpCode(op)
  # the opcode is expected to return a 4-tuple:
  # (bad nodes, per-node lvm errors, instances to activate, missing volumes)
  if not isinstance(result, (list, tuple)) or len(result) != 4:
    raise errors.ProgrammerError("Unknown result type for OpVerifyDisks")

  nodes, nlvm, instances, missing = result

  if nodes:
    print "Nodes unreachable or with bad data:"
    for name in nodes:
      print "\t%s" % name
  retcode = constants.EXIT_SUCCESS

  if nlvm:
    for node, text in nlvm.iteritems():
      # only the tail of the (potentially huge) LVM error text is shown,
      # escaped so control characters don't mangle the terminal
      print ("Error on node %s: LVM error: %s" %
             (node, text[-400:].encode('string_escape')))
      retcode |= 1
      print "You need to fix these nodes first before fixing instances"

  if instances:
    for iname in instances:
      # instances with known-missing volumes are handled in the block below
      if iname in missing:
        continue
      op = opcodes.OpActivateInstanceDisks(instance_name=iname)
      try:
        print "Activating disks for instance '%s'" % iname
        SubmitOpCode(op)
      except errors.GenericError, err:
        nret, msg = FormatError(err)
        retcode |= nret
        print >> sys.stderr, ("Error activating disks for instance %s: %s" %
                              (iname, msg))

  if missing:
    for iname, ival in missing.iteritems():
      # if every missing volume lives on a node that already reported an
      # LVM error, there is nothing meaningful to report per-volume
      all_missing = utils.all(ival, lambda x: x[0] in nlvm)
      if all_missing:
        print ("Instance %s cannot be verified as it lives on"
               " broken nodes" % iname)
      else:
        print "Instance %s has missing logical volumes:" % iname
        ival.sort()
        for node, vol in ival:
          # NOTE(review): the /dev/xenvg path is hard-coded here even though
          # the volume group is configurable elsewhere -- confirm
          if node in nlvm:
            print ("\tbroken node %s /dev/xenvg/%s" % (node, vol))
          else:
            print ("\t%s /dev/xenvg/%s" % (node, vol))
    print ("You need to run replace_disks for all the above"
           " instances, if this message persist after fixing nodes.")
    retcode |= 1

  return retcode
361 |
|
362 |
|
363 |
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  Returns:
    the return value of the bootstrap-level failover operation.

  """
  return bootstrap.MasterFailover()
372 |
|
373 |
|
374 |
def SearchTags(opts, args): |
375 |
"""Searches the tags on all the cluster. |
376 |
|
377 |
""" |
378 |
op = opcodes.OpSearchTags(pattern=args[0]) |
379 |
result = SubmitOpCode(op) |
380 |
if not result: |
381 |
return 1 |
382 |
result = list(result) |
383 |
result.sort() |
384 |
for path, tag in result: |
385 |
print "%s %s" % (path, tag) |
386 |
|
387 |
|
388 |
def SetClusterParams(opts, args):
  """Modify the cluster.

  Args:
    opts - class with options as members

  Returns:
    0 on success, 1 if no parameter was given or on conflicting options.

  """
  # require at least one modification, otherwise this would be a no-op
  if not (not opts.lvm_storage or opts.vg_name or
          opts.enabled_hypervisors or opts.hvparams or
          opts.beparams):
    print ("Please give at least one of the parameters.")
    return 1

  vg_name = opts.vg_name
  if not opts.lvm_storage and opts.vg_name:
    print ("Options --no-lvm-storage and --vg-name conflict.")
    return 1
  # NOTE(review): apart from the conflict check above, --no-lvm-storage is
  # never propagated to the opcode, so giving it alone has no effect --
  # confirm whether OpSetClusterParams should receive an empty vg_name here

  hvlist = opts.enabled_hypervisors
  if hvlist is not None:
    hvlist = hvlist.split(",")

  hvparams = opts.hvparams
  if hvparams:
    # a list of (name, dict) we can pass directly to dict()
    hvparams = dict(opts.hvparams)

  beparams = opts.beparams

  # pass the computed local (was opts.vg_name, leaving vg_name a dead store)
  op = opcodes.OpSetClusterParams(vg_name=vg_name,
                                  enabled_hypervisors=hvlist,
                                  hvparams=hvparams,
                                  beparams=beparams)
  SubmitOpCode(op)
  return 0
423 |
|
424 |
|
425 |
def QueueOps(opts, args): |
426 |
"""Queue operations. |
427 |
|
428 |
""" |
429 |
command = args[0] |
430 |
client = GetClient() |
431 |
if command in ("drain", "undrain"): |
432 |
drain_flag = command == "drain" |
433 |
client.SetQueueDrainFlag(drain_flag) |
434 |
elif command == "info": |
435 |
result = client.QueryConfigValues(["drain_flag"]) |
436 |
print "The drain flag is", |
437 |
if result[0]: |
438 |
print "set" |
439 |
else: |
440 |
print "unset" |
441 |
return 0 |
442 |
|
443 |
# this is an option common to more than one command, so we declare
# it here and reuse it
# -n/--node may be repeated; an empty list means "all nodes"
node_option = make_option("-n", "--node", action="append", dest="nodes",
                          help="Node to copy to (if not given, all nodes),"
                          " can be given multiple times",
                          metavar="<node>", default=[])
449 |
|
450 |
# dispatch table consumed by GenericMain:
# command -> (handler, argument spec, option list, usage string, description)
commands = {
  'init': (InitCluster, ARGS_ONE,
           [DEBUG_OPT,
            make_option("-s", "--secondary-ip", dest="secondary_ip",
                        help="Specify the secondary ip for this node;"
                        " if given, the entire cluster must have secondary"
                        " addresses",
                        metavar="ADDRESS", default=None),
            make_option("-m", "--mac-prefix", dest="mac_prefix",
                        help="Specify the mac prefix for the instance IP"
                        " addresses, in the format XX:XX:XX",
                        metavar="PREFIX",
                        default="aa:00:00",),
            make_option("-g", "--vg-name", dest="vg_name",
                        help="Specify the volume group name "
                        " (cluster-wide) for disk allocation [xenvg]",
                        metavar="VG",
                        default=None,),
            make_option("-b", "--bridge", dest="def_bridge",
                        help="Specify the default bridge name (cluster-wide)"
                        " to connect the instances to [%s]" %
                        constants.DEFAULT_BRIDGE,
                        metavar="BRIDGE",
                        default=constants.DEFAULT_BRIDGE,),
            # NOTE(review): the netdev default reuses DEFAULT_BRIDGE -- this
            # mirrors the help text but confirm it is intentional
            make_option("--master-netdev", dest="master_netdev",
                        help="Specify the node interface (cluster-wide)"
                        " on which the master IP address will be added "
                        " [%s]" % constants.DEFAULT_BRIDGE,
                        metavar="NETDEV",
                        default=constants.DEFAULT_BRIDGE,),
            make_option("--file-storage-dir", dest="file_storage_dir",
                        help="Specify the default directory (cluster-wide)"
                        " for storing the file-based disks [%s]" %
                        constants.DEFAULT_FILE_STORAGE_DIR,
                        metavar="DIR",
                        default=constants.DEFAULT_FILE_STORAGE_DIR,),
            make_option("--no-lvm-storage", dest="lvm_storage",
                        help="No support for lvm based instances"
                        " (cluster-wide)",
                        action="store_false", default=True,),
            make_option("--enabled-hypervisors", dest="enabled_hypervisors",
                        help="Comma-separated list of hypervisors",
                        type="string", default=None),
            ikv_option("-H", "--hypervisor-parameters", dest="hvparams",
                       help="Hypervisor and hypervisor options, in the"
                       " format"
                       " hypervisor:option=value,option=value,...",
                       default=[],
                       action="append",
                       type="identkeyval"),
            keyval_option("-B", "--backend-parameters", dest="beparams",
                          type="keyval", default={},
                          help="Backend parameters"),
            ],
           "[opts...] <cluster_name>",
           "Initialises a new cluster configuration"),
  'destroy': (DestroyCluster, ARGS_NONE,
              [DEBUG_OPT,
               make_option("--yes-do-it", dest="yes_do_it",
                           help="Destroy cluster",
                           action="store_true"),
               ],
              "", "Destroy cluster"),
  'rename': (RenameCluster, ARGS_ONE, [DEBUG_OPT, FORCE_OPT],
             "<new_name>",
             "Renames the cluster"),
  'verify': (VerifyCluster, ARGS_NONE, [DEBUG_OPT,
             make_option("--no-nplus1-mem", dest="skip_nplusone_mem",
                         help="Skip N+1 memory redundancy tests",
                         action="store_true",
                         default=False,),
             ],
             "", "Does a check on the cluster configuration"),
  'verify-disks': (VerifyDisks, ARGS_NONE, [DEBUG_OPT],
                   "", "Does a check on the cluster disk status"),
  'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT],
                     "", "Makes the current node the master"),
  'version': (ShowClusterVersion, ARGS_NONE, [DEBUG_OPT],
              "", "Shows the cluster version"),
  'getmaster': (ShowClusterMaster, ARGS_NONE, [DEBUG_OPT],
                "", "Shows the cluster master"),
  'copyfile': (ClusterCopyFile, ARGS_ONE, [DEBUG_OPT, node_option],
               "[-n node...] <filename>",
               "Copies a file to all (or only some) nodes"),
  'command': (RunClusterCommand, ARGS_ATLEAST(1), [DEBUG_OPT, node_option],
              "[-n node...] <command>",
              "Runs a command on all (or only some) nodes"),
  'info': (ShowClusterConfig, ARGS_NONE, [DEBUG_OPT],
           "", "Show cluster configuration"),
  # the tag handlers (ListTags, AddTags, RemoveTags) come from ganeti.cli
  'list-tags': (ListTags, ARGS_NONE,
                [DEBUG_OPT], "", "List the tags of the cluster"),
  'add-tags': (AddTags, ARGS_ANY, [DEBUG_OPT, TAG_SRC_OPT],
               "tag...", "Add tags to the cluster"),
  'remove-tags': (RemoveTags, ARGS_ANY, [DEBUG_OPT, TAG_SRC_OPT],
                  "tag...", "Remove tags from the cluster"),
  'search-tags': (SearchTags, ARGS_ONE,
                  [DEBUG_OPT], "", "Searches the tags on all objects on"
                  " the cluster for a given pattern (regex)"),
  'queue': (QueueOps, ARGS_ONE, [DEBUG_OPT],
            "drain|undrain|info", "Change queue properties"),
  'modify': (SetClusterParams, ARGS_NONE,
             [DEBUG_OPT,
              make_option("-g", "--vg-name", dest="vg_name",
                          help="Specify the volume group name "
                          " (cluster-wide) for disk allocation "
                          "and enable lvm based storage",
                          metavar="VG",),
              make_option("--no-lvm-storage", dest="lvm_storage",
                          help="Disable support for lvm based instances"
                          " (cluster-wide)",
                          action="store_false", default=True,),
              make_option("--enabled-hypervisors", dest="enabled_hypervisors",
                          help="Comma-separated list of hypervisors",
                          type="string", default=None),
              ikv_option("-H", "--hypervisor-parameters", dest="hvparams",
                         help="Hypervisor and hypervisor options, in the"
                         " format"
                         " hypervisor:option=value,option=value,...",
                         default=[],
                         action="append",
                         type="identkeyval"),
              keyval_option("-B", "--backend-parameters", dest="beparams",
                            type="keyval", default={},
                            help="Backend parameters"),
              ],
             "[opts...]",
             "Alters the parameters of the cluster"),
  }
578 |
|
579 |
if __name__ == '__main__':
  # dispatch to the command table; tag commands operate on cluster-level tags
  sys.exit(GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER}))