Revision 8e2524c3
b/lib/bootstrap.py | ||
---|---|---|
373 | 373 |
(node, result.fail_reason, result.output)) |
374 | 374 |
|
375 | 375 |
|
376 |
def MasterFailover(): |
|
376 |
def MasterFailover(no_voting=False):
|
|
377 | 377 |
"""Failover the master node. |
378 | 378 |
|
379 | 379 |
This checks that we are not already the master, and will cause the |
380 | 380 |
current master to cease being master, and the non-master to become |
381 | 381 |
new master. |
382 | 382 |
|
383 |
@type no_voting: boolean |
|
384 |
@param no_voting: force the operation without remote nodes agreement |
|
385 |
(dangerous) |
|
386 |
|
|
383 | 387 |
""" |
384 | 388 |
sstore = ssconf.SimpleStore() |
385 | 389 |
|
... | ... | |
401 | 405 |
" master candidates is:\n" |
402 | 406 |
"%s" % ('\n'.join(mc_no_master))) |
403 | 407 |
|
404 |
vote_list = GatherMasterVotes(node_list) |
|
405 |
|
|
406 |
if vote_list: |
|
407 |
voted_master = vote_list[0][0] |
|
408 |
if voted_master is None: |
|
409 |
raise errors.OpPrereqError("Cluster is inconsistent, most nodes did not" |
|
410 |
" respond.") |
|
411 |
elif voted_master != old_master: |
|
412 |
raise errors.OpPrereqError("I have wrong configuration, I believe the" |
|
413 |
" master is %s but the other nodes voted for" |
|
414 |
" %s. Please resync the configuration of" |
|
415 |
" this node." % (old_master, voted_master)) |
|
408 |
if not no_voting: |
|
409 |
vote_list = GatherMasterVotes(node_list) |
|
410 |
|
|
411 |
if vote_list: |
|
412 |
voted_master = vote_list[0][0] |
|
413 |
if voted_master is None: |
|
414 |
raise errors.OpPrereqError("Cluster is inconsistent, most nodes did" |
|
415 |
" not respond.") |
|
416 |
elif voted_master != old_master: |
|
417 |
raise errors.OpPrereqError("I have a wrong configuration, I believe" |
|
418 |
" the master is %s but the other nodes" |
|
419 |
" voted %s. Please resync the configuration" |
|
420 |
" of this node." % |
|
421 |
(old_master, voted_master)) |
|
416 | 422 |
# end checks |
417 | 423 |
|
418 | 424 |
rcode = 0 |
... | ... | |
436 | 442 |
# cluster info |
437 | 443 |
cfg.Update(cluster_info) |
438 | 444 |
|
439 |
result = rpc.RpcRunner.call_node_start_master(new_master, True) |
|
445 |
# 2.0.X: Don't start the master if no_voting is true |
|
446 |
result = rpc.RpcRunner.call_node_start_master(new_master, not no_voting) |
|
440 | 447 |
if result.failed or not result.data: |
441 | 448 |
logging.error("Could not start the master role on the new master" |
442 | 449 |
" %s, please check", new_master) |
b/man/gnt-cluster.sgml | ||
---|---|---|
442 | 442 |
|
443 | 443 |
<cmdsynopsis> |
444 | 444 |
<command>masterfailover</command> |
445 |
<arg>--no-voting</arg> |
|
445 | 446 |
</cmdsynopsis> |
446 | 447 |
|
447 | 448 |
<para> |
448 | 449 |
Failover the master role to the current node. |
449 | 450 |
</para> |
451 |
|
|
452 |
<para> |
|
453 |
The <option>--no-voting</option> option skips the remote node agreement |
|
454 |
checks. This is dangerous, but necessary in some cases (for example |
|
455 |
failing over the master role in a 2-node cluster with the original master |
|
456 |
down). If the original master then comes up, it won't be able to start |
|
457 |
its master daemon because it won't have enough votes, but neither will the |
|
458 |
new master, if the master daemon ever needs a restart. You can pass |
|
459 |
--no-voting to ganeti-masterd on the new master to solve this problem, |
|
460 |
and gnt-cluster redist-conf to make sure the cluster is consistent again. |
|
461 |
</para> |
|
462 |
|
|
463 |
<para> |
|
464 |
In version 2.0.X ganeti-masterd will not be able to start if |
|
465 |
masterfailover is called with the --no-voting option (which, again, |
|
466 |
should only be used on 2-node clusters with the former master being |
|
467 |
down). In that case just start it manually passing --no-voting to it |
|
468 |
as well, until you have restored cluster redundancy. |
|
469 |
</para> |
|
470 |
|
|
450 | 471 |
</refsect2> |
451 | 472 |
|
452 | 473 |
<refsect2> |
b/scripts/gnt-cluster | ||
---|---|---|
424 | 424 |
@return: the desired exit code |
425 | 425 |
|
426 | 426 |
""" |
427 |
return bootstrap.MasterFailover() |
|
427 |
if opts.no_voting: |
|
428 |
usertext = ("This will perform the failover even if most other nodes" |
|
429 |
" are down, or if this node is outdated. This is dangerous" |
|
430 |
" as it can lead to a non-consistent cluster. Check the" |
|
431 |
" gnt-cluster(8) man page before proceeding. Continue?") |
|
432 |
if not AskUser(usertext): |
|
433 |
return 1 |
|
434 |
|
|
435 |
return bootstrap.MasterFailover(no_voting=opts.no_voting) |
|
428 | 436 |
|
429 | 437 |
|
430 | 438 |
def SearchTags(opts, args): |
... | ... | |
613 | 621 |
"", "Does a check on the cluster configuration"), |
614 | 622 |
'verify-disks': (VerifyDisks, ARGS_NONE, [DEBUG_OPT], |
615 | 623 |
"", "Does a check on the cluster disk status"), |
616 |
'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT], |
|
624 |
'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT, |
|
625 |
make_option("--no-voting", dest="no_voting", |
|
626 |
help="Skip node agreement check (dangerous)", |
|
627 |
action="store_true", |
|
628 |
default=False,), |
|
629 |
], |
|
617 | 630 |
"", "Makes the current node the master"), |
618 | 631 |
'version': (ShowClusterVersion, ARGS_NONE, [DEBUG_OPT], |
619 | 632 |
"", "Shows the cluster version"), |
Also available in: Unified diff