Revision b63ed789
b/lib/backend.py | ||
---|---|---|
223 | 223 |
if result.failed: |
224 | 224 |
logger.Error("Failed to list logical volumes, lvs output: %s" % |
225 | 225 |
result.output) |
226 |
return lvs
|
|
226 |
return result.output
|
|
227 | 227 |
|
228 | 228 |
for line in result.stdout.splitlines(): |
229 | 229 |
line = line.strip().rstrip(sep) |
b/lib/cmdlib.py | ||
---|---|---|
840 | 840 |
# node_volume |
841 | 841 |
volumeinfo = all_volumeinfo[node] |
842 | 842 |
|
843 |
if type(volumeinfo) != dict: |
|
843 |
if isinstance(volumeinfo, basestring): |
|
844 |
feedback_fn(" - ERROR: LVM problem on node %s: %s" % |
|
845 |
(node, volumeinfo[-400:].encode('string_escape'))) |
|
846 |
bad = True |
|
847 |
node_volume[node] = {} |
|
848 |
elif not isinstance(volumeinfo, dict): |
|
844 | 849 |
feedback_fn(" - ERROR: connection to %s failed" % (node,)) |
845 | 850 |
bad = True |
846 | 851 |
continue |
847 |
|
|
848 |
node_volume[node] = volumeinfo |
|
852 |
else: |
|
853 |
node_volume[node] = volumeinfo
|
|
849 | 854 |
|
850 | 855 |
# node_instance |
851 | 856 |
nodeinstance = all_instanceinfo[node] |
... | ... | |
899 | 904 |
"""Verify integrity of cluster disks. |
900 | 905 |
|
901 | 906 |
""" |
902 |
result = res_nodes, res_instances = [], []
|
|
907 |
result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}
|
|
903 | 908 |
|
904 | 909 |
vg_name = self.cfg.GetVGName() |
905 | 910 |
nodes = utils.NiceSort(self.cfg.GetNodeList()) |
... | ... | |
928 | 933 |
# node_volume |
929 | 934 |
lvs = node_lvs[node] |
930 | 935 |
|
931 |
if not isinstance(lvs, dict): |
|
936 |
if isinstance(lvs, basestring): |
|
937 |
logger.Info("error enumerating LVs on node %s: %s" % (node, lvs)) |
|
938 |
res_nlvm[node] = lvs |
|
939 |
elif not isinstance(lvs, dict): |
|
932 | 940 |
logger.Info("connection to node %s failed or invalid data returned" % |
933 | 941 |
(node,)) |
934 | 942 |
res_nodes.append(node) |
935 | 943 |
continue |
936 | 944 |
|
937 | 945 |
for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems(): |
938 |
if not lv_online:
|
|
939 |
inst = nv_dict.get((node, lv_name), None)
|
|
940 |
if inst is not None and inst.name not in res_instances:
|
|
946 |
inst = nv_dict.pop((node, lv_name), None)
|
|
947 |
if (not lv_online and inst is not None
|
|
948 |
and inst.name not in res_instances):
|
|
941 | 949 |
res_instances.append(inst.name) |
942 | 950 |
|
951 |
# any leftover items in nv_dict are missing LVs, let's arrange the |
|
952 |
# data better |
|
953 |
for key, inst in nv_dict.iteritems(): |
|
954 |
if inst.name not in res_missing: |
|
955 |
res_missing[inst.name] = [] |
|
956 |
res_missing[inst.name].append(key) |
|
957 |
|
|
943 | 958 |
return result |
944 | 959 |
|
945 | 960 |
|
b/lib/constants.py | ||
---|---|---|
25 | 25 |
|
26 | 26 |
# various versions |
27 | 27 |
CONFIG_VERSION = 3 |
28 |
PROTOCOL_VERSION = 9
|
|
28 |
PROTOCOL_VERSION = 10
|
|
29 | 29 |
RELEASE_VERSION = _autoconf.PACKAGE_VERSION |
30 | 30 |
OS_API_VERSION = 5 |
31 | 31 |
EXPORT_VERSION = 0 |
b/lib/opcodes.py | ||
---|---|---|
93 | 93 |
|
94 | 94 |
Result: a tuple of four elements: |
95 | 95 |
- list of node names with bad data returned (unreachable, etc.) |
96 |
- dict of node names with broken volume groups (values: error msg) |
|
96 | 97 |
- list of instances with degraded disks (that should be activated) |
98 |
- dict of instances with missing logical volumes (values: (node, vol) |
|
99 |
pairs with details about the missing volumes) |
|
97 | 100 |
|
98 |
In normal operation, both lists should be empty. A non-empty
|
|
99 |
instance list is still ok (errors were fixed) but non-empty node
|
|
100 |
list means some node is down, and probably there are unfixable drbd
|
|
101 |
errors. |
|
101 |
In normal operation, all lists should be empty. A non-empty instance
|
|
102 |
list (3rd element of the result) is still ok (errors were fixed) but
|
|
103 |
non-empty node list means some node is down, and probably there are
|
|
104 |
unfixable drbd errors.
|
|
102 | 105 |
|
103 | 106 |
Note that only instances that are drbd-based are taken into |
104 | 107 |
consideration. This might need to be revisited in the future. |
b/scripts/gnt-cluster | ||
---|---|---|
27 | 27 |
from ganeti import opcodes |
28 | 28 |
from ganeti import constants |
29 | 29 |
from ganeti import errors |
30 |
from ganeti import utils |
|
30 | 31 |
|
31 | 32 |
|
32 | 33 |
def InitCluster(opts, args): |
... | ... | |
191 | 192 |
""" |
192 | 193 |
op = opcodes.OpVerifyDisks() |
193 | 194 |
result = SubmitOpCode(op) |
194 |
if not isinstance(result, tuple) or len(result) != 2:
|
|
195 |
if not isinstance(result, tuple) or len(result) != 4:
|
|
195 | 196 |
raise errors.ProgrammerError("Unknown result type for OpVerifyDisks") |
196 | 197 |
|
197 |
nodes, instances = result |
|
198 |
nodes, nlvm, instances, missing = result |
|
199 |
|
|
198 | 200 |
if nodes: |
199 | 201 |
print "Nodes unreachable or with bad data:" |
200 | 202 |
for name in nodes: |
201 | 203 |
print "\t%s" % name |
202 | 204 |
retcode = constants.EXIT_SUCCESS |
205 |
|
|
206 |
if nlvm: |
|
207 |
for node, text in nlvm.iteritems(): |
|
208 |
print ("Error on node %s: LVM error: %s" % |
|
209 |
(node, text[-400:].encode('string_escape'))) |
|
210 |
retcode |= 1 |
|
211 |
print "You need to fix these nodes first before fixing instances" |
|
212 |
|
|
203 | 213 |
if instances: |
204 | 214 |
for iname in instances: |
215 |
if iname in missing: |
|
216 |
continue |
|
205 | 217 |
op = opcodes.OpActivateInstanceDisks(instance_name=iname) |
206 | 218 |
try: |
207 | 219 |
print "Activating disks for instance '%s'" % iname |
... | ... | |
209 | 221 |
except errors.GenericError, err: |
210 | 222 |
nret, msg = FormatError(err) |
211 | 223 |
retcode |= nret |
212 |
print >>sys.stderr, ("Error activating disks for instance %s: %s" % |
|
213 |
(iname, msg)) |
|
224 |
print >> sys.stderr, ("Error activating disks for instance %s: %s" % |
|
225 |
(iname, msg)) |
|
226 |
|
|
227 |
if missing: |
|
228 |
for iname, ival in missing.iteritems(): |
|
229 |
all_missing = utils.all(ival, lambda x: x[0] in nlvm) |
|
230 |
if all_missing: |
|
231 |
print ("Instance %s cannot be verified as it lives on" |
|
232 |
" broken nodes" % iname) |
|
233 |
else: |
|
234 |
print "Instance %s has missing logical volumes:" % iname |
|
235 |
ival.sort() |
|
236 |
for node, vol in ival: |
|
237 |
if node in nlvm: |
|
238 |
print ("\tbroken node %s /dev/xenvg/%s" % (node, vol)) |
|
239 |
else: |
|
240 |
print ("\t%s /dev/xenvg/%s" % (node, vol)) |
|
241 |
print ("You need to run replace_disks for all the above" |
|
242 |
" instances, if this message persist after fixing nodes.") |
|
243 |
retcode |= 1 |
|
214 | 244 |
|
215 | 245 |
return retcode |
216 | 246 |
|
Also available in: Unified diff