Revision b63ed789

b/lib/backend.py
223 223
  if result.failed:
224 224
    logger.Error("Failed to list logical volumes, lvs output: %s" %
225 225
                 result.output)
226
    return lvs
226
    return result.output
227 227

  
228 228
  for line in result.stdout.splitlines():
229 229
    line = line.strip().rstrip(sep)
b/lib/cmdlib.py
840 840
      # node_volume
841 841
      volumeinfo = all_volumeinfo[node]
842 842

  
843
      if type(volumeinfo) != dict:
843
      if isinstance(volumeinfo, basestring):
844
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
845
                    (node, volumeinfo[-400:].encode('string_escape')))
846
        bad = True
847
        node_volume[node] = {}
848
      elif not isinstance(volumeinfo, dict):
844 849
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
845 850
        bad = True
846 851
        continue
847

  
848
      node_volume[node] = volumeinfo
852
      else:
853
        node_volume[node] = volumeinfo
849 854

  
850 855
      # node_instance
851 856
      nodeinstance = all_instanceinfo[node]
......
899 904
    """Verify integrity of cluster disks.
900 905

  
901 906
    """
902
    result = res_nodes, res_instances = [], []
907
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}
903 908

  
904 909
    vg_name = self.cfg.GetVGName()
905 910
    nodes = utils.NiceSort(self.cfg.GetNodeList())
......
928 933
      # node_volume
929 934
      lvs = node_lvs[node]
930 935

  
931
      if not isinstance(lvs, dict):
936
      if isinstance(lvs, basestring):
937
        logger.Info("error enumerating LVs on node %s: %s" % (node, lvs))
938
        res_nlvm[node] = lvs
939
      elif not isinstance(lvs, dict):
932 940
        logger.Info("connection to node %s failed or invalid data returned" %
933 941
                    (node,))
934 942
        res_nodes.append(node)
935 943
        continue
936 944

  
937 945
      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
938
        if not lv_online:
939
          inst = nv_dict.get((node, lv_name), None)
940
          if inst is not None and inst.name not in res_instances:
946
        inst = nv_dict.pop((node, lv_name), None)
947
        if (not lv_online and inst is not None
948
            and inst.name not in res_instances):
941 949
            res_instances.append(inst.name)
942 950

  
951
    # any leftover items in nv_dict are missing LVs, let's arrange the
952
    # data better
953
    for key, inst in nv_dict.iteritems():
954
      if inst.name not in res_missing:
955
        res_missing[inst.name] = []
956
      res_missing[inst.name].append(key)
957

  
943 958
    return result
944 959

  
945 960

  
b/lib/constants.py
25 25

  
26 26
# various versions
27 27
CONFIG_VERSION = 3
28
PROTOCOL_VERSION = 9
28
PROTOCOL_VERSION = 10
29 29
RELEASE_VERSION = _autoconf.PACKAGE_VERSION
30 30
OS_API_VERSION = 5
31 31
EXPORT_VERSION = 0
b/lib/opcodes.py
93 93

  
94 94
  Result: two lists:
95 95
    - list of node names with bad data returned (unreachable, etc.)
96
    - dict of node names with broken volume groups (values: error msg)
96 97
    - list of instances with degraded disks (that should be activated)
98
    - dict of instances with missing logical volumes (values: (node, vol)
99
      pairs with details about the missing volumes)
97 100

  
98
  In normal operation, both lists should be empty. A non-empty
99
  instance list is still ok (errors were fixed) but non-empty node
100
  list means some node is down, and probably there are unfixable drbd
101
  errors.
101
  In normal operation, all lists should be empty. A non-empty instance
102
  list (3rd element of the result) is still ok (errors were fixed) but
103
  non-empty node list means some node is down, and probably there are
104
  unfixable drbd errors.
102 105

  
103 106
  Note that only instances that are drbd-based are taken into
104 107
  consideration. This might need to be revisited in the future.
b/scripts/gnt-cluster
27 27
from ganeti import opcodes
28 28
from ganeti import constants
29 29
from ganeti import errors
30
from ganeti import utils
30 31

  
31 32

  
32 33
def InitCluster(opts, args):
......
191 192
  """
192 193
  op = opcodes.OpVerifyDisks()
193 194
  result = SubmitOpCode(op)
194
  if not isinstance(result, tuple) or len(result) != 2:
195
  if not isinstance(result, tuple) or len(result) != 4:
195 196
    raise errors.ProgrammerError("Unknown result type for OpVerifyDisks")
196 197

  
197
  nodes, instances = result
198
  nodes, nlvm, instances, missing = result
199

  
198 200
  if nodes:
199 201
    print "Nodes unreachable or with bad data:"
200 202
    for name in nodes:
201 203
      print "\t%s" % name
202 204
  retcode = constants.EXIT_SUCCESS
205

  
206
  if nlvm:
207
    for node, text in nlvm.iteritems():
208
      print ("Error on node %s: LVM error: %s" %
209
             (node, text[-400:].encode('string_escape')))
210
      retcode |= 1
211
      print "You need to fix these nodes first before fixing instances"
212

  
203 213
  if instances:
204 214
    for iname in instances:
215
      if iname in missing:
216
        continue
205 217
      op = opcodes.OpActivateInstanceDisks(instance_name=iname)
206 218
      try:
207 219
        print "Activating disks for instance '%s'" % iname
......
209 221
      except errors.GenericError, err:
210 222
        nret, msg = FormatError(err)
211 223
        retcode |= nret
212
        print >>sys.stderr, ("Error activating disks for instance %s: %s" %
213
                             (iname, msg))
224
        print >> sys.stderr, ("Error activating disks for instance %s: %s" %
225
                              (iname, msg))
226

  
227
  if missing:
228
    for iname, ival in missing.iteritems():
229
      all_missing = utils.all(ival, lambda x: x[0] in nlvm)
230
      if all_missing:
231
        print ("Instance %s cannot be verified as it lives on"
232
               " broken nodes" % iname)
233
      else:
234
        print "Instance %s has missing logical volumes:" % iname
235
        ival.sort()
236
        for node, vol in ival:
237
          if node in nlvm:
238
            print ("\tbroken node %s /dev/xenvg/%s" % (node, vol))
239
          else:
240
            print ("\t%s /dev/xenvg/%s" % (node, vol))
241
    print ("You need to run replace_disks for all the above"
242
           " instances, if this message persists after fixing nodes.")
243
    retcode |= 1
214 244

  
215 245
  return retcode
216 246

  

Also available in: Unified diff