Some improvements to gnt-node repair-storage
author Iustin Pop <iustin@google.com>
Tue, 27 Oct 2009 07:54:24 +0000 (16:54 +0900)
committer Iustin Pop <iustin@google.com>
Mon, 2 Nov 2009 14:14:40 +0000 (15:14 +0100)
Currently the repair storage has two issues:

- down instances abort the operation, even though they should be
  ignored (it's not technically possible to know their disk status
  without activating their disks)
- if the VG is so broken that disks cannot be activated via gnt-instance
  activate-disks or gnt-instance startup, it's not possible to repair
  the VG at all

The patch makes the opcode skip down instances and also introduces an
``--ignore-consistency`` flag for forcing the execution of the LU.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>

lib/cmdlib.py
lib/opcodes.py
man/gnt-node.sgml
scripts/gnt-node

index be21ed1..5883817 100644 (file)
@@ -6989,11 +6989,18 @@ class LURepairNodeStorage(NoHooksLU):
       }
 
   def _CheckFaultyDisks(self, instance, node_name):
-    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
-                                node_name, True):
-      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
-                                 " node '%s'" % (instance.name, node_name),
-                                 errors.ECODE_STATE)
+    """Ensure faulty disks abort the opcode or at least warn."""
+    try:
+      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
+                                  node_name, True):
+        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
+                                   " node '%s'" % (instance.name, node_name),
+                                   errors.ECODE_STATE)
+    except errors.OpPrereqError, err:
+      if self.op.ignore_consistency:
+        self.proc.LogWarning(str(err.args[0]))
+      else:
+        raise
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -7009,6 +7016,8 @@ class LURepairNodeStorage(NoHooksLU):
 
     # Check whether any instance on this node has faulty disks
     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
+      if not inst.admin_up:
+        continue
       check_nodes = set(inst.all_nodes)
       check_nodes.discard(self.op.node_name)
       for inst_node_name in check_nodes:
index 7e01595..d1179e1 100644 (file)
@@ -390,6 +390,7 @@ class OpRepairNodeStorage(OpCode):
     "node_name",
     "storage_type",
     "name",
+    "ignore_consistency",
     ]
 
 
index a62edd0..b16a4a7 100644 (file)
@@ -889,9 +889,9 @@ node2 lvm-pv /dev/sdb1 698.6G   0M 698.6G Y
         <command>modify-storage</command>
         <arg><option>--allocatable=yes|no</option></arg>
         <sbr>
-        <arg><replaceable>node</replaceable></arg>
-        <arg><replaceable>storage-type</replaceable></arg>
-        <arg><replaceable>volume-name</replaceable></arg>
+        <arg choice="req"><replaceable>node</replaceable></arg>
+        <arg choice="req"><replaceable>storage-type</replaceable></arg>
+        <arg choice="req"><replaceable>volume-name</replaceable></arg>
       </cmdsynopsis>
 
       <para>
@@ -913,9 +913,10 @@ node2 lvm-pv /dev/sdb1 698.6G   0M 698.6G Y
 
       <cmdsynopsis>
         <command>repair-storage</command>
-        <arg><replaceable>node</replaceable></arg>
-        <arg><replaceable>storage-type</replaceable></arg>
-        <arg><replaceable>volume-name</replaceable></arg>
+        <arg>--ignore-consistency</arg>
+        <arg choice="req"><replaceable>node</replaceable></arg>
+        <arg choice="req"><replaceable>storage-type</replaceable></arg>
+        <arg choice="req"><replaceable>volume-name</replaceable></arg>
       </cmdsynopsis>
 
       <para>
@@ -936,6 +937,12 @@ node2 lvm-pv /dev/sdb1 698.6G   0M 698.6G Y
       </caution>
 
       <para>
+        The <option>--ignore-consistency</option> option will ignore
+        any inconsistent disks (on the nodes paired with this
+        one). Use of this option is most likely to lead to data-loss.
+      </para>
+
+      <para>
         Example:
         <screen>
 # gnt-node repair-storage node5.example.com lvm-vg xenvg
index b14b04d..205365a 100755 (executable)
@@ -597,7 +597,8 @@ def RepairStorage(opts, args):
 
   op = opcodes.OpRepairNodeStorage(node_name=node_name,
                                    storage_type=storage_type,
-                                   name=volume_name)
+                                   name=volume_name,
+                                   ignore_consistency=opts.ignore_consistency)
   SubmitOpCode(op)
 
 
@@ -709,7 +710,7 @@ commands = {
     [ArgNode(min=1, max=1),
      ArgChoice(min=1, max=1, choices=_REPAIRABLE_STORAGE_TYPES),
      ArgFile(min=1, max=1)],
-    [],
+    [IGNORE_CONSIST_OPT],
     "<node_name> <storage_type> <name>",
     "Repairs a storage volume on a node"),
   'list-tags': (