Relax replace_disks_all meaning for drbd8

[ganeti-local] / lib / cmdlib.py
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index e273ee2..f338da5 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -26,7 +26,6 @@
  import os
  import os.path
  import sha
-import socket
  import time
  import tempfile
  import re
@@ -70,7 +69,7 @@ class LogicalUnit(object):
      validity.
  
      """
-    self.processor = processor
+    self.proc = processor
      self.op = op
      self.cfg = cfg
      self.sstore = sstore
@@ -164,6 +163,15 @@ class NoHooksLU(LogicalUnit):
      return {}, [], []
  
  
+def _RemoveHostFromEtcHosts(hostname):
+  """Wrapper around utils.RemoveEtcHostsEntry.
+
+  """
+  hi = utils.HostInfo(name=hostname)
+  utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.name)
+  utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName())
+
+
  def _GetWantedNodes(lu, nodes):
    """Returns list of checked and expanded node names.
  
@@ -286,86 +294,6 @@ def _BuildInstanceHookEnvByObject(instance, override=None):
    return _BuildInstanceHookEnv(**args)
  
  
-def _UpdateEtcHosts(fullnode, ip):
-  """Ensure a node has a correct entry in /etc/hosts.
-
-  Args:
-    fullnode - Fully qualified domain name of host. (str)
-    ip       - IPv4 address of host (str)
-
-  """
-  node = fullnode.split(".", 1)[0]
-
-  f = open('/etc/hosts', 'r+')
-
-  inthere = False
-
-  save_lines = []
-  add_lines = []
-  removed = False
-
-  while True:
-    rawline = f.readline()
-
-    if not rawline:
-      # End of file
-      break
-
-    line = rawline.split('\n')[0]
-
-    # Strip off comments
-    line = line.split('#')[0]
-
-    if not line:
-      # Entire line was comment, skip
-      save_lines.append(rawline)
-      continue
-
-    fields = line.split()
-
-    haveall = True
-    havesome = False
-    for spec in [ ip, fullnode, node ]:
-      if spec not in fields:
-        haveall = False
-      if spec in fields:
-        havesome = True
-
-    if haveall:
-      inthere = True
-      save_lines.append(rawline)
-      continue
-
-    if havesome and not haveall:
-      # Line (old, or manual?) which is missing some.  Remove.
-      removed = True
-      continue
-
-    save_lines.append(rawline)
-
-  if not inthere:
-    add_lines.append('%s\t%s %s\n' % (ip, fullnode, node))
-
-  if removed:
-    if add_lines:
-      save_lines = save_lines + add_lines
-
-    # We removed a line, write a new file and replace old.
-    fd, tmpname = tempfile.mkstemp('tmp', 'hosts_', '/etc')
-    newfile = os.fdopen(fd, 'w')
-    newfile.write(''.join(save_lines))
-    newfile.close()
-    os.rename(tmpname, '/etc/hosts')
-
-  elif add_lines:
-    # Simply appending a new line will do the trick.
-    f.seek(0, 2)
-    for add in add_lines:
-      f.write(add)
-
-  f.close()
-
-
  def _UpdateKnownHosts(fullnode, ip, pubkey):
    """Ensure a node has a correct known_hosts entry.
  
@@ -386,39 +314,35 @@ def _UpdateKnownHosts(fullnode, ip, pubkey):
    add_lines = []
    removed = False
  
-  while True:
-    rawline = f.readline()
+  for rawline in f:
      logger.Debug('read %s' % (repr(rawline),))
  
-    if not rawline:
-      # End of file
-      break
-
-    line = rawline.split('\n')[0]
-
-    parts = line.split(' ')
-    fields = parts[0].split(',')
-    key = parts[2]
-
-    haveall = True
-    havesome = False
-    for spec in [ ip, fullnode ]:
-      if spec not in fields:
-        haveall = False
-      if spec in fields:
-        havesome = True
-
-    logger.Debug("key, pubkey = %s." % (repr((key, pubkey)),))
-    if haveall and key == pubkey:
-      inthere = True
-      save_lines.append(rawline)
-      logger.Debug("Keeping known_hosts '%s'." % (repr(rawline),))
-      continue
+    parts = rawline.rstrip('\r\n').split()
+
+    # Ignore unwanted lines
+    if len(parts) >= 3 and not rawline.lstrip()[0] == '#':
+      fields = parts[0].split(',')
+      key = parts[2]
+
+      haveall = True
+      havesome = False
+      for spec in [ ip, fullnode ]:
+        if spec not in fields:
+          haveall = False
+        if spec in fields:
+          havesome = True
+
+      logger.Debug("key, pubkey = %s." % (repr((key, pubkey)),))
+      if haveall and key == pubkey:
+        inthere = True
+        save_lines.append(rawline)
+        logger.Debug("Keeping known_hosts '%s'." % (repr(rawline),))
+        continue
  
-    if havesome and (not haveall or key != pubkey):
-      removed = True
-      logger.Debug("Discarding known_hosts '%s'." % (repr(rawline),))
-      continue
+      if havesome and (not haveall or key != pubkey):
+        removed = True
+        logger.Debug("Discarding known_hosts '%s'." % (repr(rawline),))
+        continue
  
      save_lines.append(rawline)
  
@@ -616,6 +540,11 @@ class LUInitCluster(LogicalUnit):
                                   (self.op.master_netdev,
                                    result.output.strip()))
  
+    if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
+            os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
+      raise errors.OpPrereqError("Init.d script '%s' missing or not "
+                                 "executable." % constants.NODE_INITD_SCRIPT)
+
    def Exec(self, feedback_fn):
      """Initialize the cluster.
  
@@ -645,7 +574,10 @@ class LUInitCluster(LogicalUnit):
        f.close()
      sshkey = sshline.split(" ")[1]
  
-    _UpdateEtcHosts(hostname.name, hostname.ip)
+    hi = utils.HostInfo(name=hostname.name)
+    utils.AddEtcHostsEntry(constants.ETC_HOSTS, hostname.name, hi.ip)
+    utils.AddEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName(), hi.ip)
+    del hi
  
      _UpdateKnownHosts(hostname.name, hostname.ip, sshkey)
  
@@ -687,10 +619,12 @@ class LUDestroyCluster(NoHooksLU):
      """Destroys the cluster.
  
      """
+    master = self.sstore.GetMasterNode()
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)
-    rpc.call_node_leave_cluster(self.sstore.GetMasterNode())
+    rpc.call_node_leave_cluster(master)
+    _RemoveHostFromEtcHosts(master)
  
  
  class LUVerifyCluster(NoHooksLU):
@@ -860,7 +794,6 @@ class LUVerifyCluster(NoHooksLU):
      feedback_fn("* Verifying global settings")
      self.cfg.VerifyConfig()
  
-    master = self.sstore.GetMasterNode()
      vg_name = self.cfg.GetVGName()
      nodelist = utils.NiceSort(self.cfg.GetNodeList())
      instancelist = utils.NiceSort(self.cfg.GetInstanceList())
@@ -1016,7 +949,7 @@ class LURenameCluster(LogicalUnit):
                       "please restart manually.")
  
  
-def _WaitForSync(cfgw, instance, oneshot=False, unlock=False):
+def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False):
    """Sleep and poll for an instance's disk to sync.
  
    """
@@ -1024,7 +957,7 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False):
      return True
  
    if not oneshot:
-    logger.ToStdout("Waiting for instance %s to sync disks." % instance.name)
+    proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
  
    node = instance.primary_node
  
@@ -1038,7 +971,7 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False):
      cumul_degraded = False
      rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks)
      if not rstats:
-      logger.ToStderr("Can't get any data from node %s" % node)
+      proc.LogWarning("Can't get any data from node %s" % node)
        retries += 1
        if retries >= 10:
          raise errors.RemoteError("Can't contact node %s for mirror data,"
@@ -1049,10 +982,11 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False):
      for i in range(len(rstats)):
        mstat = rstats[i]
        if mstat is None:
-        logger.ToStderr("Can't compute data for node %s/%s" %
+        proc.LogWarning("Can't compute data for node %s/%s" %
                          (node, instance.disks[i].iv_name))
          continue
-      perc_done, est_time, is_degraded = mstat
+      # we ignore the ldisk parameter
+      perc_done, est_time, is_degraded, _ = mstat
        cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
        if perc_done is not None:
          done = False
@@ -1061,8 +995,8 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False):
            max_time = est_time
          else:
            rem_time = "no time estimate"
-        logger.ToStdout("- device %s: %5.2f%% done, %s" %
-                        (instance.disks[i].iv_name, perc_done, rem_time))
+        proc.LogInfo("- device %s: %5.2f%% done, %s" %
+                     (instance.disks[i].iv_name, perc_done, rem_time))
      if done or oneshot:
        break
  
@@ -1075,15 +1009,23 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False):
          utils.Lock('cmd')
  
    if done:
-    logger.ToStdout("Instance %s's disks are in sync." % instance.name)
+    proc.LogInfo("Instance %s's disks are in sync." % instance.name)
    return not cumul_degraded
  
  
-def _CheckDiskConsistency(cfgw, dev, node, on_primary):
+def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False):
    """Check that mirrors are not degraded.
  
+  The ldisk parameter, if True, will change the test from the
+  is_degraded attribute (which represents overall non-ok status for
+  the device(s)) to the ldisk (representing the local storage status).
+
    """
    cfgw.SetDiskID(dev, node)
+  if ldisk:
+    idx = 6
+  else:
+    idx = 5
  
    result = True
    if on_primary or dev.AssembleOnSecondary():
@@ -1092,7 +1034,7 @@ def _CheckDiskConsistency(cfgw, dev, node, on_primary):
        logger.ToStderr("Can't get any data from node %s" % node)
        result = False
      else:
-      result = result and (not rstats[5])
+      result = result and (not rstats[idx])
    if dev.children:
      for child in dev.children:
        result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)
@@ -1197,6 +1139,8 @@ class LURemoveNode(LogicalUnit):
  
      self.cfg.RemoveNode(node.name)
  
+    _RemoveHostFromEtcHosts(node.name)
+
  
  class LUQueryNodes(NoHooksLU):
    """Logical unit for querying nodes.
@@ -1540,7 +1484,11 @@ class LUAddNode(LogicalUnit):
        raise errors.OpExecError("Cannot transfer ssh keys to the new node")
  
      # Add node to our /etc/hosts, and add key to known_hosts
-    _UpdateEtcHosts(new_node.name, new_node.primary_ip)
+    hi = utils.HostInfo(name=new_node.name)
+    utils.AddEtcHostsEntry(constants.ETC_HOSTS, new_node.name, hi.ip)
+    utils.AddEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName(), hi.ip)
+    del hi
+
      _UpdateKnownHosts(new_node.name, new_node.primary_ip,
                        self.cfg.GetHostKey())
  
@@ -1825,7 +1773,8 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
      for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
        cfg.SetDiskID(node_disk, node)
        is_primary = node == instance.primary_node
-      result = rpc.call_blockdev_assemble(node, node_disk, is_primary)
+      result = rpc.call_blockdev_assemble(node, node_disk,
+                                          instance.name, is_primary)
        if not result:
          logger.Error("could not prepare block device %s on node %s (is_pri"
                       "mary=%s)" % (inst_disk.iv_name, node, is_primary))
@@ -2161,8 +2110,8 @@ class LUReinstallInstance(LogicalUnit):
        if pnode is None:
          raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                     self.op.pnode)
-      os_obj = rpc.call_os_get([pnode.name], self.op.os_type)[pnode.name]
-      if not isinstance(os_obj, objects.OS):
+      os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
+      if not os_obj:
          raise errors.OpPrereqError("OS '%s' not in supported OS list for"
                                     " primary node"  % self.op.os_type)
  
@@ -2484,10 +2433,10 @@ class LUFailoverInstance(LogicalUnit):
  
      # check bridge existance
      brlist = [nic.bridge for nic in instance.nics]
-    if not rpc.call_bridges_exist(instance.primary_node, brlist):
+    if not rpc.call_bridges_exist(target_node, brlist):
        raise errors.OpPrereqError("One or more target bridges %s does not"
                                   " exist on destination node '%s'" %
-                                 (brlist, instance.primary_node))
+                                 (brlist, target_node))
  
      self.instance = instance
  
@@ -2531,9 +2480,13 @@ class LUFailoverInstance(LogicalUnit):
                  (instance.name, source_node))
  
      if not rpc.call_instance_shutdown(source_node, instance):
-      logger.Error("Could not shutdown instance %s on node %s. Proceeding"
-                   " anyway. Please make sure node %s is down"  %
-                   (instance.name, source_node, source_node))
+      if self.op.ignore_consistency:
+        logger.Error("Could not shutdown instance %s on node %s. Proceeding"
+                     " anyway. Please make sure node %s is down"  %
+                     (instance.name, source_node, source_node))
+      else:
+        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
+                                 (instance.name, source_node))
  
      feedback_fn("* deactivating the instance's disks on source node")
      if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True):
@@ -2560,7 +2513,7 @@ class LUFailoverInstance(LogicalUnit):
                                 (instance.name, target_node))
  
  
-def _CreateBlockDevOnPrimary(cfg, node, device, info):
+def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
    """Create a tree of block devices on the primary node.
  
    This always creates all devices.
@@ -2568,11 +2521,12 @@ def _CreateBlockDevOnPrimary(cfg, node, device, info):
    """
    if device.children:
      for child in device.children:
-      if not _CreateBlockDevOnPrimary(cfg, node, child, info):
+      if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info):
          return False
  
    cfg.SetDiskID(device, node)
-  new_id = rpc.call_blockdev_create(node, device, device.size, True, info)
+  new_id = rpc.call_blockdev_create(node, device, device.size,
+                                    instance.name, True, info)
    if not new_id:
      return False
    if device.physical_id is None:
@@ -2580,7 +2534,7 @@ def _CreateBlockDevOnPrimary(cfg, node, device, info):
    return True
  
  
-def _CreateBlockDevOnSecondary(cfg, node, device, force, info):
+def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info):
    """Create a tree of block devices on a secondary node.
  
    If this device type has to be created on secondaries, create it and
@@ -2593,13 +2547,15 @@ def _CreateBlockDevOnSecondary(cfg, node, device, force, info):
      force = True
    if device.children:
      for child in device.children:
-      if not _CreateBlockDevOnSecondary(cfg, node, child, force, info):
+      if not _CreateBlockDevOnSecondary(cfg, node, instance,
+                                        child, force, info):
          return False
  
    if not force:
      return True
    cfg.SetDiskID(device, node)
-  new_id = rpc.call_blockdev_create(node, device, device.size, False, info)
+  new_id = rpc.call_blockdev_create(node, device, device.size,
+                                    instance.name, False, info)
    if not new_id:
      return False
    if device.physical_id is None:
@@ -2754,13 +2710,14 @@ def _CreateDisks(cfg, instance):
                (device.iv_name, instance.name))
      #HARDCODE
      for secondary_node in instance.secondary_nodes:
-      if not _CreateBlockDevOnSecondary(cfg, secondary_node, device, False,
-                                        info):
+      if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance,
+                                        device, False, info):
          logger.Error("failed to create volume %s (%s) on secondary node %s!" %
                       (device.iv_name, device, secondary_node))
          return False
      #HARDCODE
-    if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, device, info):
+    if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
+                                    instance, device, info):
        logger.Error("failed to create volume %s on primary!" %
                     device.iv_name)
        return False
@@ -2944,8 +2901,8 @@ class LUCreateInstance(LogicalUnit):
                                     (node, info['vg_free'], req_size))
  
      # os verification
-    os_obj = rpc.call_os_get([pnode.name], self.op.os_type)[pnode.name]
-    if not isinstance(os_obj, objects.OS):
+    os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
+    if not os_obj:
        raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                   " primary node"  % self.op.os_type)
  
@@ -3033,12 +2990,12 @@ class LUCreateInstance(LogicalUnit):
      self.cfg.AddInstance(iobj)
  
      if self.op.wait_for_sync:
-      disk_abort = not _WaitForSync(self.cfg, iobj)
+      disk_abort = not _WaitForSync(self.cfg, iobj, self.proc)
      elif iobj.disk_template in constants.DTS_NET_MIRROR:
        # make sure the disks are not degraded (still sync-ing is ok)
        time.sleep(15)
        feedback_fn("* checking mirrors status")
-      disk_abort = not _WaitForSync(self.cfg, iobj, oneshot=True)
+      disk_abort = not _WaitForSync(self.cfg, iobj, self.proc, oneshot=True)
      else:
        disk_abort = False
  
@@ -3206,14 +3163,16 @@ class LUAddMDDRBDComponent(LogicalUnit):
  
      logger.Info("adding new mirror component on secondary")
      #HARDCODE
-    if not _CreateBlockDevOnSecondary(self.cfg, remote_node, new_drbd, False,
+    if not _CreateBlockDevOnSecondary(self.cfg, remote_node, instance,
+                                      new_drbd, False,
                                        _GetInstanceInfoText(instance)):
        raise errors.OpExecError("Failed to create new component on secondary"
                                 " node %s" % remote_node)
  
      logger.Info("adding new mirror component on primary")
      #HARDCODE
-    if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node, new_drbd,
+    if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node,
+                                    instance, new_drbd,
                                      _GetInstanceInfoText(instance)):
        # remove secondary dev
        self.cfg.SetDiskID(new_drbd, remote_node)
@@ -3238,7 +3197,7 @@ class LUAddMDDRBDComponent(LogicalUnit):
  
      self.cfg.AddInstance(instance)
  
-    _WaitForSync(self.cfg, instance)
+    _WaitForSync(self.cfg, instance, self.proc)
  
      return 0
  
@@ -3350,8 +3309,12 @@ class LUReplaceDisks(LogicalUnit):
        "OLD_SECONDARY": self.instance.secondary_nodes[0],
        }
      env.update(_BuildInstanceHookEnvByObject(self.instance))
-    nl = [self.sstore.GetMasterNode(),
-          self.instance.primary_node] + list(self.instance.secondary_nodes)
+    nl = [
+      self.sstore.GetMasterNode(),
+      self.instance.primary_node,
+      ]
+    if self.op.remote_node is not None:
+      nl.append(self.op.remote_node)
      return env, nl, nl
  
    def CheckPrereq(self):
@@ -3366,6 +3329,7 @@ class LUReplaceDisks(LogicalUnit):
        raise errors.OpPrereqError("Instance '%s' not known" %
                                   self.op.instance_name)
      self.instance = instance
+    self.op.instance_name = instance.name
  
      if instance.disk_template not in constants.DTS_NET_MIRROR:
        raise errors.OpPrereqError("Instance's disk layout is not"
@@ -3391,14 +3355,24 @@ class LUReplaceDisks(LogicalUnit):
        raise errors.OpPrereqError("The specified node is the primary node of"
                                   " the instance.")
      elif remote_node == self.sec_node:
+      if self.op.mode == constants.REPLACE_DISK_SEC:
+        # this is for DRBD8, where we can't execute the same mode of
+        # replacement as for drbd7 (no different port allocated)
+        raise errors.OpPrereqError("Same secondary given, cannot execute"
+                                   " replacement")
        # the user gave the current secondary, switch to
-      # 'no-replace-secondary' mode
+      # 'no-replace-secondary' mode for drbd7
        remote_node = None
      if (instance.disk_template == constants.DT_REMOTE_RAID1 and
          self.op.mode != constants.REPLACE_DISK_ALL):
        raise errors.OpPrereqError("Template 'remote_raid1' only allows all"
                                   " disks replacement, not individual ones")
      if instance.disk_template == constants.DT_DRBD8:
+      if (self.op.mode == constants.REPLACE_DISK_ALL and
+          remote_node is not None):
+        # switch to replace secondary mode
+        self.op.mode = constants.REPLACE_DISK_SEC
+
        if self.op.mode == constants.REPLACE_DISK_ALL:
          raise errors.OpPrereqError("Template 'drbd8' only allows primary or"
                                     " secondary disk replacement, not"
@@ -3409,10 +3383,12 @@ class LUReplaceDisks(LogicalUnit):
                                       " the secondary while doing a primary"
                                       " node disk replacement")
          self.tgt_node = instance.primary_node
+        self.oth_node = instance.secondary_nodes[0]
        elif self.op.mode == constants.REPLACE_DISK_SEC:
          self.new_node = remote_node # this can be None, in which case
                                      # we don't change the secondary
          self.tgt_node = instance.secondary_nodes[0]
+        self.oth_node = instance.primary_node
        else:
          raise errors.ProgrammerError("Unhandled disk replace mode")
  
@@ -3444,7 +3420,8 @@ class LUReplaceDisks(LogicalUnit):
        logger.Info("adding new mirror component on secondary for %s" %
                    dev.iv_name)
        #HARDCODE
-      if not _CreateBlockDevOnSecondary(cfg, remote_node, new_drbd, False,
+      if not _CreateBlockDevOnSecondary(cfg, remote_node, instance,
+                                        new_drbd, False,
                                          _GetInstanceInfoText(instance)):
          raise errors.OpExecError("Failed to create new component on"
                                   " secondary node %s\n"
@@ -3453,7 +3430,8 @@ class LUReplaceDisks(LogicalUnit):
  
        logger.Info("adding new mirror component on primary")
        #HARDCODE
-      if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, new_drbd,
+      if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
+                                      instance, new_drbd,
                                        _GetInstanceInfoText(instance)):
          # remove secondary dev
          cfg.SetDiskID(new_drbd, remote_node)
@@ -3481,7 +3459,7 @@ class LUReplaceDisks(LogicalUnit):
      # this can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its
      # return value
-    _WaitForSync(cfg, instance, unlock=True)
+    _WaitForSync(cfg, instance, self.proc, unlock=True)
  
      # so check manually all the devices
      for name in iv_names:
@@ -3531,13 +3509,54 @@ class LUReplaceDisks(LogicalUnit):
          - remove old LVs (which have the name name_replaces.<time_t>)
  
      Failures are not very well handled.
+
      """
+    steps_total = 6
+    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
      instance = self.instance
      iv_names = {}
      vgname = self.cfg.GetVGName()
      # start of work
      cfg = self.cfg
      tgt_node = self.tgt_node
+    oth_node = self.oth_node
+
+    # Step: check volume groups and device existence
+    self.proc.LogStep(1, steps_total, "check device existence")
+    info("checking volume groups")
+    my_vg = cfg.GetVGName()
+    results = rpc.call_vg_list([oth_node, tgt_node])
+    if not results:
+      raise errors.OpExecError("Can't list volume groups on the nodes")
+    for node in oth_node, tgt_node:
+      res = results.get(node, False)
+      if not res or my_vg not in res:
+        raise errors.OpExecError("Volume group '%s' not found on %s" %
+                                 (my_vg, node))
+    for dev in instance.disks:
+      if not dev.iv_name in self.op.disks:
+        continue
+      for node in tgt_node, oth_node:
+        info("checking %s on %s" % (dev.iv_name, node))
+        cfg.SetDiskID(dev, node)
+        if not rpc.call_blockdev_find(node, dev):
+          raise errors.OpExecError("Can't find device %s on node %s" %
+                                   (dev.iv_name, node))
+
+    # Step: check other node consistency
+    self.proc.LogStep(2, steps_total, "check peer consistency")
+    for dev in instance.disks:
+      if not dev.iv_name in self.op.disks:
+        continue
+      info("checking %s consistency on %s" % (dev.iv_name, oth_node))
+      if not _CheckDiskConsistency(self.cfg, dev, oth_node,
+                                   oth_node==instance.primary_node):
+        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
+                                 " to replace disks on this node (%s)" %
+                                 (oth_node, tgt_node))
+
+    # Step: create new storage
+    self.proc.LogStep(3, steps_total, "allocate new storage")
      for dev in instance.disks:
        if not dev.iv_name in self.op.disks:
          continue
@@ -3552,72 +3571,81 @@ class LUReplaceDisks(LogicalUnit):
        new_lvs = [lv_data, lv_meta]
        old_lvs = dev.children
        iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
-      logger.Info("adding new local storage on %s for %s" %
-                  (tgt_node, dev.iv_name))
+      info("creating new local storage on %s for %s" %
+           (tgt_node, dev.iv_name))
        # since we *always* want to create this LV, we use the
        # _Create...OnPrimary (which forces the creation), even if we
        # are talking about the secondary node
        for new_lv in new_lvs:
-        if not _CreateBlockDevOnPrimary(cfg, tgt_node, new_lv,
+        if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv,
                                          _GetInstanceInfoText(instance)):
            raise errors.OpExecError("Failed to create new LV named '%s' on"
                                     " node '%s'" %
                                     (new_lv.logical_id[1], tgt_node))
  
+    # Step: for each lv, detach+rename*2+attach
+    self.proc.LogStep(4, steps_total, "change drbd configuration")
+    for dev, old_lvs, new_lvs in iv_names.itervalues():
+      info("detaching %s drbd from local storage" % dev.iv_name)
        if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
          raise errors.OpExecError("Can't detach drbd from local storage on node"
                                   " %s for device %s" % (tgt_node, dev.iv_name))
-      dev.children = []
-      cfg.Update(instance)
+      #dev.children = []
+      #cfg.Update(instance)
  
        # ok, we created the new LVs, so now we know we have the needed
        # storage; as such, we proceed on the target node to rename
        # old_lv to _old, and new_lv to old_lv; note that we rename LVs
        # using the assumption than logical_id == physical_id (which in
        # turn is the unique_id on that node)
+
+      # FIXME(iustin): use a better name for the replaced LVs
        temp_suffix = int(time.time())
-      logger.Info("renaming the old LVs on the target node")
        ren_fn = lambda d, suff: (d.physical_id[0],
                                  d.physical_id[1] + "_replaced-%s" % suff)
-      rlist = [(disk, ren_fn(disk, temp_suffix)) for disk in old_lvs]
+      # build the rename list based on what LVs exist on the node
+      rlist = []
+      for to_ren in old_lvs:
+        find_res = rpc.call_blockdev_find(tgt_node, to_ren)
+        if find_res is not None: # device exists
+          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
+
+      info("renaming the old LVs on the target node")
        if not rpc.call_blockdev_rename(tgt_node, rlist):
-        logger.Error("Can't rename old LVs on node %s" % tgt_node)
-        do_change_old = False
-      else:
-        do_change_old = True
+        raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
        # now we rename the new LVs to the old LVs
-      logger.Info("renaming the new LVs on the target node")
+      info("renaming the new LVs on the target node")
        rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
        if not rpc.call_blockdev_rename(tgt_node, rlist):
-        logger.Error("Can't rename new LVs on node %s" % tgt_node)
-      else:
-        for old, new in zip(old_lvs, new_lvs):
-          new.logical_id = old.logical_id
-          cfg.SetDiskID(new, tgt_node)
+        raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
+
+      for old, new in zip(old_lvs, new_lvs):
+        new.logical_id = old.logical_id
+        cfg.SetDiskID(new, tgt_node)
  
-      if do_change_old:
-        for disk in old_lvs:
-          disk.logical_id = ren_fn(disk, temp_suffix)
-          cfg.SetDiskID(disk, tgt_node)
+      for disk in old_lvs:
+        disk.logical_id = ren_fn(disk, temp_suffix)
+        cfg.SetDiskID(disk, tgt_node)
  
        # now that the new lvs have the old name, we can add them to the device
-      logger.Info("adding new mirror component on %s" % tgt_node)
+      info("adding new mirror component on %s" % tgt_node)
        if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
-        logger.Error("Can't add local storage to drbd!")
          for new_lv in new_lvs:
            if not rpc.call_blockdev_remove(tgt_node, new_lv):
-            logger.Error("Can't rollback device %s")
-        return
+            warning("Can't rollback device %s", "manually cleanup unused"
+                    " logical volumes")
+        raise errors.OpExecError("Can't add local storage to drbd")
  
        dev.children = new_lvs
        cfg.Update(instance)
  
+    # Step: wait for sync
  
      # this can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its
      # return value
-    logger.Info("Done changing drbd configs, waiting for sync")
-    _WaitForSync(cfg, instance, unlock=True)
+    self.proc.LogStep(5, steps_total, "sync devices")
+    _WaitForSync(cfg, instance, self.proc, unlock=True)
  
      # so check manually all the devices
      for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
@@ -3626,13 +3654,14 @@ class LUReplaceDisks(LogicalUnit):
        if is_degr:
          raise errors.OpExecError("DRBD device %s is degraded!" % name)
  
+    # Step: remove old storage
+    self.proc.LogStep(6, steps_total, "removing old storage")
      for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
-      logger.Info("remove logical volumes for %s" % name)
+      info("remove logical volumes for %s" % name)
        for lv in old_lvs:
          cfg.SetDiskID(lv, tgt_node)
          if not rpc.call_blockdev_remove(tgt_node, lv):
-          logger.Error("Can't cleanup child device, skipping. You need to"
-                       " fix manually!")
+          warning("Can't remove old LV", "manually remove unused LVs")
            continue
  
    def _ExecD8Secondary(self, feedback_fn):
@@ -3652,7 +3681,10 @@ class LUReplaceDisks(LogicalUnit):
        - remove all disks from the old secondary
  
      Failures are not very well handled.
+
      """
+    steps_total = 6
+    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
      instance = self.instance
      iv_names = {}
      vgname = self.cfg.GetVGName()
@@ -3661,56 +3693,100 @@ class LUReplaceDisks(LogicalUnit):
      old_node = self.tgt_node
      new_node = self.new_node
      pri_node = instance.primary_node
+
+    # Step: check volume group and device existence
+    self.proc.LogStep(1, steps_total, "check device existence")
+    info("checking volume groups")
+    my_vg = cfg.GetVGName()
+    results = rpc.call_vg_list([pri_node, new_node])
+    if not results:
+      raise errors.OpExecError("Can't list volume groups on the nodes")
+    for node in pri_node, new_node:
+      res = results.get(node, False)
+      if not res or my_vg not in res:
+        raise errors.OpExecError("Volume group '%s' not found on %s" %
+                                 (my_vg, node))
+    for dev in instance.disks:
+      if not dev.iv_name in self.op.disks:
+        continue
+      info("checking %s on %s" % (dev.iv_name, pri_node))
+      cfg.SetDiskID(dev, pri_node)
+      if not rpc.call_blockdev_find(pri_node, dev):
+        raise errors.OpExecError("Can't find device %s on node %s" %
+                                 (dev.iv_name, pri_node))
+
+    # Step: check other node consistency
+    self.proc.LogStep(2, steps_total, "check peer consistency")
+    for dev in instance.disks:
+      if not dev.iv_name in self.op.disks:
+        continue
+      info("checking %s consistency on %s" % (dev.iv_name, pri_node))
+      if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True):
+        raise errors.OpExecError("Primary node (%s) has degraded storage,"
+                                 " unsafe to replace the secondary" %
+                                 pri_node)
+
+    # Step: create new storage
+    self.proc.LogStep(3, steps_total, "allocate new storage")
      for dev in instance.disks:
        size = dev.size
-      logger.Info("adding new local storage on %s for %s" %
-                  (new_node, dev.iv_name))
+      info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
        # since we *always* want to create this LV, we use the
        # _Create...OnPrimary (which forces the creation), even if we
        # are talking about the secondary node
        for new_lv in dev.children:
-        if not _CreateBlockDevOnPrimary(cfg, new_node, new_lv,
+        if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
                                          _GetInstanceInfoText(instance)):
            raise errors.OpExecError("Failed to create new LV named '%s' on"
                                     " node '%s'" %
                                     (new_lv.logical_id[1], new_node))
  
+      iv_names[dev.iv_name] = (dev, dev.children)
+
+    self.proc.LogStep(4, steps_total, "changing drbd configuration")
+    for dev in instance.disks:
+      size = dev.size
+      info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
        # create new devices on new_node
        new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                                logical_id=(pri_node, new_node,
                                            dev.logical_id[2]),
                                children=dev.children)
-      if not _CreateBlockDevOnSecondary(cfg, new_node, new_drbd, False,
+      if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
+                                        new_drbd, False,
                                        _GetInstanceInfoText(instance)):
          raise errors.OpExecError("Failed to create new DRBD on"
                                   " node '%s'" % new_node)
  
+    for dev in instance.disks:
        # we have new devices, shutdown the drbd on the old secondary
+      info("shutting down drbd for %s on old node" % dev.iv_name)
        cfg.SetDiskID(dev, old_node)
        if not rpc.call_blockdev_shutdown(old_node, dev):
-        raise errors.OpExecError("Failed to shutdown DRBD on old node")
+        warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
+                "Please cleanup this device manually as soon as possible")
  
        # we have new storage, we 'rename' the network on the primary
+      info("switching primary drbd for %s to new secondary node" % dev.iv_name)
        cfg.SetDiskID(dev, pri_node)
        # rename to the ip of the new node
        new_uid = list(dev.physical_id)
        new_uid[2] = self.remote_node_info.secondary_ip
        rlist = [(dev, tuple(new_uid))]
        if not rpc.call_blockdev_rename(pri_node, rlist):
-        raise errors.OpExecError("Can't detach re-attach drbd %s on node"
+        raise errors.OpExecError("Can't detach & re-attach drbd %s on node"
                                   " %s from %s to %s" %
                                   (dev.iv_name, pri_node, old_node, new_node))
        dev.logical_id = (pri_node, new_node, dev.logical_id[2])
        cfg.SetDiskID(dev, pri_node)
        cfg.Update(instance)
  
-      iv_names[dev.iv_name] = (dev, dev.children)
  
      # this can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its
      # return value
-    logger.Info("Done changing drbd configs, waiting for sync")
-    _WaitForSync(cfg, instance, unlock=True)
+    self.proc.LogStep(5, steps_total, "sync devices")
+    _WaitForSync(cfg, instance, self.proc, unlock=True)
  
      # so check manually all the devices
      for name, (dev, old_lvs) in iv_names.iteritems():
@@ -3719,14 +3795,14 @@ class LUReplaceDisks(LogicalUnit):
        if is_degr:
          raise errors.OpExecError("DRBD device %s is degraded!" % name)
  
+    self.proc.LogStep(6, steps_total, "removing old storage")
      for name, (dev, old_lvs) in iv_names.iteritems():
-      logger.Info("remove logical volumes for %s" % name)
+      info("remove logical volumes for %s" % name)
        for lv in old_lvs:
          cfg.SetDiskID(lv, old_node)
          if not rpc.call_blockdev_remove(old_node, lv):
-          logger.Error("Can't cleanup child device, skipping. You need to"
-                       " fix manually!")
-          continue
+          warning("Can't remove LV on old secondary",
+                  "Cleanup stale volumes by hand")
  
    def Exec(self, feedback_fn):
      """Execute disk replacement.
@@ -4027,7 +4103,7 @@ class LUExportInstance(LogicalUnit):
      # shutdown the instance, unless requested not to do so
      if self.op.shutdown:
        op = opcodes.OpShutdownInstance(instance_name=instance.name)
-      self.processor.ChainOpCode(op, feedback_fn)
+      self.proc.ChainOpCode(op)
  
      vgname = self.cfg.GetVGName()
  
@@ -4053,7 +4129,7 @@ class LUExportInstance(LogicalUnit):
        if self.op.shutdown:
          op = opcodes.OpStartupInstance(instance_name=instance.name,
                                         force=False)
-        self.processor.ChainOpCode(op, feedback_fn)
+        self.proc.ChainOpCode(op)
  
      # TODO: check for size
  
@@ -4079,7 +4155,7 @@ class LUExportInstance(LogicalUnit):
      # substitutes an empty list with the full cluster node list.
      if nodelist:
        op = opcodes.OpQueryExports(nodes=nodelist)
-      exportlist = self.processor.ChainOpCode(op, feedback_fn)
+      exportlist = self.proc.ChainOpCode(op)
        for node in exportlist:
          if instance.name in exportlist[node]:
            if not rpc.call_export_remove(node, instance.name):
@@ -4131,6 +4207,42 @@ class LUGetTags(TagsLU):
      return self.target.GetTags()
  
  
+class LUSearchTags(NoHooksLU):
+  """Searches the tags for a given pattern.
+
+  """
+  _OP_REQP = ["pattern"]
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This checks the pattern passed for validity by compiling it.
+
+    """
+    try:
+      self.re = re.compile(self.op.pattern)
+    except re.error, err:
+      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
+                                 (self.op.pattern, err))
+
+  def Exec(self, feedback_fn):
+    """Returns the (path, tag) pairs whose tag matches the pattern.
+
+    """
+    cfg = self.cfg
+    tgts = [("/cluster", cfg.GetClusterInfo())]
+    ilist = [cfg.GetInstanceInfo(name) for name in cfg.GetInstanceList()]
+    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
+    nlist = [cfg.GetNodeInfo(name) for name in cfg.GetNodeList()]
+    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
+    results = []
+    for path, target in tgts:
+      for tag in target.GetTags():
+        if self.re.search(tag):
+          results.append((path, tag))
+    return results
+
+
  class LUAddTags(TagsLU):
    """Sets a tag on a given object.