X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/4cc2a7286e26f21902de812c1476e5c921e5e082..7df43a7638efc05a12aac4f57a219e903daa24d5:/lib/cmdlib.py diff --git a/lib/cmdlib.py b/lib/cmdlib.py index c071c96..f338da5 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -26,7 +26,6 @@ import os import os.path import sha -import socket import time import tempfile import re @@ -70,7 +69,7 @@ class LogicalUnit(object): validity. """ - self.processor = processor + self.proc = processor self.op = op self.cfg = cfg self.sstore = sstore @@ -164,6 +163,15 @@ class NoHooksLU(LogicalUnit): return {}, [], [] +def _RemoveHostFromEtcHosts(hostname): + """Wrapper around utils.RemoteEtcHostsEntry. + + """ + hi = utils.HostInfo(name=hostname) + utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.name) + utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName()) + + def _GetWantedNodes(lu, nodes): """Returns list of checked and expanded node names. @@ -286,86 +294,6 @@ def _BuildInstanceHookEnvByObject(instance, override=None): return _BuildInstanceHookEnv(**args) -def _UpdateEtcHosts(fullnode, ip): - """Ensure a node has a correct entry in /etc/hosts. - - Args: - fullnode - Fully qualified domain name of host. (str) - ip - IPv4 address of host (str) - - """ - node = fullnode.split(".", 1)[0] - - f = open('/etc/hosts', 'r+') - - inthere = False - - save_lines = [] - add_lines = [] - removed = False - - while True: - rawline = f.readline() - - if not rawline: - # End of file - break - - line = rawline.split('\n')[0] - - # Strip off comments - line = line.split('#')[0] - - if not line: - # Entire line was comment, skip - save_lines.append(rawline) - continue - - fields = line.split() - - haveall = True - havesome = False - for spec in [ ip, fullnode, node ]: - if spec not in fields: - haveall = False - if spec in fields: - havesome = True - - if haveall: - inthere = True - save_lines.append(rawline) - continue - - if havesome and not haveall: - # Line (old, or manual?) which is missing some. Remove. - removed = True - continue - - save_lines.append(rawline) - - if not inthere: - add_lines.append('%s\t%s %s\n' % (ip, fullnode, node)) - - if removed: - if add_lines: - save_lines = save_lines + add_lines - - # We removed a line, write a new file and replace old. - fd, tmpname = tempfile.mkstemp('tmp', 'hosts_', '/etc') - newfile = os.fdopen(fd, 'w') - newfile.write(''.join(save_lines)) - newfile.close() - os.rename(tmpname, '/etc/hosts') - - elif add_lines: - # Simply appending a new line will do the trick. - f.seek(0, 2) - for add in add_lines: - f.write(add) - - f.close() - - def _UpdateKnownHosts(fullnode, ip, pubkey): """Ensure a node has a correct known_hosts entry. @@ -612,6 +540,11 @@ class LUInitCluster(LogicalUnit): (self.op.master_netdev, result.output.strip())) + if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and + os.access(constants.NODE_INITD_SCRIPT, os.X_OK)): + raise errors.OpPrereqError("Init.d script '%s' missing or not " + "executable." % constants.NODE_INITD_SCRIPT) + def Exec(self, feedback_fn): """Initialize the cluster. 
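The hunks above drop the hand-rolled _UpdateEtcHosts parser in favour of the shared utils.AddEtcHostsEntry / utils.RemoveEtcHostsEntry helpers, called once for the FQDN and once for the short name, plus the _RemoveHostFromEtcHosts wrapper used later on cluster destroy and node removal. Below is a minimal sketch of that add/remove pattern only; the helper names and the temp-file-plus-rename write strategy are assumptions for illustration, not the actual utils.py implementation.

import os
import tempfile


def add_etc_hosts_entry(file_name, hostname, ip):
    # Hypothetical stand-in for utils.AddEtcHostsEntry: make sure an
    # "ip<TAB>hostname" line exists, leaving unrelated entries untouched.
    fd = open(file_name)
    try:
        lines = fd.readlines()
    finally:
        fd.close()
    for line in lines:
        fields = line.split('#')[0].split()
        if fields and fields[0] == ip and hostname in fields[1:]:
            return  # entry already present
    lines.append("%s\t%s\n" % (ip, hostname))
    _write_atomically(file_name, lines)


def remove_etc_hosts_entry(file_name, hostname):
    # Hypothetical stand-in for utils.RemoveEtcHostsEntry: strip the host
    # name from any line; drop the whole line if no names remain on it.
    fd = open(file_name)
    try:
        src = fd.readlines()
    finally:
        fd.close()
    out = []
    for line in src:
        fields = line.split('#')[0].split()
        if len(fields) > 1 and hostname in fields[1:]:
            names = [n for n in fields[1:] if n != hostname]
            if not names:
                continue
            line = "%s\t%s\n" % (fields[0], " ".join(names))
        out.append(line)
    _write_atomically(file_name, out)


def _write_atomically(file_name, lines):
    # Write to a temporary file in the same directory, then rename it over
    # the original so readers never see a half-written hosts file.
    # (A real helper would also restore the original file mode/ownership.)
    dir_name = os.path.dirname(file_name) or "."
    fd, tmpname = tempfile.mkstemp(dir=dir_name)
    new_file = os.fdopen(fd, "w")
    try:
        new_file.write("".join(lines))
    finally:
        new_file.close()
    os.rename(tmpname, file_name)

Calling the real helpers twice per host, once with hostname.name and once with hi.ShortName() as the hunks above do, keeps both the fully qualified and the short name resolvable locally, which is why the old read-modify-append logic could be removed.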
@@ -641,7 +574,10 @@ class LUInitCluster(LogicalUnit): f.close() sshkey = sshline.split(" ")[1] - _UpdateEtcHosts(hostname.name, hostname.ip) + hi = utils.HostInfo(name=hostname.name) + utils.AddEtcHostsEntry(constants.ETC_HOSTS, hostname.name, hi.ip) + utils.AddEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName(), hi.ip) + del hi _UpdateKnownHosts(hostname.name, hostname.ip, sshkey) @@ -683,10 +619,12 @@ class LUDestroyCluster(NoHooksLU): """Destroys the cluster. """ + master = self.sstore.GetMasterNode() priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS) utils.CreateBackup(priv_key) utils.CreateBackup(pub_key) - rpc.call_node_leave_cluster(self.sstore.GetMasterNode()) + rpc.call_node_leave_cluster(master) + _RemoveHostFromEtcHosts(master) class LUVerifyCluster(NoHooksLU): @@ -856,7 +794,6 @@ class LUVerifyCluster(NoHooksLU): feedback_fn("* Verifying global settings") self.cfg.VerifyConfig() - master = self.sstore.GetMasterNode() vg_name = self.cfg.GetVGName() nodelist = utils.NiceSort(self.cfg.GetNodeList()) instancelist = utils.NiceSort(self.cfg.GetInstanceList()) @@ -1012,7 +949,7 @@ class LURenameCluster(LogicalUnit): "please restart manually.") -def _WaitForSync(cfgw, instance, oneshot=False, unlock=False): +def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False): """Sleep and poll for an instance's disk to sync. """ @@ -1020,7 +957,7 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False): return True if not oneshot: - logger.ToStdout("Waiting for instance %s to sync disks." % instance.name) + proc.LogInfo("Waiting for instance %s to sync disks." % instance.name) node = instance.primary_node @@ -1034,7 +971,7 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False): cumul_degraded = False rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks) if not rstats: - logger.ToStderr("Can't get any data from node %s" % node) + proc.LogWarning("Can't get any data from node %s" % node) retries += 1 if retries >= 10: raise errors.RemoteError("Can't contact node %s for mirror data," @@ -1045,10 +982,11 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False): for i in range(len(rstats)): mstat = rstats[i] if mstat is None: - logger.ToStderr("Can't compute data for node %s/%s" % + proc.LogWarning("Can't compute data for node %s/%s" % (node, instance.disks[i].iv_name)) continue - perc_done, est_time, is_degraded = mstat + # we ignore the ldisk parameter + perc_done, est_time, is_degraded, _ = mstat cumul_degraded = cumul_degraded or (is_degraded and perc_done is None) if perc_done is not None: done = False @@ -1057,8 +995,8 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False): max_time = est_time else: rem_time = "no time estimate" - logger.ToStdout("- device %s: %5.2f%% done, %s" % - (instance.disks[i].iv_name, perc_done, rem_time)) + proc.LogInfo("- device %s: %5.2f%% done, %s" % + (instance.disks[i].iv_name, perc_done, rem_time)) if done or oneshot: break @@ -1071,15 +1009,23 @@ def _WaitForSync(cfgw, instance, oneshot=False, unlock=False): utils.Lock('cmd') if done: - logger.ToStdout("Instance %s's disks are in sync." % instance.name) + proc.LogInfo("Instance %s's disks are in sync." % instance.name) return not cumul_degraded -def _CheckDiskConsistency(cfgw, dev, node, on_primary): +def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False): """Check that mirrors are not degraded. 
+ The ldisk parameter, if True, will change the test from the + is_degraded attribute (which represents overall non-ok status for + the device(s)) to the ldisk (representing the local storage status). + """ cfgw.SetDiskID(dev, node) + if ldisk: + idx = 6 + else: + idx = 5 result = True if on_primary or dev.AssembleOnSecondary(): @@ -1088,7 +1034,7 @@ def _CheckDiskConsistency(cfgw, dev, node, on_primary): logger.ToStderr("Can't get any data from node %s" % node) result = False else: - result = result and (not rstats[5]) + result = result and (not rstats[idx]) if dev.children: for child in dev.children: result = result and _CheckDiskConsistency(cfgw, child, node, on_primary) @@ -1193,6 +1139,8 @@ class LURemoveNode(LogicalUnit): self.cfg.RemoveNode(node.name) + _RemoveHostFromEtcHosts(node.name) + class LUQueryNodes(NoHooksLU): """Logical unit for querying nodes. @@ -1536,7 +1484,11 @@ class LUAddNode(LogicalUnit): raise errors.OpExecError("Cannot transfer ssh keys to the new node") # Add node to our /etc/hosts, and add key to known_hosts - _UpdateEtcHosts(new_node.name, new_node.primary_ip) + hi = utils.HostInfo(name=new_node.name) + utils.AddEtcHostsEntry(constants.ETC_HOSTS, new_node.name, hi.ip) + utils.AddEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName(), hi.ip) + del hi + _UpdateKnownHosts(new_node.name, new_node.primary_ip, self.cfg.GetHostKey()) @@ -1821,7 +1773,8 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False): for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): cfg.SetDiskID(node_disk, node) is_primary = node == instance.primary_node - result = rpc.call_blockdev_assemble(node, node_disk, is_primary) + result = rpc.call_blockdev_assemble(node, node_disk, + instance.name, is_primary) if not result: logger.Error("could not prepare block device %s on node %s (is_pri" "mary=%s)" % (inst_disk.iv_name, node, is_primary)) @@ -2157,8 +2110,8 @@ class LUReinstallInstance(LogicalUnit): if pnode is None: raise errors.OpPrereqError("Primary node '%s' is unknown" % self.op.pnode) - os_obj = rpc.call_os_get([pnode.name], self.op.os_type)[pnode.name] - if not isinstance(os_obj, objects.OS): + os_obj = rpc.call_os_get(pnode.name, self.op.os_type) + if not os_obj: raise errors.OpPrereqError("OS '%s' not in supported OS list for" " primary node" % self.op.os_type) @@ -2480,10 +2433,10 @@ class LUFailoverInstance(LogicalUnit): # check bridge existance brlist = [nic.bridge for nic in instance.nics] - if not rpc.call_bridges_exist(instance.primary_node, brlist): + if not rpc.call_bridges_exist(target_node, brlist): raise errors.OpPrereqError("One or more target bridges %s does not" " exist on destination node '%s'" % - (brlist, instance.primary_node)) + (brlist, target_node)) self.instance = instance @@ -2527,9 +2480,13 @@ class LUFailoverInstance(LogicalUnit): (instance.name, source_node)) if not rpc.call_instance_shutdown(source_node, instance): - logger.Error("Could not shutdown instance %s on node %s. Proceeding" - " anyway. Please make sure node %s is down" % - (instance.name, source_node, source_node)) + if self.op.ignore_consistency: + logger.Error("Could not shutdown instance %s on node %s. Proceeding" + " anyway. 
Please make sure node %s is down" % + (instance.name, source_node, source_node)) + else: + raise errors.OpExecError("Could not shutdown instance %s on node %s" % + (instance.name, source_node)) feedback_fn("* deactivating the instance's disks on source node") if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True): @@ -2556,7 +2513,7 @@ class LUFailoverInstance(LogicalUnit): (instance.name, target_node)) -def _CreateBlockDevOnPrimary(cfg, node, device, info): +def _CreateBlockDevOnPrimary(cfg, node, instance, device, info): """Create a tree of block devices on the primary node. This always creates all devices. @@ -2564,11 +2521,12 @@ def _CreateBlockDevOnPrimary(cfg, node, device, info): """ if device.children: for child in device.children: - if not _CreateBlockDevOnPrimary(cfg, node, child, info): + if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info): return False cfg.SetDiskID(device, node) - new_id = rpc.call_blockdev_create(node, device, device.size, True, info) + new_id = rpc.call_blockdev_create(node, device, device.size, + instance.name, True, info) if not new_id: return False if device.physical_id is None: @@ -2576,7 +2534,7 @@ def _CreateBlockDevOnPrimary(cfg, node, device, info): return True -def _CreateBlockDevOnSecondary(cfg, node, device, force, info): +def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info): """Create a tree of block devices on a secondary node. If this device type has to be created on secondaries, create it and @@ -2589,13 +2547,15 @@ def _CreateBlockDevOnSecondary(cfg, node, device, force, info): force = True if device.children: for child in device.children: - if not _CreateBlockDevOnSecondary(cfg, node, child, force, info): + if not _CreateBlockDevOnSecondary(cfg, node, instance, + child, force, info): return False if not force: return True cfg.SetDiskID(device, node) - new_id = rpc.call_blockdev_create(node, device, device.size, False, info) + new_id = rpc.call_blockdev_create(node, device, device.size, + instance.name, False, info) if not new_id: return False if device.physical_id is None: @@ -2750,13 +2710,14 @@ def _CreateDisks(cfg, instance): (device.iv_name, instance.name)) #HARDCODE for secondary_node in instance.secondary_nodes: - if not _CreateBlockDevOnSecondary(cfg, secondary_node, device, False, - info): + if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance, + device, False, info): logger.Error("failed to create volume %s (%s) on secondary node %s!" % (device.iv_name, device, secondary_node)) return False #HARDCODE - if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, device, info): + if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, + instance, device, info): logger.Error("failed to create volume %s on primary!" 
% device.iv_name) return False @@ -2940,8 +2901,8 @@ class LUCreateInstance(LogicalUnit): (node, info['vg_free'], req_size)) # os verification - os_obj = rpc.call_os_get([pnode.name], self.op.os_type)[pnode.name] - if not isinstance(os_obj, objects.OS): + os_obj = rpc.call_os_get(pnode.name, self.op.os_type) + if not os_obj: raise errors.OpPrereqError("OS '%s' not in supported os list for" " primary node" % self.op.os_type) @@ -3029,12 +2990,12 @@ class LUCreateInstance(LogicalUnit): self.cfg.AddInstance(iobj) if self.op.wait_for_sync: - disk_abort = not _WaitForSync(self.cfg, iobj) + disk_abort = not _WaitForSync(self.cfg, iobj, self.proc) elif iobj.disk_template in constants.DTS_NET_MIRROR: # make sure the disks are not degraded (still sync-ing is ok) time.sleep(15) feedback_fn("* checking mirrors status") - disk_abort = not _WaitForSync(self.cfg, iobj, oneshot=True) + disk_abort = not _WaitForSync(self.cfg, iobj, self.proc, oneshot=True) else: disk_abort = False @@ -3202,14 +3163,16 @@ class LUAddMDDRBDComponent(LogicalUnit): logger.Info("adding new mirror component on secondary") #HARDCODE - if not _CreateBlockDevOnSecondary(self.cfg, remote_node, new_drbd, False, + if not _CreateBlockDevOnSecondary(self.cfg, remote_node, instance, + new_drbd, False, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new component on secondary" " node %s" % remote_node) logger.Info("adding new mirror component on primary") #HARDCODE - if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node, new_drbd, + if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node, + instance, new_drbd, _GetInstanceInfoText(instance)): # remove secondary dev self.cfg.SetDiskID(new_drbd, remote_node) @@ -3234,7 +3197,7 @@ class LUAddMDDRBDComponent(LogicalUnit): self.cfg.AddInstance(instance) - _WaitForSync(self.cfg, instance) + _WaitForSync(self.cfg, instance, self.proc) return 0 @@ -3346,8 +3309,12 @@ class LUReplaceDisks(LogicalUnit): "OLD_SECONDARY": self.instance.secondary_nodes[0], } env.update(_BuildInstanceHookEnvByObject(self.instance)) - nl = [self.sstore.GetMasterNode(), - self.instance.primary_node] + list(self.instance.secondary_nodes) + nl = [ + self.sstore.GetMasterNode(), + self.instance.primary_node, + ] + if self.op.remote_node is not None: + nl.append(self.op.remote_node) return env, nl, nl def CheckPrereq(self): @@ -3362,6 +3329,7 @@ class LUReplaceDisks(LogicalUnit): raise errors.OpPrereqError("Instance '%s' not known" % self.op.instance_name) self.instance = instance + self.op.instance_name = instance.name if instance.disk_template not in constants.DTS_NET_MIRROR: raise errors.OpPrereqError("Instance's disk layout is not" @@ -3387,14 +3355,24 @@ class LUReplaceDisks(LogicalUnit): raise errors.OpPrereqError("The specified node is the primary node of" " the instance.") elif remote_node == self.sec_node: + if self.op.mode == constants.REPLACE_DISK_SEC: + # this is for DRBD8, where we can't execute the same mode of + # replacement as for drbd7 (no different port allocated) + raise errors.OpPrereqError("Same secondary given, cannot execute" + " replacement") # the user gave the current secondary, switch to - # 'no-replace-secondary' mode + # 'no-replace-secondary' mode for drbd7 remote_node = None if (instance.disk_template == constants.DT_REMOTE_RAID1 and self.op.mode != constants.REPLACE_DISK_ALL): raise errors.OpPrereqError("Template 'remote_raid1' only allows all" " disks replacement, not individual ones") if instance.disk_template == constants.DT_DRBD8: + if 
(self.op.mode == constants.REPLACE_DISK_ALL and + remote_node is not None): + # switch to replace secondary mode + self.op.mode = constants.REPLACE_DISK_SEC + if self.op.mode == constants.REPLACE_DISK_ALL: raise errors.OpPrereqError("Template 'drbd8' only allows primary or" " secondary disk replacement, not" @@ -3405,10 +3383,12 @@ class LUReplaceDisks(LogicalUnit): " the secondary while doing a primary" " node disk replacement") self.tgt_node = instance.primary_node + self.oth_node = instance.secondary_nodes[0] elif self.op.mode == constants.REPLACE_DISK_SEC: self.new_node = remote_node # this can be None, in which case # we don't change the secondary self.tgt_node = instance.secondary_nodes[0] + self.oth_node = instance.primary_node else: raise errors.ProgrammerError("Unhandled disk replace mode") @@ -3440,7 +3420,8 @@ class LUReplaceDisks(LogicalUnit): logger.Info("adding new mirror component on secondary for %s" % dev.iv_name) #HARDCODE - if not _CreateBlockDevOnSecondary(cfg, remote_node, new_drbd, False, + if not _CreateBlockDevOnSecondary(cfg, remote_node, instance, + new_drbd, False, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new component on" " secondary node %s\n" @@ -3449,7 +3430,8 @@ class LUReplaceDisks(LogicalUnit): logger.Info("adding new mirror component on primary") #HARDCODE - if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, new_drbd, + if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, + instance, new_drbd, _GetInstanceInfoText(instance)): # remove secondary dev cfg.SetDiskID(new_drbd, remote_node) @@ -3477,7 +3459,7 @@ class LUReplaceDisks(LogicalUnit): # this can fail as the old devices are degraded and _WaitForSync # does a combined result over all disks, so we don't check its # return value - _WaitForSync(cfg, instance, unlock=True) + _WaitForSync(cfg, instance, self.proc, unlock=True) # so check manually all the devices for name in iv_names: @@ -3527,13 +3509,54 @@ class LUReplaceDisks(LogicalUnit): - remove old LVs (which have the name name_replaces.) Failures are not very well handled. 
+ """ + steps_total = 6 + warning, info = (self.proc.LogWarning, self.proc.LogInfo) instance = self.instance iv_names = {} vgname = self.cfg.GetVGName() # start of work cfg = self.cfg tgt_node = self.tgt_node + oth_node = self.oth_node + + # Step: check device activation + self.proc.LogStep(1, steps_total, "check device existence") + info("checking volume groups") + my_vg = cfg.GetVGName() + results = rpc.call_vg_list([oth_node, tgt_node]) + if not results: + raise errors.OpExecError("Can't list volume groups on the nodes") + for node in oth_node, tgt_node: + res = results.get(node, False) + if not res or my_vg not in res: + raise errors.OpExecError("Volume group '%s' not found on %s" % + (my_vg, node)) + for dev in instance.disks: + if not dev.iv_name in self.op.disks: + continue + for node in tgt_node, oth_node: + info("checking %s on %s" % (dev.iv_name, node)) + cfg.SetDiskID(dev, node) + if not rpc.call_blockdev_find(node, dev): + raise errors.OpExecError("Can't find device %s on node %s" % + (dev.iv_name, node)) + + # Step: check other node consistency + self.proc.LogStep(2, steps_total, "check peer consistency") + for dev in instance.disks: + if not dev.iv_name in self.op.disks: + continue + info("checking %s consistency on %s" % (dev.iv_name, oth_node)) + if not _CheckDiskConsistency(self.cfg, dev, oth_node, + oth_node==instance.primary_node): + raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe" + " to replace disks on this node (%s)" % + (oth_node, tgt_node)) + + # Step: create new storage + self.proc.LogStep(3, steps_total, "allocate new storage") for dev in instance.disks: if not dev.iv_name in self.op.disks: continue @@ -3548,72 +3571,81 @@ class LUReplaceDisks(LogicalUnit): new_lvs = [lv_data, lv_meta] old_lvs = dev.children iv_names[dev.iv_name] = (dev, old_lvs, new_lvs) - logger.Info("adding new local storage on %s for %s" % - (tgt_node, dev.iv_name)) + info("creating new local storage on %s for %s" % + (tgt_node, dev.iv_name)) # since we *always* want to create this LV, we use the # _Create...OnPrimary (which forces the creation), even if we # are talking about the secondary node for new_lv in new_lvs: - if not _CreateBlockDevOnPrimary(cfg, tgt_node, new_lv, + if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new LV named '%s' on" " node '%s'" % (new_lv.logical_id[1], tgt_node)) + # Step: for each lv, detach+rename*2+attach + self.proc.LogStep(4, steps_total, "change drbd configuration") + for dev, old_lvs, new_lvs in iv_names.itervalues(): + info("detaching %s drbd from local storage" % dev.iv_name) if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs): raise errors.OpExecError("Can't detach drbd from local storage on node" " %s for device %s" % (tgt_node, dev.iv_name)) - dev.children = [] - cfg.Update(instance) + #dev.children = [] + #cfg.Update(instance) # ok, we created the new LVs, so now we know we have the needed # storage; as such, we proceed on the target node to rename # old_lv to _old, and new_lv to old_lv; note that we rename LVs # using the assumption than logical_id == physical_id (which in # turn is the unique_id on that node) + + # FIXME(iustin): use a better name for the replaced LVs temp_suffix = int(time.time()) - logger.Info("renaming the old LVs on the target node") ren_fn = lambda d, suff: (d.physical_id[0], d.physical_id[1] + "_replaced-%s" % suff) - rlist = [(disk, ren_fn(disk, temp_suffix)) for disk in old_lvs] + # build the rename 
list based on what LVs exist on the node + rlist = [] + for to_ren in old_lvs: + find_res = rpc.call_blockdev_find(tgt_node, to_ren) + if find_res is not None: # device exists + rlist.append((to_ren, ren_fn(to_ren, temp_suffix))) + + info("renaming the old LVs on the target node") if not rpc.call_blockdev_rename(tgt_node, rlist): - logger.Error("Can't rename old LVs on node %s" % tgt_node) - do_change_old = False - else: - do_change_old = True + raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node) # now we rename the new LVs to the old LVs - logger.Info("renaming the new LVs on the target node") + info("renaming the new LVs on the target node") rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)] if not rpc.call_blockdev_rename(tgt_node, rlist): - logger.Error("Can't rename new LVs on node %s" % tgt_node) - else: - for old, new in zip(old_lvs, new_lvs): - new.logical_id = old.logical_id - cfg.SetDiskID(new, tgt_node) + raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node) + + for old, new in zip(old_lvs, new_lvs): + new.logical_id = old.logical_id + cfg.SetDiskID(new, tgt_node) - if do_change_old: - for disk in old_lvs: - disk.logical_id = ren_fn(disk, temp_suffix) - cfg.SetDiskID(disk, tgt_node) + for disk in old_lvs: + disk.logical_id = ren_fn(disk, temp_suffix) + cfg.SetDiskID(disk, tgt_node) # now that the new lvs have the old name, we can add them to the device - logger.Info("adding new mirror component on %s" % tgt_node) + info("adding new mirror component on %s" % tgt_node) if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs): - logger.Error("Can't add local storage to drbd!") for new_lv in new_lvs: if not rpc.call_blockdev_remove(tgt_node, new_lv): - logger.Error("Can't rollback device %s") - return + warning("Can't rollback device %s", "manually cleanup unused" + " logical volumes") + raise errors.OpExecError("Can't add local storage to drbd") dev.children = new_lvs cfg.Update(instance) + # Step: wait for sync # this can fail as the old devices are degraded and _WaitForSync # does a combined result over all disks, so we don't check its # return value - logger.Info("Done changing drbd configs, waiting for sync") - _WaitForSync(cfg, instance, unlock=True) + self.proc.LogStep(5, steps_total, "sync devices") + _WaitForSync(cfg, instance, self.proc, unlock=True) # so check manually all the devices for name, (dev, old_lvs, new_lvs) in iv_names.iteritems(): @@ -3622,13 +3654,14 @@ class LUReplaceDisks(LogicalUnit): if is_degr: raise errors.OpExecError("DRBD device %s is degraded!" % name) + # Step: remove old storage + self.proc.LogStep(6, steps_total, "removing old storage") for name, (dev, old_lvs, new_lvs) in iv_names.iteritems(): - logger.Info("remove logical volumes for %s" % name) + info("remove logical volumes for %s" % name) for lv in old_lvs: cfg.SetDiskID(lv, tgt_node) if not rpc.call_blockdev_remove(tgt_node, lv): - logger.Error("Can't cleanup child device, skipping. You need to" - " fix manually!") + warning("Can't remove old LV", "manually remove unused LVs") continue def _ExecD8Secondary(self, feedback_fn): @@ -3648,7 +3681,10 @@ class LUReplaceDisks(LogicalUnit): - remove all disks from the old secondary Failures are not very well handled. 
+ """ + steps_total = 6 + warning, info = (self.proc.LogWarning, self.proc.LogInfo) instance = self.instance iv_names = {} vgname = self.cfg.GetVGName() @@ -3657,56 +3693,100 @@ class LUReplaceDisks(LogicalUnit): old_node = self.tgt_node new_node = self.new_node pri_node = instance.primary_node + + # Step: check device activation + self.proc.LogStep(1, steps_total, "check device existence") + info("checking volume groups") + my_vg = cfg.GetVGName() + results = rpc.call_vg_list([pri_node, new_node]) + if not results: + raise errors.OpExecError("Can't list volume groups on the nodes") + for node in pri_node, new_node: + res = results.get(node, False) + if not res or my_vg not in res: + raise errors.OpExecError("Volume group '%s' not found on %s" % + (my_vg, node)) + for dev in instance.disks: + if not dev.iv_name in self.op.disks: + continue + info("checking %s on %s" % (dev.iv_name, pri_node)) + cfg.SetDiskID(dev, pri_node) + if not rpc.call_blockdev_find(pri_node, dev): + raise errors.OpExecError("Can't find device %s on node %s" % + (dev.iv_name, pri_node)) + + # Step: check other node consistency + self.proc.LogStep(2, steps_total, "check peer consistency") + for dev in instance.disks: + if not dev.iv_name in self.op.disks: + continue + info("checking %s consistency on %s" % (dev.iv_name, pri_node)) + if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True): + raise errors.OpExecError("Primary node (%s) has degraded storage," + " unsafe to replace the secondary" % + pri_node) + + # Step: create new storage + self.proc.LogStep(3, steps_total, "allocate new storage") for dev in instance.disks: size = dev.size - logger.Info("adding new local storage on %s for %s" % - (new_node, dev.iv_name)) + info("adding new local storage on %s for %s" % (new_node, dev.iv_name)) # since we *always* want to create this LV, we use the # _Create...OnPrimary (which forces the creation), even if we # are talking about the secondary node for new_lv in dev.children: - if not _CreateBlockDevOnPrimary(cfg, new_node, new_lv, + if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new LV named '%s' on" " node '%s'" % (new_lv.logical_id[1], new_node)) + iv_names[dev.iv_name] = (dev, dev.children) + + self.proc.LogStep(4, steps_total, "changing drbd configuration") + for dev in instance.disks: + size = dev.size + info("activating a new drbd on %s for %s" % (new_node, dev.iv_name)) # create new devices on new_node new_drbd = objects.Disk(dev_type=constants.LD_DRBD8, logical_id=(pri_node, new_node, dev.logical_id[2]), children=dev.children) - if not _CreateBlockDevOnSecondary(cfg, new_node, new_drbd, False, + if not _CreateBlockDevOnSecondary(cfg, new_node, instance, + new_drbd, False, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new DRBD on" " node '%s'" % new_node) + for dev in instance.disks: # we have new devices, shutdown the drbd on the old secondary + info("shutting down drbd for %s on old node" % dev.iv_name) cfg.SetDiskID(dev, old_node) if not rpc.call_blockdev_shutdown(old_node, dev): - raise errors.OpExecError("Failed to shutdown DRBD on old node") + warning("Failed to shutdown drbd for %s on old node" % dev.iv_name, + "Please cleanup this device manuall as soon as possible") # we have new storage, we 'rename' the network on the primary + info("switching primary drbd for %s to new secondary node" % dev.iv_name) cfg.SetDiskID(dev, pri_node) # rename to the ip of the new node 
new_uid = list(dev.physical_id) new_uid[2] = self.remote_node_info.secondary_ip rlist = [(dev, tuple(new_uid))] if not rpc.call_blockdev_rename(pri_node, rlist): - raise errors.OpExecError("Can't detach re-attach drbd %s on node" + raise errors.OpExecError("Can't detach & re-attach drbd %s on node" " %s from %s to %s" % (dev.iv_name, pri_node, old_node, new_node)) dev.logical_id = (pri_node, new_node, dev.logical_id[2]) cfg.SetDiskID(dev, pri_node) cfg.Update(instance) - iv_names[dev.iv_name] = (dev, dev.children) # this can fail as the old devices are degraded and _WaitForSync # does a combined result over all disks, so we don't check its # return value - logger.Info("Done changing drbd configs, waiting for sync") - _WaitForSync(cfg, instance, unlock=True) + self.proc.LogStep(5, steps_total, "sync devices") + _WaitForSync(cfg, instance, self.proc, unlock=True) # so check manually all the devices for name, (dev, old_lvs) in iv_names.iteritems(): @@ -3715,14 +3795,14 @@ class LUReplaceDisks(LogicalUnit): if is_degr: raise errors.OpExecError("DRBD device %s is degraded!" % name) + self.proc.LogStep(6, steps_total, "removing old storage") for name, (dev, old_lvs) in iv_names.iteritems(): - logger.Info("remove logical volumes for %s" % name) + info("remove logical volumes for %s" % name) for lv in old_lvs: cfg.SetDiskID(lv, old_node) if not rpc.call_blockdev_remove(old_node, lv): - logger.Error("Can't cleanup child device, skipping. You need to" - " fix manually!") - continue + warning("Can't remove LV on old secondary", + "Cleanup stale volumes by hand") def Exec(self, feedback_fn): """Execute disk replacement. @@ -4023,7 +4103,7 @@ class LUExportInstance(LogicalUnit): # shutdown the instance, unless requested not to do so if self.op.shutdown: op = opcodes.OpShutdownInstance(instance_name=instance.name) - self.processor.ChainOpCode(op) + self.proc.ChainOpCode(op) vgname = self.cfg.GetVGName() @@ -4049,7 +4129,7 @@ class LUExportInstance(LogicalUnit): if self.op.shutdown: op = opcodes.OpStartupInstance(instance_name=instance.name, force=False) - self.processor.ChainOpCode(op) + self.proc.ChainOpCode(op) # TODO: check for size @@ -4075,7 +4155,7 @@ class LUExportInstance(LogicalUnit): # substitutes an empty list with the full cluster node list. if nodelist: op = opcodes.OpQueryExports(nodes=nodelist) - exportlist = self.processor.ChainOpCode(op) + exportlist = self.proc.ChainOpCode(op) for node in exportlist: if instance.name in exportlist[node]: if not rpc.call_export_remove(node, instance.name): @@ -4127,6 +4207,42 @@ class LUGetTags(TagsLU): return self.target.GetTags() +class LUSearchTags(NoHooksLU): + """Searches the tags for a given pattern. + + """ + _OP_REQP = ["pattern"] + + def CheckPrereq(self): + """Check prerequisites. + + This checks the pattern passed for validity by compiling it. + + """ + try: + self.re = re.compile(self.op.pattern) + except re.error, err: + raise errors.OpPrereqError("Invalid search pattern '%s': %s" % + (self.op.pattern, err)) + + def Exec(self, feedback_fn): + """Returns the tag list. 
+ + """ + cfg = self.cfg + tgts = [("/cluster", cfg.GetClusterInfo())] + ilist = [cfg.GetInstanceInfo(name) for name in cfg.GetInstanceList()] + tgts.extend([("/instances/%s" % i.name, i) for i in ilist]) + nlist = [cfg.GetNodeInfo(name) for name in cfg.GetNodeList()] + tgts.extend([("/nodes/%s" % n.name, n) for n in nlist]) + results = [] + for path, target in tgts: + for tag in target.GetTags(): + if self.re.search(tag): + results.append((path, tag)) + return results + + class LUAddTags(TagsLU): """Sets a tag on a given object.
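A compact standalone sketch of the tag-search flow that LUSearchTags introduces above. The TagHolder class and the sample target list are hypothetical stand-ins for the real cluster, instance and node objects (which expose GetTags() the same way), and error handling is simplified to ValueError instead of OpPrereqError.

import re


class TagHolder(object):
    # Hypothetical stand-in for the cluster/instance/node objects that
    # LUSearchTags walks; only GetTags() matters for this sketch.
    def __init__(self, tags):
        self._tags = set(tags)

    def GetTags(self):
        return self._tags


def search_tags(pattern, targets):
    # Compile once up front, mirroring CheckPrereq: an invalid regex is
    # rejected before any objects are scanned.
    try:
        rex = re.compile(pattern)
    except re.error as err:
        raise ValueError("Invalid search pattern '%s': %s" % (pattern, err))
    results = []
    for path, target in targets:
        for tag in target.GetTags():
            if rex.search(tag):
                results.append((path, tag))
    return results


# Example: find every tag mentioning "web" across cluster, instances, nodes.
targets = [
    ("/cluster", TagHolder(["ha", "web-frontend"])),
    ("/instances/inst1.example.com", TagHolder(["webserver", "prod"])),
    ("/nodes/node1.example.com", TagHolder(["spare"])),
]
matches = search_tags("web", targets)
# matches == [("/cluster", "web-frontend"),
#             ("/instances/inst1.example.com", "webserver")]

Returning (path, tag) pairs rather than bare tags preserves which object each match came from, matching the /cluster, /instances/<name> and /nodes/<name> paths built in LUSearchTags.Exec.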