KVM: configure bridged NICs at migration start
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index a67cdb6..c5dbcd5 100644
@@ -5191,7 +5191,8 @@ class LUInstanceStartup(LogicalUnit):
     instance = self.instance
     force = self.op.force
 
-    self.cfg.MarkInstanceUp(instance.name)
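+    # with no_remember set, do not record the state change in the config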
+    if not self.op.no_remember:
+      self.cfg.MarkInstanceUp(instance.name)
 
     if self.primary_offline:
       assert self.op.ignore_offline_nodes
@@ -5332,7 +5333,8 @@ class LUInstanceShutdown(LogicalUnit):
     node_current = instance.primary_node
     timeout = self.op.timeout
 
-    self.cfg.MarkInstanceDown(instance.name)
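+    # with no_remember set, do not record the shutdown in the config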
+    if not self.op.no_remember:
+      self.cfg.MarkInstanceDown(instance.name)
 
     if self.primary_offline:
       assert self.op.ignore_offline_nodes
@@ -5440,8 +5442,25 @@ class LUInstanceRecreateDisks(LogicalUnit):
   HTYPE = constants.HTYPE_INSTANCE
   REQ_BGL = False
 
+  def CheckArguments(self):
+    # normalise the disk list (remove duplicates and sort)
+    self.op.disks = sorted(frozenset(self.op.disks))
+
   def ExpandNames(self):
     self._ExpandAndLockInstance()
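+    # LOCKS_APPEND makes _LockInstancesNodes (in DeclareLocks) add the
+    # instance's own nodes to the locks declared here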
+    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+    if self.op.nodes:
+      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
+      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
+    else:
+      self.needed_locks[locking.LEVEL_NODE] = []
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE:
+      # if we replace the nodes, we only need to lock the old primary,
+      # otherwise we need to lock all nodes for disk re-creation
+      primary_only = bool(self.op.nodes)
+      self._LockInstancesNodes(primary_only=primary_only)
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -5462,12 +5481,31 @@ class LUInstanceRecreateDisks(LogicalUnit):
     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
-    _CheckNodeOnline(self, instance.primary_node)
+    if self.op.nodes:
+      if len(self.op.nodes) != len(instance.all_nodes):
+        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
+                                   " %d replacement nodes were specified" %
+                                   (instance.name, len(instance.all_nodes),
+                                    len(self.op.nodes)),
+                                   errors.ECODE_INVAL)
+      assert instance.disk_template != constants.DT_DRBD8 or \
+          len(self.op.nodes) == 2
+      assert instance.disk_template != constants.DT_PLAIN or \
+          len(self.op.nodes) == 1
+      primary_node = self.op.nodes[0]
+    else:
+      primary_node = instance.primary_node
+    _CheckNodeOnline(self, primary_node)
 
     if instance.disk_template == constants.DT_DISKLESS:
       raise errors.OpPrereqError("Instance '%s' has no disks" %
                                  self.op.instance_name, errors.ECODE_INVAL)
-    _CheckInstanceDown(self, instance, "cannot recreate disks")
+    # if we replace the nodes *and* the old primary is offline, we don't
+    # check that the instance is down, as the old node can't be reached
+    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
+    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
+    if not (self.op.nodes and old_pnode.offline):
+      _CheckInstanceDown(self, instance, "cannot recreate disks")
 
     if not self.op.disks:
       self.op.disks = range(len(instance.disks))
@@ -5476,18 +5514,39 @@ class LUInstanceRecreateDisks(LogicalUnit):
         if idx >= len(instance.disks):
           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                      errors.ECODE_INVAL)
-
+    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
+      raise errors.OpPrereqError("Can't recreate disks partially and"
+                                 " change the nodes at the same time",
+                                 errors.ECODE_INVAL)
     self.instance = instance
 
   def Exec(self, feedback_fn):
     """Recreate the disks.
 
     """
+    # change primary node, if needed
+    if self.op.nodes:
+      self.instance.primary_node = self.op.nodes[0]
+      self.LogWarning("Changing the instance's nodes; you will have to"
+                      " remove any disks left on the old nodes manually")
+
     to_skip = []
-    for idx, _ in enumerate(self.instance.disks):
+    for idx, disk in enumerate(self.instance.disks):
       if idx not in self.op.disks: # disk idx has not been passed in
         to_skip.append(idx)
         continue
+      # update secondaries for disks, if needed
+      if self.op.nodes:
+        if disk.dev_type == constants.LD_DRBD8:
+          # need to update the nodes
+          assert len(self.op.nodes) == 2
+          logical_id = list(disk.logical_id)
+          logical_id[0] = self.op.nodes[0]
+          logical_id[1] = self.op.nodes[1]
+          disk.logical_id = tuple(logical_id)
+
+    if self.op.nodes:
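+      # write the new nodes and disk logical_ids back to the configuration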
+      self.cfg.Update(self.instance, feedback_fn)
 
     _CreateDisks(self, self.instance, to_skip=to_skip)
 
@@ -5536,8 +5595,9 @@ class LUInstanceRename(LogicalUnit):
     new_name = self.op.new_name
     if self.op.name_check:
       hostname = netutils.GetHostname(name=new_name)
-      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
-                   hostname.name)
+      if hostname.name != new_name:
+        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
+                     hostname.name)
       new_name = self.op.new_name = hostname.name
       if (self.op.ip_check and
           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
@@ -7036,9 +7096,8 @@ class LUInstanceCreate(LogicalUnit):
       raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                  self.op.file_driver, errors.ECODE_INVAL)
 
-    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
-      raise errors.OpPrereqError("File storage directory path not absolute",
-                                 errors.ECODE_INVAL)
+    if self.op.disk_template == constants.DT_FILE:
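+      # file-based disks require file storage support enabled on the cluster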
+      opcodes.RequireFileStorage()
 
     ### Node/iallocator related checks
     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
@@ -7386,10 +7445,35 @@ class LUInstanceCreate(LogicalUnit):
       if name in os_defs and os_defs[name] == self.op.osparams[name]:
         del self.op.osparams[name]
 
+  def _CalculateFileStorageDir(self):
+    """Calculate final instance file storage dir.
+
+    """
+    # file storage dir calculation/check
+    self.instance_file_storage_dir = None
+    if self.op.disk_template == constants.DT_FILE:
+      # build the full file storage dir path
+      joinargs = []
+
+      cfg_storagedir = self.cfg.GetFileStorageDir()
+      if not cfg_storagedir:
+        raise errors.OpPrereqError("Cluster file storage dir not defined")
+      joinargs.append(cfg_storagedir)
+
+      if self.op.file_storage_dir is not None:
+        joinargs.append(self.op.file_storage_dir)
+
+      joinargs.append(self.op.instance_name)
+
+      # pylint: disable-msg=W0142
+      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
+
   def CheckPrereq(self):
     """Check prerequisites.
 
     """
+    self._CalculateFileStorageDir()
+
     if self.op.mode == constants.INSTANCE_IMPORT:
       export_info = self._ReadExportInfo()
       self._ReadExportParams(export_info)
@@ -7687,25 +7771,12 @@ class LUInstanceCreate(LogicalUnit):
     else:
       network_port = None
 
-    if constants.ENABLE_FILE_STORAGE:
-      # this is needed because os.path.join does not accept None arguments
-      if self.op.file_storage_dir is None:
-        string_file_storage_dir = ""
-      else:
-        string_file_storage_dir = self.op.file_storage_dir
-
-      # build the full file storage dir path
-      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
-                                        string_file_storage_dir, instance)
-    else:
-      file_storage_dir = ""
-
     disks = _GenerateDiskTemplate(self,
                                   self.op.disk_template,
                                   instance, pnode_name,
                                   self.secondaries,
                                   self.disks,
-                                  file_storage_dir,
+                                  self.instance_file_storage_dir,
                                   self.op.file_driver,
                                   0,
                                   feedback_fn)
@@ -8282,22 +8353,23 @@ class TLReplaceDisks(Tasklet):
       else:
         fn = self._ExecDrbd8DiskOnly
 
-      return fn(feedback_fn)
-
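+      # keep the result; the lock ownership check below must run after the
+      # finally block (which may deactivate the instance's disks)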
+      result = fn(feedback_fn)
     finally:
       # Deactivate the instance disks if we're replacing them on a
       # down instance
       if activate_disks:
         _SafeShutdownInstanceDisks(self.lu, self.instance)
 
-      if __debug__:
-        # Verify owned locks
-        owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
-        assert ((self.early_release and not owned_locks) or
-                (not self.early_release and
-                 set(owned_locks) == set(self.node_secondary_ip))), \
-          ("Not owning the correct locks, early_release=%s, owned=%r" %
-           (self.early_release, owned_locks))
+    if __debug__:
+      # Verify owned locks
+      owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
+      assert ((self.early_release and not owned_locks) or
+              (not self.early_release and
+               set(owned_locks) == set(self.node_secondary_ip))), \
+        ("Not owning the correct locks, early_release=%s, owned=%r" %
+         (self.early_release, owned_locks))
+
+    return result
 
   def _CheckVolumeGroup(self, nodes):
     self.lu.LogInfo("Checking volume groups")
@@ -8350,6 +8422,12 @@ class TLReplaceDisks(Tasklet):
                                  (node_name, self.instance.name))
 
   def _CreateNewStorage(self, node_name):
+    """Create new storage on the primary or secondary node.
+
+    This is only used for same-node replaces, not for changing the
+    secondary node, hence we don't want to modify the existing disk.
+
+    """
     iv_names = {}
 
     for idx, dev in enumerate(self.instance.disks):
@@ -8371,7 +8449,7 @@ class TLReplaceDisks(Tasklet):
                              logical_id=(vg_meta, names[1]))
 
       new_lvs = [lv_data, lv_meta]
-      old_lvs = dev.children
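+      # use copies so that the renames done below do not modify the disk
+      # objects stored in the configuration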
+      old_lvs = [child.Copy() for child in dev.children]
       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
 
       # we pass force_create=True to force the LVM creation
@@ -8496,10 +8574,14 @@ class TLReplaceDisks(Tasklet):
                                              rename_new_to_old)
       result.Raise("Can't rename new LVs on node %s" % self.target_node)
 
+      # Intermediate steps of the in-memory modifications
       for old, new in zip(old_lvs, new_lvs):
         new.logical_id = old.logical_id
         self.cfg.SetDiskID(new, self.target_node)
 
+      # We need to modify old_lvs so that removal later removes the
+      # right LVs, not the newly added ones; note that old_lvs is a
+      # copy here
       for disk in old_lvs:
         disk.logical_id = ren_fn(disk, temp_suffix)
         self.cfg.SetDiskID(disk, self.target_node)
@@ -8519,10 +8601,6 @@ class TLReplaceDisks(Tasklet):
                                      "volumes"))
         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
 
-      dev.children = new_lvs
-
-      self.cfg.Update(self.instance, feedback_fn)
-
     cstep = 5
     if self.early_release:
       self.lu.LogStep(cstep, steps_total, "Removing old storage")
@@ -8788,10 +8866,13 @@ class LUNodeEvacStrategy(NoHooksLU):
       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
 
   def Exec(self, feedback_fn):
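+    # gather secondary instances of all given nodes; nothing to do if none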
+    instances = []
+    for node in self.op.nodes:
+      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
+    if not instances:
+      return []
+
     if self.op.remote_node is not None:
-      instances = []
-      for node in self.op.nodes:
-        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
       result = []
       for i in instances:
         if i.primary_node == self.op.remote_node:
@@ -9332,6 +9413,7 @@ class LUInstanceSetParams(LogicalUnit):
       self.be_inst = i_bedict # the new dict (without defaults)
     else:
       self.be_new = self.be_inst = {}
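+    # current effective beparams, used below to detect a memory increase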
+    be_old = cluster.FillBE(instance)
 
     # osparams processing
     if self.op.osparams:
@@ -9343,7 +9425,8 @@ class LUInstanceSetParams(LogicalUnit):
 
     self.warn = []
 
-    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
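+    # the free memory check is only needed when memory is being increased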
+    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
+        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
       mem_check_list = [pnode]
       if be_new[constants.BE_AUTO_BALANCE]:
         # either we changed auto_balance to yes or it was from before
@@ -9384,16 +9467,17 @@ class LUInstanceSetParams(LogicalUnit):
         for node, nres in nodeinfo.items():
           if node not in instance.secondary_nodes:
             continue
-          msg = nres.fail_msg
-          if msg:
-            self.warn.append("Can't get info from secondary node %s: %s" %
-                             (node, msg))
-          elif not isinstance(nres.payload.get('memory_free', None), int):
-            self.warn.append("Secondary node %s didn't return free"
-                             " memory information" % node)
+          nres.Raise("Can't get info from secondary node %s" % node,
+                     prereq=True, ecode=errors.ECODE_STATE)
+          if not isinstance(nres.payload.get('memory_free', None), int):
+            raise errors.OpPrereqError("Secondary node %s didn't return free"
+                                       " memory information" % node,
+                                       errors.ECODE_STATE)
           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
-            self.warn.append("Not enough memory to failover instance to"
-                             " secondary node %s" % node)
+            raise errors.OpPrereqError("This change will prevent the instance"
+                                       " from failover to its secondary node"
+                                       " %s, due to not enough memory" % node,
+                                       errors.ECODE_STATE)
 
     # NIC processing
     self.nic_pnew = {}
@@ -9543,7 +9627,8 @@ class LUInstanceSetParams(LogicalUnit):
     self.cfg.Update(instance, feedback_fn)
 
     # disks are created, waiting for sync
-    disk_abort = not _WaitForSync(self, instance)
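+    # if wait_for_sync is not set, do not block until the resync completes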
+    disk_abort = not _WaitForSync(self, instance,
+                                  oneshot=not self.op.wait_for_sync)
     if disk_abort:
       raise errors.OpExecError("There are some degraded disks for"
                                " this instance, please cleanup manually")