Add gnt-instance start --pause
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 5216b1c..8b9fa17 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -2016,8 +2016,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         # All or no nodes
         errorif(missing_file and missing_file != node_names,
                 cls.ECLUSTERFILECHECK, None,
-                "File %s is optional, but it must exist on all or no nodes (not"
-                " found on %s)",
+                "File %s is optional, but it must exist on all or no"
+                " nodes (not found on %s)",
                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
       else:
         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
@@ -2419,8 +2419,6 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     """Build hooks nodes.
 
     """
-    assert self.my_node_names, ("Node list not gathered,"
-      " has CheckPrereq been executed?")
     return ([], self.my_node_names)
 
   def Exec(self, feedback_fn):
@@ -2428,6 +2426,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     # This method has too many local variables. pylint: disable-msg=R0914
+
+    if not self.my_node_names:
+      # empty node group
+      feedback_fn("* Empty node group, skipping verification")
+      return True
+
     self.bad = False
     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
     verbose = self.op.verbose
@@ -2808,9 +2812,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         and hook results
 
     """
-    # We only really run POST phase hooks, and are only interested in
-    # their results
-    if phase == constants.HOOKS_PHASE_POST:
+    # We only really run POST phase hooks, only for non-empty groups,
+    # and are only interested in their results
+    if not self.my_node_names:
+      # empty node group
+      pass
+    elif phase == constants.HOOKS_PHASE_POST:
       # Used to change hooks' output to proper indentation
       feedback_fn("* Hooks Results")
       assert hooks_results, "invalid result from hooks"
@@ -2836,7 +2843,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
             feedback_fn("%s" % output)
             lu_result = 0
 
-      return lu_result
+    return lu_result
 
 
 class LUClusterVerifyDisks(NoHooksLU):
@@ -5698,7 +5705,8 @@ class LUInstanceStartup(LogicalUnit):
       _StartInstanceDisks(self, instance, force)
 
       result = self.rpc.call_instance_start(node_current, instance,
-                                            self.op.hvparams, self.op.beparams)
+                                            self.op.hvparams, self.op.beparams,
+                                            self.op.startup_paused)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
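
The RPC wrapper gains a fifth positional parameter, so every caller has to be
updated in the same patch; the remaining call sites below pass an explicit
False. A minimal sketch of the matching change in rpc.py (not part of this
diff; _InstDict and _SingleNodeCall are the assumed RpcRunner helpers):

      def call_instance_start(self, node, instance, hvp, bep, startup_paused):
        """Starts an instance, optionally creating it in a paused state.

        This is a single-node call.

        """
        idict = self._InstDict(instance, hvp=hvp, bep=bep)
        return self._SingleNodeCall(node, "instance_start",
                                    [idict, startup_paused])
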
@@ -5788,7 +5796,8 @@ class LUInstanceReboot(LogicalUnit):
         self.LogInfo("Instance %s was already stopped, starting now",
                      instance.name)
       _StartInstanceDisks(self, instance, ignore_secondaries)
-      result = self.rpc.call_instance_start(node_current, instance, None, None)
+      result = self.rpc.call_instance_start(node_current, instance,
+                                            None, None, False)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -6158,7 +6167,7 @@ class LUInstanceRename(LogicalUnit):
     old_name = inst.name
 
     rename_file_storage = False
-    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
+    if (inst.disk_template in constants.DTS_FILEBASED and
         self.op.new_name != inst.name):
       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
       rename_file_storage = True
@@ -6639,7 +6648,8 @@ class LUInstanceMove(LogicalUnit):
         _ShutdownInstanceDisks(self, instance)
         raise errors.OpExecError("Can't activate the instance's disks")
 
-      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      result = self.rpc.call_instance_start(target_node, instance,
+                                            None, None, False)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -7591,7 +7601,7 @@ def _CreateDisks(lu, instance, to_skip=None, target_node=None):
     pnode = target_node
     all_nodes = [pnode]
 
-  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
+  if instance.disk_template in constants.DTS_FILEBASED:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
 
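
Switching from the open-coded two-tuple to constants.DTS_FILEBASED makes the
membership test self-documenting and leaves a single place to touch when
another file-backed disk template is added. A sketch of the assumed
definition in constants.py, built from the existing DT_* values:

      DT_FILE = "file"
      DT_SHARED_FILE = "sharedfile"

      # disk templates whose disks live on a filesystem instead of
      # block devices
      DTS_FILEBASED = frozenset([DT_FILE, DT_SHARED_FILE])
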
@@ -7864,9 +7874,10 @@ class LUInstanceCreate(LogicalUnit):
       raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                  self.op.file_driver, errors.ECODE_INVAL)
 
-    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
-      raise errors.OpPrereqError("File storage directory path not absolute",
-                                 errors.ECODE_INVAL)
+    if self.op.disk_template == constants.DT_FILE:
+      opcodes.RequireFileStorage()
+    elif self.op.disk_template == constants.DT_SHARED_FILE:
+      opcodes.RequireSharedFileStorage()
 
     ### Node/iallocator related checks
     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
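
The removed inline test was both incomplete and misleading: it rejected
absolute paths while its message claimed the path was "not absolute", and it
never checked whether file storage is enabled at all. The opcodes helpers
centralize that check; a hedged sketch of what RequireFileStorage looks like,
assuming the compile-time switch in constants.py:

      def RequireFileStorage():
        """Checks that file storage is enabled.

        @raise errors.OpPrereqError: when file storage is disabled

        """
        if not constants.ENABLE_FILE_STORAGE:
          raise errors.OpPrereqError("File storage disabled at configure"
                                     " time", errors.ECODE_INVAL)
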
@@ -8225,10 +8236,40 @@ class LUInstanceCreate(LogicalUnit):
       if name in os_defs and os_defs[name] == self.op.osparams[name]:
         del self.op.osparams[name]
 
+  def _CalculateFileStorageDir(self):
+    """Calculate final instance file storage dir.
+
+    """
+    # file storage dir calculation/check
+    self.instance_file_storage_dir = None
+    if self.op.disk_template in constants.DTS_FILEBASED:
+      # build the full file storage dir path
+      joinargs = []
+
+      if self.op.disk_template == constants.DT_SHARED_FILE:
+        get_fsd_fn = self.cfg.GetSharedFileStorageDir
+      else:
+        get_fsd_fn = self.cfg.GetFileStorageDir
+
+      cfg_storagedir = get_fsd_fn()
+      if not cfg_storagedir:
+        raise errors.OpPrereqError("Cluster file storage dir not defined")
+      joinargs.append(cfg_storagedir)
+
+      if self.op.file_storage_dir is not None:
+        joinargs.append(self.op.file_storage_dir)
+
+      joinargs.append(self.op.instance_name)
+
+      # pylint: disable-msg=W0142
+      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
+
   def CheckPrereq(self):
     """Check prerequisites.
 
     """
+    self._CalculateFileStorageDir()
+
     if self.op.mode == constants.INSTANCE_IMPORT:
       export_info = self._ReadExportInfo()
       self._ReadExportParams(export_info)
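
Calling _CalculateFileStorageDir from CheckPrereq means an invalid directory
is reported before Exec touches any resources; the equivalent inline
computation is removed from Exec in a later hunk. A worked example with
illustrative values (utils.PathJoin behaves like os.path.join, but rejects
absolute and ".."-style components after the first, so the result cannot
escape the cluster directory):

      from ganeti import utils

      utils.PathJoin("/srv/ganeti/file-storage",  # cfg_storagedir
                     "mydir",                     # self.op.file_storage_dir
                     "inst1.example.com")         # self.op.instance_name
      # -> "/srv/ganeti/file-storage/mydir/inst1.example.com"
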
@@ -8560,30 +8601,12 @@ class LUInstanceCreate(LogicalUnit):
     else:
       network_port = None
 
-    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
-      # this is needed because os.path.join does not accept None arguments
-      if self.op.file_storage_dir is None:
-        string_file_storage_dir = ""
-      else:
-        string_file_storage_dir = self.op.file_storage_dir
-
-      # build the full file storage dir path
-      if self.op.disk_template == constants.DT_SHARED_FILE:
-        get_fsd_fn = self.cfg.GetSharedFileStorageDir
-      else:
-        get_fsd_fn = self.cfg.GetFileStorageDir
-
-      file_storage_dir = utils.PathJoin(get_fsd_fn(),
-                                        string_file_storage_dir, instance)
-    else:
-      file_storage_dir = ""
-
     disks = _GenerateDiskTemplate(self,
                                   self.op.disk_template,
                                   instance, pnode_name,
                                   self.secondaries,
                                   self.disks,
-                                  file_storage_dir,
+                                  self.instance_file_storage_dir,
                                   self.op.file_driver,
                                   0,
                                   feedback_fn)
@@ -8751,7 +8774,8 @@ class LUInstanceCreate(LogicalUnit):
       self.cfg.Update(iobj, feedback_fn)
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
-      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
+      result = self.rpc.call_instance_start(pnode_name, iobj,
+                                            None, None, False)
       result.Raise("Could not start instance")
 
     return list(iobj.all_nodes)
@@ -9734,8 +9758,8 @@ class LURepairNodeStorage(NoHooksLU):
                  (self.op.name, self.op.node_name))
 
 
-class LUNodeEvacStrategy(NoHooksLU):
-  """Computes the node evacuation strategy.
+class LUNodeEvacuate(NoHooksLU):
+  """Evacuates instances off a list of nodes.
 
   """
   REQ_BGL = False
@@ -9744,41 +9768,166 @@ class LUNodeEvacStrategy(NoHooksLU):
     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
 
   def ExpandNames(self):
-    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
-    self.needed_locks = locks = {}
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+
+    if self.op.remote_node is not None:
+      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
+      assert self.op.remote_node
+
+      if self.op.remote_node == self.op.node_name:
+        raise errors.OpPrereqError("Can not use evacuated node as a new"
+                                   " secondary node", errors.ECODE_INVAL)
+
+      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
+        raise errors.OpPrereqError("Without the use of an iallocator only"
+                                   " secondary instances can be evacuated",
+                                   errors.ECODE_INVAL)
+
+    # Declare locks
+    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: [],
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
     if self.op.remote_node is None:
-      locks[locking.LEVEL_NODE] = locking.ALL_SET
+      # Iallocator will choose any node(s) in the same group
+      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
     else:
-      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
-      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
+      group_nodes = frozenset([self.op.remote_node])
 
-  def Exec(self, feedback_fn):
-    instances = []
-    for node in self.op.nodes:
-      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
-    if not instances:
-      return []
+    # Determine nodes to be locked
+    self.lock_nodes = set([self.op.node_name]) | group_nodes
+
+  def _DetermineInstances(self):
+    """Builds list of instances to operate on.
+
+    """
+    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
+
+    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
+      # Primary instances only
+      inst_fn = _GetNodePrimaryInstances
+      assert self.op.remote_node is None, \
+        "Evacuating primary instances requires iallocator"
+    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
+      # Secondary instances only
+      inst_fn = _GetNodeSecondaryInstances
+    else:
+      # All instances
+      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
+      inst_fn = _GetNodeInstances
+
+    return inst_fn(self.cfg, self.op.node_name)
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_INSTANCE:
+      # Lock instances optimistically, needs verification once node and group
+      # locks have been acquired
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        set(i.name for i in self._DetermineInstances())
+
+    elif level == locking.LEVEL_NODEGROUP:
+      # Lock node groups optimistically, needs verification once nodes have
+      # been acquired
+      self.needed_locks[locking.LEVEL_NODEGROUP] = \
+        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
+
+    elif level == locking.LEVEL_NODE:
+      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
+
+  def CheckPrereq(self):
+    # Verify locks
+    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
+    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
+    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
+
+    assert owned_nodes == self.lock_nodes
+
+    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
+    if owned_groups != wanted_groups:
+      raise errors.OpExecError("Node groups changed since locks were acquired,"
+                               " current groups are '%s', used to be '%s'" %
+                               (utils.CommaJoin(wanted_groups),
+                                utils.CommaJoin(owned_groups)))
+
+    # Determine affected instances
+    self.instances = self._DetermineInstances()
+    self.instance_names = [i.name for i in self.instances]
+
+    if set(self.instance_names) != owned_instances:
+      raise errors.OpExecError("Instances on node '%s' changed since locks"
+                               " were acquired, current instances are '%s',"
+                               " used to be '%s'" %
+                               (self.op.node_name,
+                                utils.CommaJoin(self.instance_names),
+                                utils.CommaJoin(owned_instances)))
+
+    if self.instance_names:
+      self.LogInfo("Evacuating instances from node '%s': %s",
+                   self.op.node_name,
+                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
+    else:
+      self.LogInfo("No instances to evacuate from node '%s'",
+                   self.op.node_name)
 
     if self.op.remote_node is not None:
-      result = []
-      for i in instances:
+      for i in self.instances:
         if i.primary_node == self.op.remote_node:
           raise errors.OpPrereqError("Node %s is the primary node of"
                                      " instance %s, cannot use it as"
                                      " secondary" %
                                      (self.op.remote_node, i.name),
                                      errors.ECODE_INVAL)
-        result.append([i.name, self.op.remote_node])
-    else:
-      ial = IAllocator(self.cfg, self.rpc,
-                       mode=constants.IALLOCATOR_MODE_MEVAC,
-                       evac_nodes=self.op.nodes)
-      ial.Run(self.op.iallocator, validate=True)
+
+  def Exec(self, feedback_fn):
+    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
+
+    if not self.instance_names:
+      # No instances to evacuate
+      jobs = []
+
+    elif self.op.iallocator is not None:
+      # TODO: Implement relocation to other group
+      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
+                       evac_mode=self.op.mode,
+                       instances=list(self.instance_names))
+
+      ial.Run(self.op.iallocator)
+
       if not ial.success:
-        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
-                                 errors.ECODE_NORES)
-      result = ial.result
-    return result
+        raise errors.OpPrereqError("Can't compute node evacuation using"
+                                   " iallocator '%s': %s" %
+                                   (self.op.iallocator, ial.info),
+                                   errors.ECODE_NORES)
+
+      jobs = [[opcodes.OpCode.LoadOpCode(state) for state in jobset]
+              for jobset in ial.result]
+
+      # Set "early_release" flag on opcodes where available
+      early_release = self.op.early_release
+      for op in itertools.chain(*jobs): # pylint: disable-msg=W0142
+        try:
+          op.early_release = early_release
+        except AttributeError:
+          assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
+
+    elif self.op.remote_node is not None:
+      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
+      jobs = [
+        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
+                                        remote_node=self.op.remote_node,
+                                        disks=[],
+                                        mode=constants.REPLACE_DISK_CHG,
+                                        early_release=self.op.early_release)]
+        for instance_name in self.instance_names
+        ]
+
+    else:
+      raise errors.ProgrammerError("No iallocator or remote node")
+
+    return ResultWithJobs(jobs)
 
 
 class LUInstanceGrowDisk(LogicalUnit):
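
Exec no longer performs the moves itself; it returns job definitions: a list
of jobs, each job being a list of opcodes that run sequentially, while
separate jobs may run in parallel. For the fixed-remote-node branch this
reduces to one single-opcode job per instance; values below are illustrative:

      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
                                        remote_node="node3.example.com",
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=False)],
        [opcodes.OpInstanceReplaceDisks(instance_name="inst2.example.com",
                                        remote_node="node3.example.com",
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=False)],
        ]

      # ResultWithJobs wraps the list; the master daemon submits the jobs
      # and returns their IDs to the caller.
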
@@ -11045,7 +11194,8 @@ class LUBackupExport(LogicalUnit):
             not self.op.remove_instance):
           assert not activate_disks
           feedback_fn("Starting instance %s" % instance.name)
-          result = self.rpc.call_instance_start(src_node, instance, None, None)
+          result = self.rpc.call_instance_start(src_node, instance,
+                                                None, None, False)
           msg = result.fail_msg
           if msg:
             feedback_fn("Failed to start instance: %s" % msg)
@@ -12033,7 +12183,7 @@ class IAllocator(object):
     self.name = None
     self.evac_nodes = None
     self.instances = None
-    self.reloc_mode = None
+    self.evac_mode = None
     self.target_groups = []
     # computed fields
     self.required_nodes = None
@@ -12087,8 +12237,7 @@ class IAllocator(object):
       hypervisor_name = self.hypervisor
     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
-    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
-                       constants.IALLOCATOR_MODE_MRELOC):
+    else:
       hypervisor_name = cluster_info.enabled_hypervisors[0]
 
     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
@@ -12323,13 +12472,21 @@ class IAllocator(object):
       }
     return request
 
-  def _AddMultiRelocate(self):
-    """Get data for multi-relocate requests.
+  def _AddNodeEvacuate(self):
+    """Get data for node-evacuate requests.
+
+    """
+    return {
+      "instances": self.instances,
+      "evac_mode": self.evac_mode,
+      }
+
+  def _AddChangeGroup(self):
+    """Get data for node-evacuate requests.
 
     """
     return {
       "instances": self.instances,
-      "reloc_mode": self.reloc_mode,
       "target_groups": self.target_groups,
       }
 
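
The dictionary returned by _AddNodeEvacuate becomes the "request" section of
the iallocator input. A hedged example for a secondary-only evacuation,
written as the Python equivalent of the serialized request (the wire strings
are the assumed values of the corresponding constants):

      request = {
        "type": "node-evacuate",        # IALLOCATOR_MODE_NODE_EVAC
        "instances": ["inst1.example.com", "inst2.example.com"],
        "evac_mode": "secondary-only",  # IALLOCATOR_NEVAC_SEC
        }
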
@@ -12355,6 +12512,13 @@ class IAllocator(object):
     self.in_text = serializer.Dump(self.in_data)
 
   _STRING_LIST = ht.TListOf(ht.TString)
+  _JOBSET_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
+     # pylint: disable-msg=E1101
+     # Class '...' has no 'OP_ID' member
+     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
+                          opcodes.OpInstanceMigrate.OP_ID,
+                          opcodes.OpInstanceReplaceDisks.OP_ID])
+     })))
   _MODE_DATA = {
     constants.IALLOCATOR_MODE_ALLOC:
       (_AddNewInstance,
@@ -12376,19 +12540,16 @@ class IAllocator(object):
     constants.IALLOCATOR_MODE_MEVAC:
       (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
        ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
-    constants.IALLOCATOR_MODE_MRELOC:
-      (_AddMultiRelocate, [
+    constants.IALLOCATOR_MODE_NODE_EVAC:
+      (_AddNodeEvacuate, [
+        ("instances", _STRING_LIST),
+        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
+        ], _JOBSET_LIST),
+    constants.IALLOCATOR_MODE_CHG_GROUP:
+      (_AddChangeGroup, [
         ("instances", _STRING_LIST),
-        ("reloc_mode", ht.TElemOf(constants.IALLOCATOR_MRELOC_MODES)),
         ("target_groups", _STRING_LIST),
-        ],
-       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
-         # pylint: disable-msg=E1101
-         # Class '...' has no 'OP_ID' member
-         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
-                              opcodes.OpInstanceMigrate.OP_ID,
-                              opcodes.OpInstanceReplaceDisks.OP_ID])
-         })))),
+        ], _JOBSET_LIST),
     }
 
   def Run(self, name, validate=True, call_fn=None):
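
_JOBSET_LIST spells out the shape of a valid answer for both new modes: a
list of jobs, each a list of opcode dictionaries. ht.TStrictDict(True,
False, ...) makes OP_ID mandatory while still allowing further opcode
fields, and OP_ID itself is restricted to the three whitelisted opcodes. An
answer that validates, with illustrative values:

      result = [
        [{"OP_ID": "OP_INSTANCE_MIGRATE",
          "instance_name": "inst1.example.com"}],
        [{"OP_ID": "OP_INSTANCE_REPLACE_DISKS",
          "instance_name": "inst2.example.com",
          "mode": "replace_new_secondary",
          "remote_node": "node4.example.com"}],
        ]
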
@@ -12471,6 +12632,9 @@ class IAllocator(object):
       else:
         raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
 
+    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
+      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
+
     self.out_data = rdict
 
   @staticmethod
@@ -12553,12 +12717,11 @@ class LUTestAllocator(NoHooksLU):
       if not hasattr(self.op, "evac_nodes"):
         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                    " opcode input", errors.ECODE_INVAL)
-    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
-      if self.op.instances:
-        self.op.instances = _GetWantedInstances(self, self.op.instances)
-      else:
-        raise errors.OpPrereqError("Missing instances to relocate",
-                                   errors.ECODE_INVAL)
+    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
+                          constants.IALLOCATOR_MODE_NODE_EVAC):
+      if not self.op.instances:
+        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
+      self.op.instances = _GetWantedInstances(self, self.op.instances)
     else:
       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                  self.op.mode, errors.ECODE_INVAL)
@@ -12598,12 +12761,16 @@ class LUTestAllocator(NoHooksLU):
       ial = IAllocator(self.cfg, self.rpc,
                        mode=self.op.mode,
                        evac_nodes=self.op.evac_nodes)
-    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
+    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
       ial = IAllocator(self.cfg, self.rpc,
                        mode=self.op.mode,
                        instances=self.op.instances,
-                       reloc_mode=self.op.reloc_mode,
                        target_groups=self.op.target_groups)
+    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
+      ial = IAllocator(self.cfg, self.rpc,
+                       mode=self.op.mode,
+                       instances=self.op.instances,
+                       evac_mode=self.op.evac_mode)
     else:
       raise errors.ProgrammerError("Uncatched mode %s in"
                                    " LUTestAllocator.Exec", self.op.mode)