Remove unused parameter “unlock” from cmdlib._WaitForSync

[ganeti-local] / lib / cmdlib.py
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index 6e5fbab..768d67b 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -500,6 +500,21 @@ def _CheckBooleanOpField(op, name):
    setattr(op, name, val)
  
  
+def _CheckGlobalHvParams(params):
+  """Validates that given hypervisor params are not global ones.
+
+  This will ensure that instances don't get customised versions of
+  global params; raises OpPrereqError (ECODE_INVAL) naming any offenders.
+
+  """
+  used_globals = constants.HVC_GLOBALS.intersection(params)
+  if used_globals:
+    msg = ("The following hypervisor parameters are global and cannot"
+           " be customized at instance level, please modify them at"
+           " cluster level: %s" % ", ".join(sorted(used_globals)))
+    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
+
+
  def _CheckNodeOnline(lu, node):
    """Ensure that a given node is online.
  
@@ -716,7 +731,7 @@ def _CheckNicsBridgesExist(lu, target_nics, target_node,
    if brlist:
      result = lu.rpc.call_bridges_exist(target_node, brlist)
      result.Raise("Error checking bridges on destination node '%s'" %
-                 target_node, prereq=True)
+                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
  
  
  def _CheckInstanceBridgesExist(lu, instance, node=None):
@@ -797,7 +812,7 @@ def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  
    result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
    result.Raise("Failed to get disk status from node %s" % node_name,
-               prereq=prereq)
+               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  
    for idx, bdev_status in enumerate(result.payload):
      if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
@@ -1127,6 +1142,20 @@ class LUVerifyCluster(LogicalUnit):
      _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
               "; ".join(test))
  
+    # check pv names
+    if vg_name is not None:
+      pvlist = node_result.get(constants.NV_PVLIST, None)
+      test = pvlist is None
+      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
+      if not test:
+        # check that ':' is not present in PV names, since it's a
+        # special character for lvcreate (denotes the range of PEs to
+        # use on the PV)
+        for size, pvname, owner_vg in pvlist:
+          test = ":" in pvname
+          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
+                   " '%s' of VG '%s'", pvname, owner_vg)
+
    def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                        node_instance, n_offline):
      """Verify an instance.
@@ -1301,6 +1330,7 @@ class LUVerifyCluster(LogicalUnit):
      if vg_name is not None:
        node_verify_param[constants.NV_VGLIST] = None
        node_verify_param[constants.NV_LVLIST] = vg_name
+      node_verify_param[constants.NV_PVLIST] = [vg_name]
        node_verify_param[constants.NV_DRBDLIST] = None
      all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                             self.cfg.GetClusterName())
@@ -1773,7 +1803,7 @@ class LURenameCluster(LogicalUnit):
      """Verify that the passed name is a valid one.
  
      """
-    hostname = utils.HostInfo(self.op.name)
+    hostname = utils.GetHostInfo(self.op.name)
  
      new_name = hostname.name
      self.ip = new_ip = hostname.ip
@@ -1940,6 +1970,29 @@ class LUSetClusterParams(LogicalUnit):
        self.new_nicparams = objects.FillDict(
          cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
        objects.NIC.CheckParameterSyntax(self.new_nicparams)
+      nic_errors = []
+
+      # check all instances for consistency
+      for instance in self.cfg.GetAllInstancesInfo().values():
+        for nic_idx, nic in enumerate(instance.nics):
+          params_copy = copy.deepcopy(nic.nicparams)
+          params_filled = objects.FillDict(self.new_nicparams, params_copy)
+
+          # check parameter syntax
+          try:
+            objects.NIC.CheckParameterSyntax(params_filled)
+          except errors.ConfigurationError, err:
+            nic_errors.append("Instance %s, nic/%d: %s" %
+                              (instance.name, nic_idx, err))
+
+          # if we're moving instances to routed, check that they have an ip
+          target_mode = params_filled[constants.NIC_MODE]
+          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
+            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
+                              (instance.name, nic_idx))
+      if nic_errors:
+        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
+                                   "\n".join(nic_errors))
  
      # hypervisor list/parameters
      self.new_hvparams = objects.FillDict(cluster.hvparams, {})
@@ -2081,7 +2134,7 @@ class LURedistributeConfig(NoHooksLU):
      _RedistributeAncillaryFiles(self)
  
  
-def _WaitForSync(lu, instance, oneshot=False, unlock=False):
+def _WaitForSync(lu, instance, oneshot=False):
    """Sleep and poll for an instance's disk to sync.
  
    """
@@ -2096,6 +2149,8 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
    for dev in instance.disks:
      lu.cfg.SetDiskID(dev, node)
  
+  # TODO: Convert to utils.Retry
+
    retries = 0
    degr_retries = 10 # in seconds, as we sleep 1 second each time
    while True:
@@ -2826,7 +2881,7 @@ class LUAddNode(LogicalUnit):
      node_name = self.op.node_name
      cfg = self.cfg
  
-    dns_data = utils.HostInfo(node_name)
+    dns_data = utils.GetHostInfo(node_name)
  
      node = dns_data.name
      primary_ip = self.op.primary_ip = dns_data.ip
@@ -2967,7 +3022,7 @@ class LUAddNode(LogicalUnit):
        result = self.rpc.call_node_has_ip_address(new_node.name,
                                                   new_node.secondary_ip)
        result.Raise("Failure checking secondary ip on node %s" % new_node.name,
-                   prereq=True)
+                   prereq=True, ecode=errors.ECODE_ENVIRON)
        if not result.payload:
          raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                   " you gave (%s). Please fix and re-run this"
@@ -3005,7 +3060,7 @@ class LUAddNode(LogicalUnit):
                            " candidate status: %s" % msg)
      else:
        _RedistributeAncillaryFiles(self, additional_nodes=[node])
-      self.context.AddNode(new_node)
+      self.context.AddNode(new_node, self.proc.GetECId())
  
  
  class LUSetNodeParams(LogicalUnit):
@@ -3539,7 +3594,8 @@ def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  
    """
    nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
-  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
+  nodeinfo[node].Raise("Can't get data from node %s" % node,
+                       prereq=True, ecode=errors.ECODE_ENVIRON)
    free_mem = nodeinfo[node].payload.get('memory_free', None)
    if not isinstance(free_mem, int):
      raise errors.OpPrereqError("Can't compute free memory on node %s, result"
@@ -3627,7 +3683,7 @@ class LUStartupInstance(LogicalUnit):
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
-                      prereq=True)
+                      prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
@@ -3846,7 +3902,7 @@ class LUReinstallInstance(LogicalUnit):
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
-                      prereq=True)
+                      prereq=True, ecode=errors.ECODE_ENVIRON)
      if remote_info.payload:
        raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                   (self.op.instance_name,
@@ -3864,7 +3920,8 @@ class LUReinstallInstance(LogicalUnit):
                                     self.op.pnode, errors.ECODE_NOENT)
        result = self.rpc.call_os_get(pnode.name, self.op.os_type)
        result.Raise("OS '%s' not in supported OS list for primary node %s" %
-                   (self.op.os_type, pnode.name), prereq=True)
+                   (self.op.os_type, pnode.name),
+                   prereq=True, ecode=errors.ECODE_INVAL)
        if not self.op.force_variant:
          _CheckOSVariant(result.payload, self.op.os_type)
  
@@ -3946,7 +4003,7 @@ class LURecreateInstanceDisks(LogicalUnit):
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
-                      prereq=True)
+                      prereq=True, ecode=errors.ECODE_ENVIRON)
      if remote_info.payload:
        raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                   (self.op.instance_name,
@@ -4014,7 +4071,7 @@ class LURenameInstance(LogicalUnit):
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
-                      prereq=True)
+                      prereq=True, ecode=errors.ECODE_ENVIRON)
      if remote_info.payload:
        raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                   (self.op.instance_name,
@@ -4022,7 +4079,7 @@ class LURenameInstance(LogicalUnit):
      self.instance = instance
  
      # new name verification
-    name_info = utils.HostInfo(self.op.new_name)
+    name_info = utils.GetHostInfo(self.op.new_name)
  
      self.op.new_name = new_name = name_info.name
      instance_list = self.cfg.GetInstanceList()
@@ -4181,7 +4238,8 @@ class LUQueryInstances(NoHooksLU):
                                      "hvparams",
                                      ] + _SIMPLE_FIELDS +
                                    ["hv/%s" % name
-                                   for name in constants.HVS_PARAMETERS] +
+                                   for name in constants.HVS_PARAMETERS
+                                   if name not in constants.HVC_GLOBALS] +
                                    ["be/%s" % name
                                     for name in constants.BES_PARAMETERS])
    _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
@@ -4276,7 +4334,7 @@ class LUQueryInstances(NoHooksLU):
      cluster = self.cfg.GetClusterInfo()
      for instance in instance_list:
        iout = []
-      i_hv = cluster.FillHV(instance)
+      i_hv = cluster.FillHV(instance, skip_globals=True)
        i_be = cluster.FillBE(instance)
        i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                   nic.nicparams) for nic in instance.nics]
@@ -4363,7 +4421,8 @@ class LUQueryInstances(NoHooksLU):
          elif field == "hvparams":
            val = i_hv
          elif (field.startswith(HVPREFIX) and
-              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
+              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
+              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
            val = i_hv.get(field[len(HVPREFIX):], None)
          elif field == "beparams":
            val = i_be
@@ -4910,7 +4969,8 @@ class TLMigrateInstance(Tasklet):
        _CheckNodeNotDrained(self, target_node)
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
-      result.Raise("Can't migrate, please use failover", prereq=True)
+      result.Raise("Can't migrate, please use failover",
+                   prereq=True, ecode=errors.ECODE_STATE)
  
      self.instance = instance
  
@@ -5072,8 +5132,8 @@ class TLMigrateInstance(Tasklet):
                                                      False)
      abort_msg = abort_result.fail_msg
      if abort_msg:
-      logging.error("Aborting migration failed on target node %s: %s" %
-                    (target_node, abort_msg))
+      logging.error("Aborting migration failed on target node %s: %s",
+                    target_node, abort_msg)
        # Don't raise an exception here, as we stil have to try to revert the
        # disk status, even if this step failed.
  
@@ -5127,6 +5187,7 @@ class TLMigrateInstance(Tasklet):
      if msg:
        logging.error("Instance pre-migration failed, trying to revert"
                      " disk status: %s", msg)
+      self.feedback_fn("Pre-migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
@@ -5141,6 +5202,7 @@ class TLMigrateInstance(Tasklet):
      if msg:
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
+      self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
@@ -5158,7 +5220,7 @@ class TLMigrateInstance(Tasklet):
      msg = result.fail_msg
      if msg:
        logging.error("Instance migration succeeded, but finalization failed:"
-                    " %s" % msg)
+                    " %s", msg)
        raise errors.OpExecError("Could not finalize instance migration: %s" %
                                 msg)
  
@@ -5272,7 +5334,7 @@ def _GenerateUniqueNames(lu, exts):
    """
    results = []
    for val in exts:
-    new_id = lu.cfg.GenerateUniqueID()
+    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
      results.append("%s%s" % (new_id, val))
    return results
  
@@ -5284,7 +5346,7 @@ def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
    """
    port = lu.cfg.AllocatePort()
    vgname = lu.cfg.GetVGName()
-  shared_secret = lu.cfg.GenerateDRBDSecret()
+  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
    dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                            logical_id=(vgname, names[0]))
    dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
@@ -5577,6 +5639,8 @@ class LUCreateInstance(LogicalUnit):
      hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      self.hv_full = filled_hvp
+    # check that we don't specify global parameters on an instance
+    _CheckGlobalHvParams(self.op.hvparams)
  
      # fill and remember the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
@@ -5586,7 +5650,7 @@ class LUCreateInstance(LogicalUnit):
      #### instance parameters check
  
      # instance name verification
-    hostname1 = utils.HostInfo(self.op.instance_name)
+    hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = instance_name = hostname1.name
  
      # this is just a preventive check, but someone might still add this
@@ -5624,7 +5688,7 @@ class LUCreateInstance(LogicalUnit):
                                       errors.ECODE_INVAL)
          nic_ip = ip
  
-      # TODO: check the ip for uniqueness !!
+      # TODO: check the ip address for uniqueness
        if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
          raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                     errors.ECODE_INVAL)
@@ -5636,8 +5700,9 @@ class LUCreateInstance(LogicalUnit):
            raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                       mac, errors.ECODE_INVAL)
          else:
-          # or validate/reserve the current one
-          if self.cfg.IsMacInUse(mac):
+          try:
+            self.cfg.ReserveMAC(mac, self.proc.GetECId())
+          except errors.ReservationError:
              raise errors.OpPrereqError("MAC address %s already in use"
                                         " in cluster" % mac,
                                         errors.ECODE_NOTUNIQUE)
@@ -5917,7 +5982,7 @@ class LUCreateInstance(LogicalUnit):
      # creation job will fail.
      for nic in self.nics:
        if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
-        nic.mac = self.cfg.GenerateMAC()
+        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
  
      #### allocator run
  
@@ -5979,7 +6044,8 @@ class LUCreateInstance(LogicalUnit):
      # os verification
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported os list for primary node %s" %
-                 (self.op.os_type, pnode.name), prereq=True)
+                 (self.op.os_type, pnode.name),
+                 prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(result.payload, self.op.os_type)
  
@@ -6055,7 +6121,8 @@ class LUCreateInstance(LogicalUnit):
  
      feedback_fn("adding instance %s to cluster config" % instance)
  
-    self.cfg.AddInstance(iobj)
+    self.cfg.AddInstance(iobj, self.proc.GetECId())
+
      # Declare that we don't want to remove the instance lock anymore, as we've
      # added the instance to the config
      del self.remove_locks[locking.LEVEL_INSTANCE]
@@ -6809,7 +6876,7 @@ class TLReplaceDisks(Tasklet):
      # This can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its return value
      self.lu.LogStep(5, steps_total, "Sync devices")
-    _WaitForSync(self.lu, self.instance, unlock=True)
+    _WaitForSync(self.lu, self.instance)
  
      # Check all devices manually
      self._CheckDevices(self.instance.primary_node, iv_names)
@@ -6865,7 +6932,7 @@ class TLReplaceDisks(Tasklet):
      minors = self.cfg.AllocateDRBDMinor([self.new_node
                                           for dev in self.instance.disks],
                                          self.instance.name)
-    logging.debug("Allocated minors %r" % (minors,))
+    logging.debug("Allocated minors %r", minors)
  
      iv_names = {}
      for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
@@ -6954,7 +7021,7 @@ class TLReplaceDisks(Tasklet):
      # This can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its return value
      self.lu.LogStep(5, steps_total, "Sync devices")
-    _WaitForSync(self.lu, self.instance, unlock=True)
+    _WaitForSync(self.lu, self.instance)
  
      # Check all devices manually
      self._CheckDevices(self.instance.primary_node, iv_names)
@@ -6985,11 +7052,18 @@ class LURepairNodeStorage(NoHooksLU):
        }
  
    def _CheckFaultyDisks(self, instance, node_name):
-    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
-                                node_name, True):
-      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
-                                 " node '%s'" % (instance.name, node_name),
-                                 errors.ECODE_STATE)
+    """Abort on faulty disks, or only warn if ignore_consistency is set."""
+    try:
+      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
+                                  node_name, True):
+        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
+                                   " node '%s'" % (instance.name, node_name),
+                                   errors.ECODE_STATE)
+    except errors.OpPrereqError, err:
+      if self.op.ignore_consistency:
+        self.proc.LogWarning(str(err.args[0]))
+      else:
+        raise
  
    def CheckPrereq(self):
      """Check prerequisites.
@@ -7005,6 +7079,8 @@ class LURepairNodeStorage(NoHooksLU):
  
      # Check whether any instance on this node has faulty disks
      for inst in _GetNodeInstances(self.cfg, self.op.node_name):
+      if not inst.admin_up:
+        continue
        check_nodes = set(inst.all_nodes)
        check_nodes.discard(self.op.node_name)
        for inst_node_name in check_nodes:
@@ -7260,7 +7336,7 @@ class LUQueryInstanceData(NoHooksLU):
          "hypervisor": instance.hypervisor,
          "network_port": instance.network_port,
          "hv_instance": instance.hvparams,
-        "hv_actual": cluster.FillHV(instance),
+        "hv_actual": cluster.FillHV(instance, skip_globals=True),
          "be_instance": instance.beparams,
          "be_actual": cluster.FillBE(instance),
          "serial_no": instance.serial_no,
@@ -7297,6 +7373,9 @@ class LUSetInstanceParams(LogicalUnit):
              self.op.hvparams or self.op.beparams):
        raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
  
+    if self.op.hvparams:
+      _CheckGlobalHvParams(self.op.hvparams)
+
      # Disk validation
      disk_addremove = 0
      for disk_op, disk_dict in self.op.disks:
@@ -7592,10 +7671,14 @@ class LUSetInstanceParams(LogicalUnit):
          continue
        if nic_op != constants.DDM_ADD:
          # an existing nic
+        if not instance.nics:
+          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
+                                     " no NICs" % nic_op,
+                                     errors.ECODE_INVAL)
          if nic_op < 0 or nic_op >= len(instance.nics):
            raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                       " are 0 to %d" %
-                                     (nic_op, len(instance.nics)),
+                                     (nic_op, len(instance.nics) - 1),
                                       errors.ECODE_INVAL)
          old_nic_params = instance.nics[nic_op].nicparams
          old_nic_ip = instance.nics[nic_op].ip
@@ -7643,10 +7726,12 @@ class LUSetInstanceParams(LogicalUnit):
                                       errors.ECODE_INVAL)
          elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
            # otherwise generate the mac
-          nic_dict['mac'] = self.cfg.GenerateMAC()
+          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
          else:
            # or validate/reserve the current one
-          if self.cfg.IsMacInUse(nic_mac):
+          try:
+            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
+          except errors.ReservationError:
              raise errors.OpPrereqError("MAC address %s already in use"
                                         " in cluster" % nic_mac,
                                         errors.ECODE_NOTUNIQUE)
@@ -7775,8 +7860,8 @@ class LUSetInstanceParams(LogicalUnit):
          for key in 'mac', 'ip':
            if key in nic_dict:
              setattr(instance.nics[nic_op], key, nic_dict[key])
-        if nic_op in self.nic_pnew:
-          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
+        if nic_op in self.nic_pinst:
+          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
          for key, val in nic_dict.iteritems():
            result.append(("nic.%s/%d" % (key, nic_op), val))