drive_del after device_del in disk hot-remove
[ganeti-local] lib/bootstrap.py
index 726f7c4..cb2f0f4 100644
@@ -28,7 +28,9 @@ import os.path
 import re
 import logging
 import time
+import tempfile
 
 
+from ganeti.cmdlib import cluster
 from ganeti import rpc
 from ganeti import ssh
 from ganeti import utils
@@ -39,11 +41,12 @@ from ganeti import objects
 from ganeti import ssconf
 from ganeti import serializer
 from ganeti import hypervisor
-from ganeti import bdev
+from ganeti.storage import drbd
+from ganeti.storage import filestorage
 from ganeti import netutils
-from ganeti import backend
 from ganeti import luxi
 from ganeti import jstore
+from ganeti import pathutils
 
 
 # ec_id for InitConfig's temporary reservation manager
@@ -60,7 +63,7 @@ def _InitSSHSetup():
   permitted hosts and adds the hostkey to its own known hosts.
 
   """
-  priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
+  priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER)
 
   for name in priv_key, pub_key:
     if os.path.exists(name):
@@ -92,12 +95,12 @@ def GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_spice_cert,
                           new_confd_hmac_key, new_cds,
                           rapi_cert_pem=None, spice_cert_pem=None,
                           spice_cacert_pem=None, cds=None,
-                          nodecert_file=constants.NODED_CERT_FILE,
-                          rapicert_file=constants.RAPI_CERT_FILE,
-                          spicecert_file=constants.SPICE_CERT_FILE,
-                          spicecacert_file=constants.SPICE_CACERT_FILE,
-                          hmackey_file=constants.CONFD_HMAC_KEY,
-                          cds_file=constants.CLUSTER_DOMAIN_SECRET_FILE):
+                          nodecert_file=pathutils.NODED_CERT_FILE,
+                          rapicert_file=pathutils.RAPI_CERT_FILE,
+                          spicecert_file=pathutils.SPICE_CERT_FILE,
+                          spicecacert_file=pathutils.SPICE_CACERT_FILE,
+                          hmackey_file=pathutils.CONFD_HMAC_KEY,
+                          cds_file=pathutils.CLUSTER_DOMAIN_SECRET_FILE):
   """Updates the cluster certificates, keys and secrets.
 
   @type new_cluster_cert: bool
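
The five leading booleans select which secrets to regenerate, in the order
(new_cluster_cert, new_rapi_cert, new_spice_cert, new_confd_hmac_key,
new_cds), and the new pathutils-based defaults can still be overridden per
call. A hypothetical test invocation (the /tmp path is invented):

    # Regenerate only the RAPI certificate, writing it to a scratch file
    # instead of pathutils.RAPI_CERT_FILE.
    GenerateClusterCrypto(False, True, False, False, False,
                          rapicert_file="/tmp/test-rapi.pem")
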
@@ -208,7 +211,7 @@ def _InitGanetiServerSetup(master_name):
   # Generate cluster secrets
   GenerateClusterCrypto(True, False, False, False, False)
 
-  result = utils.RunCmd([constants.DAEMON_UTIL, "start", constants.NODED])
+  result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.NODED])
   if result.failed:
     raise errors.OpExecError("Could not start the node daemon, command %s"
                              " had exitcode %s and error %s" %
@@ -255,7 +258,85 @@ def _WaitForMasterDaemon():
                              " %s seconds" % _DAEMON_READY_TIMEOUT)
 
 
-def _InitFileStorage(file_storage_dir):
+def _WaitForSshDaemon(hostname, port, family):
+  """Wait for SSH daemon to become responsive.
+
+  """
+  hostip = netutils.GetHostname(name=hostname, family=family).ip
+
+  def _CheckSshDaemon():
+    if netutils.TcpPing(hostip, port, timeout=1.0, live_port_needed=True):
+      logging.debug("SSH daemon on %s:%s (IP address %s) has become"
+                    " responsive", hostname, port, hostip)
+    else:
+      raise utils.RetryAgain()
+
+  try:
+    utils.Retry(_CheckSshDaemon, 1.0, _DAEMON_READY_TIMEOUT)
+  except utils.RetryTimeout:
+    raise errors.OpExecError("SSH daemon on %s:%s (IP address %s) didn't"
+                             " become responsive within %s seconds" %
+                             (hostname, port, hostip, _DAEMON_READY_TIMEOUT))
+
+
+def RunNodeSetupCmd(cluster_name, node, basecmd, debug, verbose,
+                    use_cluster_key, ask_key, strict_host_check, data):
+  """Runs a command to configure something on a remote machine.
+
+  @type cluster_name: string
+  @param cluster_name: Cluster name
+  @type node: string
+  @param node: Node name
+  @type basecmd: string
+  @param basecmd: Base command (path on the remote machine)
+  @type debug: bool
+  @param debug: Enable debug output
+  @type verbose: bool
+  @param verbose: Enable verbose output
+  @type use_cluster_key: bool
+  @param use_cluster_key: See L{ssh.SshRunner.BuildCmd}
+  @type ask_key: bool
+  @param ask_key: See L{ssh.SshRunner.BuildCmd}
+  @type strict_host_check: bool
+  @param strict_host_check: See L{ssh.SshRunner.BuildCmd}
+  @param data: JSON-serializable input data for script (passed to stdin)
+
+  """
+  cmd = [basecmd]
+
+  # Pass --debug/--verbose to the external script if set on our invocation
+  if debug:
+    cmd.append("--debug")
+
+  if verbose:
+    cmd.append("--verbose")
+
+  family = ssconf.SimpleStore().GetPrimaryIPFamily()
+  srun = ssh.SshRunner(cluster_name,
+                       ipv6=(family == netutils.IP6Address.family))
+  scmd = srun.BuildCmd(node, constants.SSH_LOGIN_USER,
+                       utils.ShellQuoteArgs(cmd),
+                       batch=False, ask_key=ask_key, quiet=False,
+                       strict_host_check=strict_host_check,
+                       use_cluster_key=use_cluster_key)
+
+  tempfh = tempfile.TemporaryFile()
+  try:
+    tempfh.write(serializer.DumpJson(data))
+    tempfh.seek(0)
+
+    result = utils.RunCmd(scmd, interactive=True, input_fd=tempfh)
+  finally:
+    tempfh.close()
+
+  if result.failed:
+    raise errors.OpExecError("Command '%s' failed: %s" %
+                             (result.cmd, result.fail_reason))
+
+  _WaitForSshDaemon(node, netutils.GetDaemonPort(constants.SSH), family)
+
+
+def _InitFileStorageDir(file_storage_dir):
   """Initialize if needed the file storage.
 
   @param file_storage_dir: the user-supplied value
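
_WaitForSshDaemon layers the generic utils.Retry/utils.RetryAgain pattern
over netutils.TcpPing, and RunNodeSetupCmd then pipes a JSON payload to the
remote setup script through a temporary file, so no secrets appear on the
command line. A rough standalone equivalent of the polling loop, using only
the standard library (socket in place of TcpPing, under the same timeout
semantics), might look like:

    import socket
    import time

    def wait_for_tcp(host, port, timeout=120.0, interval=1.0):
        """Poll until a TCP connect to host:port succeeds or time runs out."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                socket.create_connection((host, port), 1.0).close()
                return  # the daemon answered, so it is responsive
            except socket.error:
                time.sleep(interval)  # the moral equivalent of RetryAgain
        raise RuntimeError("%s:%s did not become responsive" % (host, port))
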
@@ -281,9 +362,142 @@ def _InitFileStorage(file_storage_dir):
     raise errors.OpPrereqError("The file storage directory '%s' is not"
                                " a directory." % file_storage_dir,
                                errors.ECODE_ENVIRON)
+
   return file_storage_dir
 
 
+def _PrepareFileBasedStorage(
+    enabled_disk_templates, file_storage_dir,
+    default_dir, file_disk_template,
+    init_fn=_InitFileStorageDir, acceptance_fn=None):
+  """Checks if a file-based storage type is enabled and inits the dir.
+
+  @type enabled_disk_templates: list of string
+  @param enabled_disk_templates: list of enabled disk templates
+  @type file_storage_dir: string
+  @param file_storage_dir: the file storage directory
+  @type default_dir: string
+  @param default_dir: default file storage directory when C{file_storage_dir}
+      is 'None'
+  @type file_disk_template: string
+  @param file_disk_template: a disk template whose storage type is 'ST_FILE'
+  @rtype: string
+  @returns: the name of the actual file storage directory
+
+  """
+  assert (file_disk_template in
+          utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE))
+  if file_storage_dir is None:
+    file_storage_dir = default_dir
+  if not acceptance_fn:
+    acceptance_fn = \
+        lambda path: filestorage.CheckFileStoragePathAcceptance(
+            path, exact_match_ok=True)
+
+  cluster.CheckFileStoragePathVsEnabledDiskTemplates(
+      logging.warning, file_storage_dir, enabled_disk_templates)
+
+  file_storage_enabled = file_disk_template in enabled_disk_templates
+  if file_storage_enabled:
+    try:
+      acceptance_fn(file_storage_dir)
+    except errors.FileStoragePathError as e:
+      raise errors.OpPrereqError(str(e))
+    result_file_storage_dir = init_fn(file_storage_dir)
+  else:
+    result_file_storage_dir = file_storage_dir
+  return result_file_storage_dir
+
+
+def _PrepareFileStorage(
+    enabled_disk_templates, file_storage_dir, init_fn=_InitFileStorageDir,
+    acceptance_fn=None):
+  """Checks if file storage is enabled and inits the dir.
+
+  @see: C{_PrepareFileBasedStorage}
+
+  """
+  return _PrepareFileBasedStorage(
+      enabled_disk_templates, file_storage_dir,
+      pathutils.DEFAULT_FILE_STORAGE_DIR, constants.DT_FILE,
+      init_fn=init_fn, acceptance_fn=acceptance_fn)
+
+
+def _PrepareSharedFileStorage(
+    enabled_disk_templates, file_storage_dir, init_fn=_InitFileStorageDir,
+    acceptance_fn=None):
+  """Checks if shared file storage is enabled and inits the dir.
+
+  @see: C{_PrepareFileBasedStorage}
+
+  """
+  return _PrepareFileBasedStorage(
+      enabled_disk_templates, file_storage_dir,
+      pathutils.DEFAULT_SHARED_FILE_STORAGE_DIR, constants.DT_SHARED_FILE,
+      init_fn=init_fn, acceptance_fn=acceptance_fn)
+
+
+def _InitCheckEnabledDiskTemplates(enabled_disk_templates):
+  """Checks the sanity of the enabled disk templates.
+
+  """
+  if not enabled_disk_templates:
+    raise errors.OpPrereqError("Enabled disk templates list must contain at"
+                               " least one member", errors.ECODE_INVAL)
+  invalid_disk_templates = \
+    set(enabled_disk_templates) - constants.DISK_TEMPLATES
+  if invalid_disk_templates:
+    raise errors.OpPrereqError("Enabled disk templates list contains invalid"
+                               " entries: %s" % invalid_disk_templates,
+                               errors.ECODE_INVAL)
+
+
+def _RestrictIpolicyToEnabledDiskTemplates(ipolicy, enabled_disk_templates):
+  """Restricts the ipolicy's disk templates to the enabled ones.
+
+  This function clears the ipolicy's list of allowed disk templates from the
+  ones that are not enabled by the cluster.
+
+  @type ipolicy: dict
+  @param ipolicy: the instance policy
+  @type enabled_disk_templates: list of string
+  @param enabled_disk_templates: the list of cluster-wide enabled disk
+    templates
+
+  """
+  assert constants.IPOLICY_DTS in ipolicy
+  allowed_disk_templates = ipolicy[constants.IPOLICY_DTS]
+  restricted_disk_templates = list(set(allowed_disk_templates)
+                                   .intersection(set(enabled_disk_templates)))
+  ipolicy[constants.IPOLICY_DTS] = restricted_disk_templates
+
+
+def _InitCheckDrbdHelper(drbd_helper, drbd_enabled):
+  """Checks the DRBD usermode helper.
+
+  @type drbd_helper: string
+  @param drbd_helper: name of the DRBD usermode helper that the system should
+    use
+
+  """
+  if not drbd_enabled:
+    return
+
+  if drbd_helper is not None:
+    try:
+      curr_helper = drbd.DRBD8.GetUsermodeHelper()
+    except errors.BlockDeviceError, err:
+      raise errors.OpPrereqError("Error while checking drbd helper"
+                                 " (disable drbd with --enabled-disk-templates"
+                                 " if you are not using drbd): %s" % str(err),
+                                 errors.ECODE_ENVIRON)
+    if drbd_helper != curr_helper:
+      raise errors.OpPrereqError("Error: requiring %s as drbd helper but %s"
+                                 " is the current helper" % (drbd_helper,
+                                                             curr_helper),
+                                 errors.ECODE_INVAL)
+
+
 def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
                 master_netmask, master_netdev, file_storage_dir,
                 shared_file_storage_dir, candidate_pool_size, secondary_ip=None,
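
Of the helpers added in this hunk, _RestrictIpolicyToEnabledDiskTemplates is
a plain set intersection on the ipolicy's allowed disk templates. A worked
example of its effect (template names chosen for illustration):

    ipolicy = {constants.IPOLICY_DTS: ["drbd", "plain", "file"]}
    _RestrictIpolicyToEnabledDiskTemplates(ipolicy, ["drbd", "file"])
    # ipolicy[constants.IPOLICY_DTS] now holds ["drbd", "file"] in some
    # order, since the intersection is computed via sets
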
@@ -293,11 +507,14 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
                 maintain_node_health=False, drbd_helper=None, uid_pool=None,
                 default_iallocator=None, primary_ip_version=None, ipolicy=None,
                 prealloc_wipe_disks=False, use_external_mip_script=False,
-                hv_state=None, disk_state=None):
+                hv_state=None, disk_state=None, enabled_disk_templates=None):
   """Initialise the cluster.
 
   @type candidate_pool_size: int
   @param candidate_pool_size: master candidate pool size
+  @type enabled_disk_templates: list of string
+  @param enabled_disk_templates: list of disk_templates to be used in this
+    cluster
 
   """
   # TODO: complete the docstring
@@ -314,21 +531,24 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
                                " entries: %s" % invalid_hvs,
                                errors.ECODE_INVAL)
 
+  _InitCheckEnabledDiskTemplates(enabled_disk_templates)
+
   try:
     ipcls = netutils.IPAddress.GetClassFromIpVersion(primary_ip_version)
   except errors.ProgrammerError:
     raise errors.OpPrereqError("Invalid primary ip version: %d." %
-                               primary_ip_version)
+                               primary_ip_version, errors.ECODE_INVAL)
 
   hostname = netutils.GetHostname(family=ipcls.family)
   if not ipcls.IsValid(hostname.ip):
     raise errors.OpPrereqError("This host's IP (%s) is not a valid IPv%d"
-                               " address." % (hostname.ip, primary_ip_version))
+                               " address." % (hostname.ip, primary_ip_version),
+                               errors.ECODE_INVAL)
 
   if ipcls.IsLoopback(hostname.ip):
     raise errors.OpPrereqError("This host's IP (%s) resolves to a loopback"
                                " address. Please fix DNS or %s." %
-                               (hostname.ip, constants.ETC_HOSTS),
+                               (hostname.ip, pathutils.ETC_HOSTS),
                                errors.ECODE_ENVIRON)
 
   if not ipcls.Own(hostname.ip):
@@ -363,42 +583,32 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
   if master_netmask is not None:
     if not ipcls.ValidateNetmask(master_netmask):
       raise errors.OpPrereqError("CIDR netmask (%s) not valid for IPv%s " %
-                                  (master_netmask, primary_ip_version))
+                                  (master_netmask, primary_ip_version),
+                                 errors.ECODE_INVAL)
   else:
     master_netmask = ipcls.iplen
 
-  if vg_name is not None:
+  if vg_name:
     # Check if volume group is valid
     vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
                                           constants.MIN_VG_SIZE)
     if vgstatus:
-      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
-                                 " you are not using lvm" % vgstatus,
-                                 errors.ECODE_INVAL)
+      raise errors.OpPrereqError("Error: %s" % vgstatus, errors.ECODE_INVAL)
 
-  if drbd_helper is not None:
-    try:
-      curr_helper = bdev.BaseDRBD.GetUsermodeHelper()
-    except errors.BlockDeviceError, err:
-      raise errors.OpPrereqError("Error while checking drbd helper"
-                                 " (specify --no-drbd-storage if you are not"
-                                 " using drbd): %s" % str(err),
-                                 errors.ECODE_ENVIRON)
-    if drbd_helper != curr_helper:
-      raise errors.OpPrereqError("Error: requiring %s as drbd helper but %s"
-                                 " is the current helper" % (drbd_helper,
-                                                             curr_helper),
-                                 errors.ECODE_INVAL)
+  drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
+  _InitCheckDrbdHelper(drbd_helper, drbd_enabled)
 
-  if constants.ENABLE_FILE_STORAGE:
-    file_storage_dir = _InitFileStorage(file_storage_dir)
-  else:
-    file_storage_dir = ""
+  logging.debug("Stopping daemons (if any are running)")
+  result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-all"])
+  if result.failed:
+    raise errors.OpExecError("Could not stop daemons, command %s"
+                             " had exitcode %s and error '%s'" %
+                             (result.cmd, result.exit_code, result.output))
 
-  if constants.ENABLE_SHARED_FILE_STORAGE:
-    shared_file_storage_dir = _InitFileStorage(shared_file_storage_dir)
-  else:
-    shared_file_storage_dir = ""
+  file_storage_dir = _PrepareFileStorage(enabled_disk_templates,
+                                         file_storage_dir)
+  shared_file_storage_dir = _PrepareSharedFileStorage(enabled_disk_templates,
+                                                      shared_file_storage_dir)
 
   if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$", mac_prefix):
     raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix,
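
The DRBD helper check now runs only when DRBD is among the enabled disk
templates, via the _InitCheckDrbdHelper call earlier in this hunk.
drbd.DRBD8.GetUsermodeHelper asks the kernel which helper is configured; a
sketch of such a lookup (the sysfs path is an assumption about the
storage.drbd implementation, not a quote from it):

    def get_drbd_usermode_helper():
        # The drbd kernel module exposes its usermode helper as a module
        # parameter under sysfs; read it and strip the trailing newline.
        helper_file = "/sys/module/drbd/parameters/usermode_helper"
        try:
            return utils.ReadFile(helper_file).strip()
        except EnvironmentError, err:
            raise errors.BlockDeviceError("Can't read DRBD helper: %s" % err)
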
@@ -410,7 +620,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
                                (master_netdev,
                                 result.output.strip()), errors.ECODE_INVAL)
 
-  dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE)]
+  dirs = [(pathutils.RUN_DIR, constants.RUN_DIRS_MODE)]
   utils.EnsureDirs(dirs)
 
   objects.UpgradeBeParams(beparams)
@@ -420,6 +630,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
   objects.NIC.CheckParameterSyntax(nicparams)
 
   full_ipolicy = objects.FillIPolicy(constants.IPOLICY_DEFAULTS, ipolicy)
+  _RestrictIpolicyToEnabledDiskTemplates(full_ipolicy, enabled_disk_templates)
 
   if ndparams is not None:
     utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
@@ -461,12 +672,32 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
       unknown_params = param_keys - default_param_keys
       raise errors.OpPrereqError("Invalid parameters for disk template %s:"
                                  " %s" % (template,
-                                          utils.CommaJoin(unknown_params)))
+                                          utils.CommaJoin(unknown_params)),
+                                 errors.ECODE_INVAL)
     utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
+    if template == constants.DT_DRBD8 and vg_name is not None:
+      # The default METAVG value is equal to the VG name set at init time,
+      # if provided
+      dt_params[constants.DRBD_DEFAULT_METAVG] = vg_name
+
+  try:
+    utils.VerifyDictOptions(diskparams, constants.DISK_DT_DEFAULTS)
+  except errors.OpPrereqError, err:
+    raise errors.OpPrereqError("While verifying diskparam options: %s" % err,
+                               errors.ECODE_INVAL)
 
   # set up ssh config and /etc/hosts
-  sshline = utils.ReadFile(constants.SSH_HOST_RSA_PUB)
-  sshkey = sshline.split(" ")[1]
+  rsa_sshkey = ""
+  dsa_sshkey = ""
+  if os.path.isfile(pathutils.SSH_HOST_RSA_PUB):
+    sshline = utils.ReadFile(pathutils.SSH_HOST_RSA_PUB)
+    rsa_sshkey = sshline.split(" ")[1]
+  if os.path.isfile(pathutils.SSH_HOST_DSA_PUB):
+    sshline = utils.ReadFile(pathutils.SSH_HOST_DSA_PUB)
+    dsa_sshkey = sshline.split(" ")[1]
+  if not rsa_sshkey and not dsa_sshkey:
+    raise errors.OpPrereqError("Failed to find SSH public keys",
+                               errors.ECODE_ENVIRON)
 
   if modify_etc_hosts:
     utils.AddHostToEtcHosts(hostname.name, hostname.ip)
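
The key extraction itself is unchanged: an OpenSSH public key file holds
"type base64-blob [comment]" on a single line, so split(" ")[1] picks out
the base64 material. For instance (key abridged):

    sshline = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQAB... root@node1"
    sshkey = sshline.split(" ")[1]  # -> "AAAAB3NzaC1yc2EAAAADAQAB..."

What is new is that either host key may be absent; initialization only
fails if neither an RSA nor a DSA public key can be found.
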
@@ -494,12 +725,12 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
   # init of cluster config file
   cluster_config = objects.Cluster(
     serial_no=1,
-    rsahostkeypub=sshkey,
+    rsahostkeypub=rsa_sshkey,
+    dsahostkeypub=dsa_sshkey,
     highest_used_port=(constants.FIRST_DRBD_PORT - 1),
     mac_prefix=mac_prefix,
     volume_group_name=vg_name,
     tcpudp_port_pool=set(),
-    master_node=hostname.name,
     master_ip=clustername.ip,
     master_netmask=master_netmask,
     master_netdev=master_netdev,
@@ -527,6 +758,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
     ipolicy=full_ipolicy,
     hv_state_static=hv_state,
     disk_state_static=disk_state,
+    enabled_disk_templates=enabled_disk_templates,
     )
   master_node_config = objects.Node(name=hostname.name,
                                     primary_ip=hostname.ip,
@@ -538,15 +770,15 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
                                     )
   InitConfig(constants.CONFIG_VERSION, cluster_config, master_node_config)
   cfg = config.ConfigWriter(offline=True)
-  ssh.WriteKnownHostsFile(cfg, constants.SSH_KNOWN_HOSTS_FILE)
+  ssh.WriteKnownHostsFile(cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
   cfg.Update(cfg.GetClusterInfo(), logging.error)
-  backend.WriteSsconfFiles(cfg.GetSsconfValues())
+  ssconf.WriteSsconfFiles(cfg.GetSsconfValues())
 
   # set up the inter-node password and certificate
   _InitGanetiServerSetup(hostname.name)
 
   logging.debug("Starting daemons")
-  result = utils.RunCmd([constants.DAEMON_UTIL, "start-all"])
+  result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-all"])
   if result.failed:
     raise errors.OpExecError("Could not start daemons, command %s"
                              " had exitcode %s and error %s" %
@@ -556,7 +788,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914
 
 
 def InitConfig(version, cluster_config, master_node_config,
-               cfg_file=constants.CLUSTER_CONF_FILE):
+               cfg_file=pathutils.CLUSTER_CONF_FILE):
   """Create the initial cluster configuration.
 
   It will contain the current node, which will also be the master
@@ -577,13 +809,14 @@ def InitConfig(version, cluster_config, master_node_config,
                                                 _INITCONF_ECID)
   master_node_config.uuid = uuid_generator.Generate([], utils.NewUUID,
                                                     _INITCONF_ECID)
+  cluster_config.master_node = master_node_config.uuid
   nodes = {
-    master_node_config.name: master_node_config,
+    master_node_config.uuid: master_node_config,
     }
   default_nodegroup = objects.NodeGroup(
     uuid=uuid_generator.Generate([], utils.NewUUID, _INITCONF_ECID),
     name=constants.INITIAL_NODE_GROUP_NAME,
-    members=[master_node_config.name],
+    members=[master_node_config.uuid],
     diskparams={},
     )
   nodegroups = {
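
From this hunk on, the initial configuration is keyed by node UUID rather
than node name: master_node, the nodes dict and the node group members all
carry the freshly generated UUID. Schematically (the UUID literal is
invented):

    master_node_config.uuid = "9e1f4f1e-..."  # generated just above
    cluster_config.master_node = master_node_config.uuid
    nodes = {master_node_config.uuid: master_node_config}
    # default_nodegroup.members == [master_node_config.uuid]
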
@@ -595,6 +828,7 @@ def InitConfig(version, cluster_config, master_node_config,
                                    nodegroups=nodegroups,
                                    nodes=nodes,
                                    instances={},
+                                   networks={},
                                    serial_no=1,
                                    ctime=now, mtime=now)
   utils.WriteFile(cfg_file,
@@ -602,7 +836,7 @@ def InitConfig(version, cluster_config, master_node_config,
                   mode=0600)
 
 
-def FinalizeClusterDestroy(master):
+def FinalizeClusterDestroy(master_uuid):
   """Execute the last steps of cluster destroy
 
   This function shuts down all the daemons, completing the destroy
@@ -613,29 +847,31 @@ def FinalizeClusterDestroy(master):
   modify_ssh_setup = cfg.GetClusterInfo().modify_ssh_setup
   runner = rpc.BootstrapRunner()
 
+  master_name = cfg.GetNodeName(master_uuid)
+
   master_params = cfg.GetMasterNetworkParameters()
-  master_params.name = master
+  master_params.uuid = master_uuid
   ems = cfg.GetUseExternalMipScript()
-  result = runner.call_node_deactivate_master_ip(master_params.name,
-                                                 master_params, ems)
+  result = runner.call_node_deactivate_master_ip(master_name, master_params,
+                                                 ems)
 
   msg = result.fail_msg
   if msg:
     logging.warning("Could not disable the master IP: %s", msg)
 
-  result = runner.call_node_stop_master(master)
+  result = runner.call_node_stop_master(master_name)
   msg = result.fail_msg
   if msg:
     logging.warning("Could not disable the master role: %s", msg)
 
-  result = runner.call_node_leave_cluster(master, modify_ssh_setup)
+  result = runner.call_node_leave_cluster(master_name, modify_ssh_setup)
   msg = result.fail_msg
   if msg:
     logging.warning("Could not shutdown the node daemon and cleanup"
                     " the node: %s", msg)
 
 
-def SetupNodeDaemon(cluster_name, node, ssh_key_check):
+def SetupNodeDaemon(opts, cluster_name, node):
   """Add a node to the cluster.
 
   This function must be called before the actual opcode, and will ssh
@@ -644,38 +880,19 @@ def SetupNodeDaemon(cluster_name, node, ssh_key_check):
 
   @param cluster_name: the cluster name
   @param node: the name of the new node
-  @param ssh_key_check: whether to do a strict key check
 
   """
-  family = ssconf.SimpleStore().GetPrimaryIPFamily()
-  sshrunner = ssh.SshRunner(cluster_name,
-                            ipv6=(family == netutils.IP6Address.family))
-
-  bind_address = constants.IP4_ADDRESS_ANY
-  if family == netutils.IP6Address.family:
-    bind_address = constants.IP6_ADDRESS_ANY
-
-  # set up inter-node password and certificate and restarts the node daemon
-  # and then connect with ssh to set password and start ganeti-noded
-  # note that all the below variables are sanitized at this point,
-  # either by being constants or by the checks above
-  sshrunner.CopyFileToNode(node, constants.NODED_CERT_FILE)
-  sshrunner.CopyFileToNode(node, constants.RAPI_CERT_FILE)
-  sshrunner.CopyFileToNode(node, constants.SPICE_CERT_FILE)
-  sshrunner.CopyFileToNode(node, constants.SPICE_CACERT_FILE)
-  sshrunner.CopyFileToNode(node, constants.CONFD_HMAC_KEY)
-  mycommand = ("%s stop-all; %s start %s -b %s" %
-               (constants.DAEMON_UTIL, constants.DAEMON_UTIL, constants.NODED,
-                utils.ShellQuote(bind_address)))
-
-  result = sshrunner.Run(node, "root", mycommand, batch=False,
-                         ask_key=ssh_key_check,
-                         use_cluster_key=True,
-                         strict_host_check=ssh_key_check)
-  if result.failed:
-    raise errors.OpExecError("Remote command on node %s, error: %s,"
-                             " output: %s" %
-                             (node, result.fail_reason, result.output))
+  data = {
+    constants.NDS_CLUSTER_NAME: cluster_name,
+    constants.NDS_NODE_DAEMON_CERTIFICATE:
+      utils.ReadFile(pathutils.NODED_CERT_FILE),
+    constants.NDS_SSCONF: ssconf.SimpleStore().ReadAll(),
+    constants.NDS_START_NODE_DAEMON: True,
+    }
+
+  RunNodeSetupCmd(cluster_name, node, pathutils.NODE_DAEMON_SETUP,
+                  opts.debug, opts.verbose,
+                  True, opts.ssh_key_check, opts.ssh_key_check, data)
 
   _WaitForNodeDaemon(node)
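
SetupNodeDaemon no longer copies each certificate over ssh and restarts
noded by hand; the whole bootstrap state travels as one JSON document on the
setup script's stdin. Roughly, what gets serialized here (values invented or
abridged):

    data = {
        constants.NDS_CLUSTER_NAME: "cluster.example.com",
        constants.NDS_NODE_DAEMON_CERTIFICATE: "<PEM contents>",
        constants.NDS_SSCONF: ssconf.SimpleStore().ReadAll(),  # key/value map
        constants.NDS_START_NODE_DAEMON: True,
    }
    # serializer.DumpJson(data) is written to a temporary file and piped
    # into pathutils.NODE_DAEMON_SETUP on the new node by RunNodeSetupCmd.
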
 
@@ -695,7 +912,7 @@ def MasterFailover(no_voting=False):
   sstore = ssconf.SimpleStore()
 
   old_master, new_master = ssconf.GetMasterAndMyself(sstore)
-  node_list = sstore.GetNodeList()
+  node_names = sstore.GetNodeList()
   mc_list = sstore.GetMasterCandidates()
 
   if old_master == new_master:
@@ -714,7 +931,7 @@ def MasterFailover(no_voting=False):
                                errors.ECODE_STATE)
 
   if not no_voting:
-    vote_list = GatherMasterVotes(node_list)
+    vote_list = GatherMasterVotes(node_names)
 
     if vote_list:
       voted_master = vote_list[0][0]
@@ -739,8 +956,20 @@ def MasterFailover(no_voting=False):
     # configuration data
     cfg = config.ConfigWriter(accept_foreign=True)
 
+    old_master_node = cfg.GetNodeInfoByName(old_master)
+    if old_master_node is None:
+      raise errors.OpPrereqError("Could not find old master node '%s' in"
+                                 " cluster configuration." % old_master,
+                                 errors.ECODE_NOENT)
+
     cluster_info = cfg.GetClusterInfo()
-    cluster_info.master_node = new_master
+    new_master_node = cfg.GetNodeInfoByName(new_master)
+    if new_master_node is None:
+      raise errors.OpPrereqError("Could not find new master node '%s' in"
+                                 " cluster configuration." % new_master,
+                                 errors.ECODE_NOENT)
+
+    cluster_info.master_node = new_master_node.uuid
     # this will also regenerate the ssconf files, since we updated the
     # cluster info
     cfg.Update(cluster_info, logging.error)
@@ -758,9 +987,9 @@ def MasterFailover(no_voting=False):
 
   runner = rpc.BootstrapRunner()
   master_params = cfg.GetMasterNetworkParameters()
-  master_params.name = old_master
+  master_params.uuid = old_master_node.uuid
   ems = cfg.GetUseExternalMipScript()
-  result = runner.call_node_deactivate_master_ip(master_params.name,
+  result = runner.call_node_deactivate_master_ip(old_master,
                                                  master_params, ems)
 
   msg = result.fail_msg
@@ -771,7 +1000,7 @@ def MasterFailover(no_voting=False):
   msg = result.fail_msg
   if msg:
     logging.error("Could not disable the master role on the old master"
-                 " %s, please disable manually: %s", old_master, msg)
+                  " %s, please disable manually: %s", old_master, msg)
 
   logging.info("Checking master IP non-reachability...")
 
@@ -824,7 +1053,7 @@ def GetMaster():
   return old_master
 
 
-def GatherMasterVotes(node_list):
+def GatherMasterVotes(node_names):
   """Check the agreement on who is the master.
 
   This function will return a list of (node, number of votes), ordered
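
The return format is unchanged by the rename: a list of (node, votes) pairs
ordered by vote count, where None aggregates nodes that were unreachable or
returned malformed data. For example (hostnames invented), polling three
nodes of which two agree could yield:

    [("node2.example.com", 2), (None, 1)]
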
@@ -838,8 +1067,8 @@ def GatherMasterVotes(node_list):
   since we use the same source for configuration information for both
  backend and bootstrap, we'll always vote for ourselves.
 
-  @type node_list: list
-  @param node_list: the list of nodes to query for master info; the current
+  @type node_names: list
+  @param node_names: the list of nodes to query for master info; the current
       node will be removed if it is in the list
   @rtype: list
   @return: list of (node, votes)
@@ -847,30 +1076,31 @@ def GatherMasterVotes(node_list):
   """
   myself = netutils.Hostname.GetSysName()
   try:
-    node_list.remove(myself)
+    node_names.remove(myself)
   except ValueError:
     pass
-  if not node_list:
+  if not node_names:
     # no nodes left (eventually after removing myself)
     return []
-  results = rpc.BootstrapRunner().call_master_info(node_list)
+  results = rpc.BootstrapRunner().call_master_info(node_names)
   if not isinstance(results, dict):
     # this should not happen (unless internal error in rpc)
     logging.critical("Can't complete rpc call, aborting master startup")
-    return [(None, len(node_list))]
+    return [(None, len(node_names))]
   votes = {}
-  for node in results:
-    nres = results[node]
+  for node_name in results:
+    nres = results[node_name]
     data = nres.payload
     msg = nres.fail_msg
     fail = False
     if msg:
-      logging.warning("Error contacting node %s: %s", node, msg)
+      logging.warning("Error contacting node %s: %s", node_name, msg)
       fail = True
     # for now we accept both length 3, 4 and 5 (data[3] is primary ip version
     # and data[4] is the master netmask)
     elif not isinstance(data, (tuple, list)) or len(data) < 3:
-      logging.warning("Invalid data received from node %s: %s", node, data)
+      logging.warning("Invalid data received from node %s: %s",
+                      node_name, data)
       fail = True
     if fail:
       if None not in votes: