lib/backend.py @ 58b311ca
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import stat
30
import errno
31
import re
32
import subprocess
33
import random
34
import logging
35
import tempfile
36

    
37
from ganeti import errors
38
from ganeti import utils
39
from ganeti import ssh
40
from ganeti import hypervisor
41
from ganeti import constants
42
from ganeti import bdev
43
from ganeti import objects
44
from ganeti import ssconf
45

    
46

    
47
def _GetConfig():
48
  return ssconf.SimpleConfigReader()
49

    
50

    
51
def _GetSshRunner(cluster_name):
52
  return ssh.SshRunner(cluster_name)
53

    
54

    
55
def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  @param exclude: list of files to be excluded, defaults to None
      (no exclusions)
  @type exclude: list

  """
  if not os.path.isdir(path):
    return
  if exclude is None:
    exclude = []

  # Normalize excluded paths
  exclude = [os.path.normpath(i) for i in exclude]
67

    
68
  for rel_name in utils.ListVisibleFiles(path):
69
    full_name = os.path.normpath(os.path.join(path, rel_name))
70
    if full_name in exclude:
71
      continue
72
    if os.path.isfile(full_name) and not os.path.islink(full_name):
73
      utils.RemoveFile(full_name)
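

# Illustrative sketch, not part of the original module: a hypothetical helper
# showing how _CleanDirectory() is typically called; DATA_DIR and
# CLUSTER_CONF_FILE are existing constants, the helper name is made up.
def _ExampleCleanDataDir():
  """Hypothetical usage example for _CleanDirectory()."""
  # remove all regular files under DATA_DIR but keep the cluster config;
  # subdirectories and symlinks are never touched by _CleanDirectory()
  _CleanDirectory(constants.DATA_DIR,
                  exclude=[constants.CLUSTER_CONF_FILE])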
74

    
75

    
76
def JobQueuePurge():
77
  """Removes job queue files and archived jobs
78

79
  """
80
  _CleanDirectory(constants.QUEUE_DIR, exclude=[constants.JOB_QUEUE_LOCK_FILE])
81
  _CleanDirectory(constants.JOB_QUEUE_ARCHIVE_DIR)
82

    
83

    
84
def GetMasterInfo():
85
  """Returns master information.
86

87
  This is a utility function to compute master information, either
88
  for consumption here or from the node daemon.
89

90
  @rtype: tuple
91
  @return: (master_netdev, master_ip, master_name)
92

93
  """
94
  try:
95
    cfg = _GetConfig()
96
    master_netdev = cfg.GetMasterNetdev()
97
    master_ip = cfg.GetMasterIP()
98
    master_node = cfg.GetMasterNode()
99
  except errors.ConfigurationError, err:
100
    logging.exception("Cluster configuration incomplete")
101
    return (None, None, None)
102
  return (master_netdev, master_ip, master_node)
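

# Illustrative sketch, not part of the original module: how callers such as
# StartMaster()/StopMaster() below consume GetMasterInfo(), treating a missing
# netdev as "cluster configuration not available". The helper name is made up.
def _ExampleDescribeMaster():
  """Hypothetical usage example for GetMasterInfo()."""
  master_netdev, master_ip, master_node = GetMasterInfo()
  if not master_netdev:
    logging.error("Master information not available")
    return None
  return "%s (%s on %s)" % (master_node, master_ip, master_netdev)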
103

    
104

    
105
def StartMaster(start_daemons):
106
  """Activate local node as master node.
107

108
  The function will always try to activate the IP address of the master
109
  (if someone else has it, then it won't). Then, if the start_daemons
110
  parameter is True, it will also start the master daemons
111
  (ganeti-masterd and ganeti-rapi).
112

113
  """
114
  ok = True
115
  master_netdev, master_ip, _ = GetMasterInfo()
116
  if not master_netdev:
117
    return False
118

    
119
  if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
120
    if utils.OwnIpAddress(master_ip):
121
      # we already have the ip:
122
      logging.debug("Already started")
123
    else:
124
      logging.error("Someone else has the master ip, not activating")
125
      ok = False
126
  else:
127
    result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip,
128
                           "dev", master_netdev, "label",
129
                           "%s:0" % master_netdev])
130
    if result.failed:
131
      logging.error("Can't activate master IP: %s", result.output)
132
      ok = False
133

    
134
    result = utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
135
                           "-s", master_ip, master_ip])
136
    # we'll ignore the exit code of arping
137

    
138
  # and now start the master and rapi daemons
139
  if start_daemons:
140
    for daemon in 'ganeti-masterd', 'ganeti-rapi':
141
      result = utils.RunCmd([daemon])
142
      if result.failed:
143
        logging.error("Can't start daemon %s: %s", daemon, result.output)
144
        ok = False
145
  return ok
146

    
147

    
148
def StopMaster(stop_daemons):
149
  """Deactivate this node as master.
150

151
  The function will always try to deactivate the IP address of the
152
  master. Then, if the stop_daemons parameter is True, it will also
153
  stop the master daemons (ganeti-masterd and ganeti-rapi).
154

155
  """
156
  master_netdev, master_ip, _ = GetMasterInfo()
157
  if not master_netdev:
158
    return False
159

    
160
  result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip,
161
                         "dev", master_netdev])
162
  if result.failed:
163
    logging.error("Can't remove the master IP, error: %s", result.output)
164
    # but otherwise ignore the failure
165

    
166
  if stop_daemons:
167
    # stop/kill the rapi and the master daemon
168
    for daemon in constants.RAPI_PID, constants.MASTERD_PID:
169
      utils.KillProcess(utils.ReadPidFile(utils.DaemonPidFileName(daemon)))
170

    
171
  return True
172

    
173

    
174
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
175
  """Joins this node to the cluster.
176

177
  This does the following:
178
      - updates the hostkeys of the machine (rsa and dsa)
179
      - adds the ssh private key to the user
180
      - adds the ssh public key to the user's authorized_keys file
181

182
  """
183
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
184
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
185
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
186
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
187
  for name, content, mode in sshd_keys:
188
    utils.WriteFile(name, data=content, mode=mode)
189

    
190
  try:
191
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
192
                                                    mkdir=True)
193
  except errors.OpExecError, err:
194
    logging.exception("Error while processing user ssh files")
195
    return False
196

    
197
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
198
    utils.WriteFile(name, data=content, mode=0600)
199

    
200
  utils.AddAuthorizedKey(auth_keys, sshpub)
201

    
202
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
203

    
204
  return True
205

    
206

    
207
def LeaveCluster():
208
  """Cleans up the current node and prepares it to be removed from the cluster.
209

210
  """
211
  _CleanDirectory(constants.DATA_DIR)
212
  JobQueuePurge()
213

    
214
  try:
215
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
216
  except errors.OpExecError:
217
    logging.exception("Error while processing ssh files")
218
    return
219

    
220
  f = open(pub_key, 'r')
221
  try:
222
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
223
  finally:
224
    f.close()
225

    
226
  utils.RemoveFile(priv_key)
227
  utils.RemoveFile(pub_key)
228

    
229
  # Return a reassuring string to the caller, and quit
230
  raise errors.QuitGanetiException(False, 'Shutdown scheduled')
231

    
232

    
233
def GetNodeInfo(vgname, hypervisor_type):
  """Gives back a dict with information about the node.
235

236
  @type vgname: C{string}
237
  @param vgname: the name of the volume group to ask for disk space information
238
  @type hypervisor_type: C{str}
239
  @param hypervisor_type: the name of the hypervisor to ask for
240
      memory information
241
  @rtype: C{dict}
242
  @return: dictionary with the following keys:
243
      - vg_size is the size of the configured volume group in MiB
244
      - vg_free is the free size of the volume group in MiB
245
      - memory_dom0 is the memory allocated for domain0 in MiB
246
      - memory_free is the currently available (free) RAM in MiB
      - memory_total is the total amount of RAM in MiB
248

249
  """
250
  outputarray = {}
251
  vginfo = _GetVGInfo(vgname)
252
  outputarray['vg_size'] = vginfo['vg_size']
253
  outputarray['vg_free'] = vginfo['vg_free']
254

    
255
  hyper = hypervisor.GetHypervisor(hypervisor_type)
256
  hyp_info = hyper.GetNodeInfo()
257
  if hyp_info is not None:
258
    outputarray.update(hyp_info)
259

    
260
  f = open("/proc/sys/kernel/random/boot_id", 'r')
261
  try:
262
    outputarray["bootid"] = f.read(128).rstrip("\n")
263
  finally:
264
    f.close()
265

    
266
  return outputarray
267

    
268

    
269
def VerifyNode(what, cluster_name):
270
  """Verify the status of the local node.
271

272
  Based on the input L{what} parameter, various checks are done on the
273
  local node.
274

275
  If the I{filelist} key is present, this list of
276
  files is checksummed and the file/checksum pairs are returned.
277

278
  If the I{nodelist} key is present, we check that we have
279
  connectivity via ssh with the target nodes (and check the hostname
280
  report).
281

282
  If the I{node-net-test} key is present, we check that we have
283
  connectivity to the given nodes via both primary IP and, if
284
  applicable, secondary IPs.
285

286
  @type what: C{dict}
287
  @param what: a dictionary of things to check:
288
      - filelist: list of files for which to compute checksums
289
      - nodelist: list of nodes we should check ssh communication with
290
      - node-net-test: list of nodes we should check node daemon port
291
        connectivity with
292
      - hypervisor: list with hypervisors to run the verify for
293

294
  """
295
  result = {}
296

    
297
  if 'hypervisor' in what:
298
    result['hypervisor'] = my_dict = {}
299
    for hv_name in what['hypervisor']:
300
      my_dict[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()
301

    
302
  if 'filelist' in what:
303
    result['filelist'] = utils.FingerprintFiles(what['filelist'])
304

    
305
  if 'nodelist' in what:
306
    result['nodelist'] = {}
307
    random.shuffle(what['nodelist'])
308
    for node in what['nodelist']:
309
      success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node)
310
      if not success:
311
        result['nodelist'][node] = message
312
  if 'node-net-test' in what:
313
    result['node-net-test'] = {}
314
    my_name = utils.HostInfo().name
315
    my_pip = my_sip = None
316
    for name, pip, sip in what['node-net-test']:
317
      if name == my_name:
318
        my_pip = pip
319
        my_sip = sip
320
        break
321
    if not my_pip:
322
      result['node-net-test'][my_name] = ("Can't find my own"
323
                                          " primary/secondary IP"
324
                                          " in the node list")
325
    else:
326
      port = utils.GetNodeDaemonPort()
327
      for name, pip, sip in what['node-net-test']:
328
        fail = []
329
        if not utils.TcpPing(pip, port, source=my_pip):
330
          fail.append("primary")
331
        if sip != pip:
332
          if not utils.TcpPing(sip, port, source=my_sip):
333
            fail.append("secondary")
334
        if fail:
335
          result['node-net-test'][name] = ("failure using the %s"
336
                                           " interface(s)" %
337
                                           " and ".join(fail))
338

    
339
  return result
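

# Illustrative sketch, not part of the original module: building a minimal
# 'what' dictionary for VerifyNode(); the node and cluster names below are
# hypothetical, the helper name is made up.
def _ExampleVerifyNode():
  """Hypothetical usage example for VerifyNode()."""
  what = {
    'filelist': [constants.CLUSTER_CONF_FILE],
    'nodelist': ['node2.example.com', 'node3.example.com'],
    }
  # returns a dict with a 'filelist' checksum map and, for each node that
  # failed the ssh/hostname check, an entry under 'nodelist'
  return VerifyNode(what, 'cluster.example.com')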
340

    
341

    
342
def GetVolumeList(vg_name):
343
  """Compute list of logical volumes and their size.
344

345
  Returns:
346
    dictionary of all partitions (key) with their size (in MiB), inactive
347
    and online status:
348
    {'test1': ('20.06', True, True)}
349

350
  """
351
  lvs = {}
352
  sep = '|'
353
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
354
                         "--separator=%s" % sep,
355
                         "-olv_name,lv_size,lv_attr", vg_name])
356
  if result.failed:
357
    logging.error("Failed to list logical volumes, lvs output: %s",
358
                  result.output)
359
    return result.output
360

    
361
  valid_line_re = re.compile(r"^ *([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
362
  for line in result.stdout.splitlines():
363
    line = line.strip()
364
    match = valid_line_re.match(line)
365
    if not match:
366
      logging.error("Invalid line returned from lvs output: '%s'", line)
367
      continue
368
    name, size, attr = match.groups()
369
    inactive = attr[4] == '-'
370
    online = attr[5] == 'o'
371
    lvs[name] = (size, inactive, online)
372

    
373
  return lvs
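

# Illustrative sketch, not part of the original module: consuming the result
# of GetVolumeList(); the volume group name is hypothetical. Note that on
# 'lvs' failure the function returns the raw command output (a string), so
# callers should check the type before iterating.
def _ExampleOnlineVolumes(vg_name="xenvg"):
  """Hypothetical usage example for GetVolumeList()."""
  lvs = GetVolumeList(vg_name)
  if not isinstance(lvs, dict):
    return []
  # each value is a (size_in_mib, inactive, online) tuple
  return [name for name, (_, _, online) in lvs.items() if online]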
374

    
375

    
376
def ListVolumeGroups():
377
  """List the volume groups and their size.
378

379
  Returns:
380
    Dictionary with the volume group names as keys and their sizes as values
381

382
  """
383
  return utils.ListVolumeGroups()
384

    
385

    
386
def NodeVolumes():
387
  """List all volumes on this node.
388

389
  """
390
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
391
                         "--separator=|",
392
                         "--options=lv_name,lv_size,devices,vg_name"])
393
  if result.failed:
394
    logging.error("Failed to list logical volumes, lvs output: %s",
395
                  result.output)
396
    return {}
397

    
398
  def parse_dev(dev):
399
    if '(' in dev:
400
      return dev.split('(')[0]
401
    else:
402
      return dev
403

    
404
  def map_line(line):
405
    return {
406
      'name': line[0].strip(),
407
      'size': line[1].strip(),
408
      'dev': parse_dev(line[2].strip()),
409
      'vg': line[3].strip(),
410
    }
411

    
412
  return [map_line(line.split('|')) for line in result.stdout.splitlines()
413
          if line.count('|') >= 3]
414

    
415

    
416
def BridgesExist(bridges_list):
417
  """Check if a list of bridges exist on the current node.
418

419
  Returns:
420
    True if all of them exist, False otherwise
421

422
  """
423
  for bridge in bridges_list:
424
    if not utils.BridgeExists(bridge):
425
      return False
426

    
427
  return True
428

    
429

    
430
def GetInstanceList(hypervisor_list):
431
  """Provides a list of instances.
432

433
  @type hypervisor_list: list
434
  @param hypervisor_list: the list of hypervisors to query information
435

436
  @rtype: list
437
  @return: a list of all running instances on the current node
438
             - instance1.example.com
439
             - instance2.example.com
440

441
  """
442
  results = []
443
  for hname in hypervisor_list:
444
    try:
445
      names = hypervisor.GetHypervisor(hname).ListInstances()
446
      results.extend(names)
447
    except errors.HypervisorError, err:
448
      logging.exception("Error enumerating instances for hypervisor %s", hname)
449
      # FIXME: should we somehow not propagate this to the master?
450
      raise
451

    
452
  return results
453

    
454

    
455
def GetInstanceInfo(instance, hname):
  """Gives back information about an instance as a dictionary.
457

458
  @type instance: string
459
  @param instance: the instance name
460
  @type hname: string
461
  @param hname: the hypervisor type of the instance
462

463
  @rtype: dict
464
  @return: dictionary with the following keys:
465
      - memory: memory size of instance (int)
466
      - state: xen state of instance (string)
467
      - time: cpu time of instance (float)
468

469
  """
470
  output = {}
471

    
472
  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance)
473
  if iinfo is not None:
474
    output['memory'] = iinfo[2]
475
    output['state'] = iinfo[4]
476
    output['time'] = iinfo[5]
477

    
478
  return output
479

    
480

    
481
def GetAllInstancesInfo(hypervisor_list):
482
  """Gather data about all instances.
483

484
  This is the equivalent of `GetInstanceInfo()`, except that it
485
  computes data for all instances at once, thus being faster if one
486
  needs data about more than one instance.
487

488
  @type hypervisor_list: list
489
  @param hypervisor_list: list of hypervisors to query for instance data
490

491
  @rtype: dict of dicts
492
  @return: dictionary of instance: data, with data having the following keys:
493
      - memory: memory size of instance (int)
494
      - state: xen state of instance (string)
495
      - time: cpu time of instance (float)
496
      - vcpus: the number of vcpus
497

498
  """
499
  output = {}
500

    
501
  for hname in hypervisor_list:
502
    iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo()
503
    if iinfo:
504
      for name, inst_id, memory, vcpus, state, times in iinfo:
505
        value = {
506
          'memory': memory,
507
          'vcpus': vcpus,
508
          'state': state,
509
          'time': times,
510
          }
511
        if name in output and output[name] != value:
512
          raise errors.HypervisorError("Instance %s is running more than once"
513
                                       " with different parameters" % name)
514
        output[name] = value
515

    
516
  return output
517

    
518

    
519
def AddOSToInstance(instance):
520
  """Add an OS to an instance.
521

522
  @type instance: L{objects.Instance}
523
  @param instance: Instance whose OS is to be installed
524

525
  """
526
  inst_os = OSFromDisk(instance.os)
527

    
528
  create_script = inst_os.create_script
529
  create_env = OSEnvironment(instance)
530

    
531
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
532
                                     instance.name, int(time.time()))
533
  if not os.path.exists(constants.LOG_OS_DIR):
534
    os.mkdir(constants.LOG_OS_DIR, 0750)
535

    
536
  command = utils.BuildShellCmd("cd %s && %s &>%s",
537
                                inst_os.path, create_script, logfile)
538

    
539
  result = utils.RunCmd(command, env=create_env)
540
  if result.failed:
541
    logging.error("os create command '%s' returned error: %s, logfile: %s,"
542
                  " output: %s", command, result.fail_reason, logfile,
543
                  result.output)
544
    return False
545

    
546
  return True
547

    
548

    
549
def RunRenameInstance(instance, old_name):
550
  """Run the OS rename script for an instance.
551

552
  @type instance: objects.Instance
553
  @param instance: Instance whose OS is to be installed
554
  @type old_name: string
555
  @param old_name: previous instance name
556

557
  """
558
  inst_os = OSFromDisk(instance.os)
559

    
560
  script = inst_os.rename_script
561
  rename_env = OSEnvironment(instance)
562
  rename_env['OLD_INSTANCE_NAME'] = old_name
563

    
564
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
565
                                           old_name,
566
                                           instance.name, int(time.time()))
567
  if not os.path.exists(constants.LOG_OS_DIR):
568
    os.mkdir(constants.LOG_OS_DIR, 0750)
569

    
570
  command = utils.BuildShellCmd("cd %s && %s &>%s",
571
                                inst_os.path, script, logfile)
572

    
573
  result = utils.RunCmd(command, env=rename_env)
574

    
575
  if result.failed:
576
    logging.error("os rename command '%s' returned error: %s, output: %s",
577
                  command, result.fail_reason, result.output)
578
    return False
579

    
580
  return True
581

    
582

    
583
def _GetVGInfo(vg_name):
  """Get information about the volume group.
585

586
  Args:
587
    vg_name: the volume group
588

589
  Returns:
590
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
591
    where
592
    vg_size is the total size of the volume group in MiB
593
    vg_free is the free size of the volume group in MiB
594
    pv_count is the number of physical volumes in that vg
595

596
  If an error occurs during gathering of data, we return the same dict
597
  with keys all set to None.
598

599
  """
600
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
601

    
602
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
603
                         "--nosuffix", "--units=m", "--separator=:", vg_name])
604

    
605
  if retval.failed:
606
    logging.error("volume group %s not present", vg_name)
607
    return retdic
608
  valarr = retval.stdout.strip().rstrip(':').split(':')
609
  if len(valarr) == 3:
610
    try:
611
      retdic = {
612
        "vg_size": int(round(float(valarr[0]), 0)),
613
        "vg_free": int(round(float(valarr[1]), 0)),
614
        "pv_count": int(valarr[2]),
615
        }
616
    except ValueError, err:
617
      logging.exception("Failed to parse vgs output")
618
  else:
619
    logging.error("vgs output has the wrong number of fields (expected"
620
                  " three): %s", str(valarr))
621
  return retdic
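

# Worked example, not part of the original module: with the vgs invocation
# above, an output line such as "  102400.00:81920.00:2" is parsed into
# {'vg_size': 102400, 'vg_free': 81920, 'pv_count': 2}; a missing volume
# group or a parse failure yields the same keys with None values.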
622

    
623

    
624
def _GatherBlockDevs(instance):
625
  """Set up an instance's block device(s).
626

627
  This is run on the primary node at instance startup. The block
628
  devices must be already assembled.
629

630
  """
631
  block_devices = []
632
  for disk in instance.disks:
633
    device = _RecursiveFindBD(disk)
634
    if device is None:
635
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
636
                                    str(disk))
637
    device.Open()
638
    block_devices.append((disk, device))
639
  return block_devices
640

    
641

    
642
def StartInstance(instance, extra_args):
643
  """Start an instance.
644

645
  @type instance: instance object
646
  @param instance: the instance object
647
  @rtype: boolean
648
  @return: whether the startup was successful or not
649

650
  """
651
  running_instances = GetInstanceList([instance.hypervisor])
652

    
653
  if instance.name in running_instances:
654
    return True
655

    
656
  block_devices = _GatherBlockDevs(instance)
657
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
658

    
659
  try:
660
    hyper.StartInstance(instance, block_devices, extra_args)
661
  except errors.HypervisorError, err:
662
    logging.exception("Failed to start instance")
663
    return False
664

    
665
  return True
666

    
667

    
668
def ShutdownInstance(instance):
669
  """Shut an instance down.
670

671
  @type instance: instance object
672
  @param instance: the instance object
673
  @rtype: boolean
674
  @return: whether the shutdown was successful or not
675

676
  """
677
  hv_name = instance.hypervisor
678
  running_instances = GetInstanceList([hv_name])
679

    
680
  if instance.name not in running_instances:
681
    return True
682

    
683
  hyper = hypervisor.GetHypervisor(hv_name)
684
  try:
685
    hyper.StopInstance(instance)
686
  except errors.HypervisorError, err:
687
    logging.error("Failed to stop instance")
688
    return False
689

    
690
  # test every 10 seconds for 2 minutes
691
  shutdown_ok = False
692

    
693
  time.sleep(1)
694
  for dummy in range(11):
695
    if instance.name not in GetInstanceList([hv_name]):
696
      break
697
    time.sleep(10)
698
  else:
699
    # the shutdown did not succeed
700
    logging.error("Shutdown of '%s' unsuccessful, using destroy",
                  instance.name)
701

    
702
    try:
703
      hyper.StopInstance(instance, force=True)
704
    except errors.HypervisorError, err:
705
      logging.exception("Failed to stop instance")
706
      return False
707

    
708
    time.sleep(1)
709
    if instance.name in GetInstanceList([hv_name]):
710
      logging.error("Could not shut down instance '%s' even by destroy",
711
                    instance.name)
712
      return False
713

    
714
  return True
715

    
716

    
717
def RebootInstance(instance, reboot_type, extra_args):
718
  """Reboot an instance.
719

720
  Args:
721
    instance    - name of instance to reboot
722
    reboot_type - how to reboot [soft,hard,full]
723

724
  """
725
  running_instances = GetInstanceList([instance.hypervisor])
726

    
727
  if instance.name not in running_instances:
728
    logging.error("Cannot reboot instance that is not running")
729
    return False
730

    
731
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
732
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
733
    try:
734
      hyper.RebootInstance(instance)
735
    except errors.HypervisorError, err:
736
      logging.exception("Failed to soft reboot instance")
737
      return False
738
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
739
    try:
740
      ShutdownInstance(instance)
741
      StartInstance(instance, extra_args)
742
    except errors.HypervisorError, err:
743
      logging.exception("Failed to hard reboot instance")
744
      return False
745
  else:
746
    raise errors.ParameterError("invalid reboot type '%s'" % reboot_type)
747

    
748
  return True
749

    
750

    
751
def MigrateInstance(instance, target, live):
752
  """Migrates an instance to another node.
753

754
  @type instance: C{objects.Instance}
755
  @param instance: the instance definition
756
  @type target: string
757
  @param target: the target node name
758
  @type live: boolean
759
  @param live: whether the migration should be done live or not (the
760
      interpretation of this parameter is left to the hypervisor)
761
  @rtype: tuple
762
  @return: a tuple of (success, msg) where:
763
      - success is a boolean denoting the success/failure of the operation
764
      - msg is a string with details in case of failure
765

766
  """
767
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
768

    
769
  try:
770
    hyper.MigrateInstance(instance.name, target, live)
771
  except errors.HypervisorError, err:
772
    msg = "Failed to migrate instance: %s" % str(err)
773
    logging.error(msg)
774
    return (False, msg)
775
  return (True, "Migration successful")
776

    
777

    
778
def CreateBlockDevice(disk, size, owner, on_primary, info):
779
  """Creates a block device for an instance.
780

781
  Args:
782
   disk: a ganeti.objects.Disk object
783
   size: the size of the physical underlying device
784
   owner: a string with the name of the instance
785
   on_primary: a boolean indicating if it is the primary node or not
786
   info: string that will be sent to the physical device creation
787

788
  Returns:
789
    the new unique_id of the device (this can sometimes be
790
    computed only after creation), or None. On secondary nodes,
791
    it's not required to return anything.
792

793
  """
794
  clist = []
795
  if disk.children:
796
    for child in disk.children:
797
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
798
      if on_primary or disk.AssembleOnSecondary():
799
        # we need the children open in case the device itself has to
800
        # be assembled
801
        crdev.Open()
802
      clist.append(crdev)
803
  try:
804
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
805
    if device is not None:
806
      logging.info("removing existing device %s", disk)
807
      device.Remove()
808
  except errors.BlockDeviceError, err:
809
    pass
810

    
811
  device = bdev.Create(disk.dev_type, disk.physical_id,
812
                       clist, size)
813
  if device is None:
814
    raise ValueError("Can't create child device for %s, %s" %
815
                     (disk, size))
816
  if on_primary or disk.AssembleOnSecondary():
817
    if not device.Assemble():
818
      errorstring = "Can't assemble device after creation"
819
      logging.error(errorstring)
820
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
821
                                    " daemon logs" % errorstring)
822
    device.SetSyncSpeed(constants.SYNC_SPEED)
823
    if on_primary or disk.OpenOnSecondary():
824
      device.Open(force=True)
825
    DevCacheManager.UpdateCache(device.dev_path, owner,
826
                                on_primary, disk.iv_name)
827

    
828
  device.SetInfo(info)
829

    
830
  physical_id = device.unique_id
831
  return physical_id
832

    
833

    
834
def RemoveBlockDevice(disk):
835
  """Remove a block device.
836

837
  This is intended to be called recursively.
838

839
  """
840
  try:
841
    # since we are removing the device, allow a partial match
842
    # this allows removal of broken mirrors
843
    rdev = _RecursiveFindBD(disk, allow_partial=True)
844
  except errors.BlockDeviceError, err:
845
    # probably can't attach
846
    logging.info("Can't attach to device %s in remove", disk)
847
    rdev = None
848
  if rdev is not None:
849
    r_path = rdev.dev_path
850
    result = rdev.Remove()
851
    if result:
852
      DevCacheManager.RemoveCache(r_path)
853
  else:
854
    result = True
855
  if disk.children:
856
    for child in disk.children:
857
      result = result and RemoveBlockDevice(child)
858
  return result
859

    
860

    
861
def _RecursiveAssembleBD(disk, owner, as_primary):
862
  """Activate a block device for an instance.
863

864
  This is run on the primary and secondary nodes for an instance.
865

866
  This function is called recursively.
867

868
  Args:
869
    disk: an objects.Disk object
870
    as_primary: if we should make the block device read/write
871

872
  Returns:
873
    the assembled device, or True if no assembly was required on this node
874

875
  If the assembly is not successful, an exception is raised.
876

877
  """
878
  children = []
879
  if disk.children:
880
    mcn = disk.ChildrenNeeded()
881
    if mcn == -1:
882
      mcn = 0 # max number of Nones allowed
883
    else:
884
      mcn = len(disk.children) - mcn # max number of Nones
885
    for chld_disk in disk.children:
886
      try:
887
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
888
      except errors.BlockDeviceError, err:
889
        if children.count(None) >= mcn:
890
          raise
891
        cdev = None
892
        logging.debug("Error in child activation: %s", str(err))
893
      children.append(cdev)
894

    
895
  if as_primary or disk.AssembleOnSecondary():
896
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
897
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
898
    result = r_dev
899
    if as_primary or disk.OpenOnSecondary():
900
      r_dev.Open()
901
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
902
                                as_primary, disk.iv_name)
903

    
904
  else:
905
    result = True
906
  return result
907

    
908

    
909
def AssembleBlockDevice(disk, owner, as_primary):
910
  """Activate a block device for an instance.
911

912
  This is a wrapper over _RecursiveAssembleBD.
913

914
  Returns:
915
    a /dev path for primary nodes
916
    True for secondary nodes
917

918
  """
919
  result = _RecursiveAssembleBD(disk, owner, as_primary)
920
  if isinstance(result, bdev.BlockDev):
921
    result = result.dev_path
922
  return result
923

    
924

    
925
def ShutdownBlockDevice(disk):
926
  """Shut down a block device.
927

928
  First, if the device is assembled (can `Attach()`), then the device
929
  is shutdown. Then the children of the device are shutdown.
930

931
  This function is called recursively. Note that we don't cache the
932
  children or such; as opposed to assemble, shutting down lower
933
  devices doesn't require that the upper device was active.
934

935
  """
936
  r_dev = _RecursiveFindBD(disk)
937
  if r_dev is not None:
938
    r_path = r_dev.dev_path
939
    result = r_dev.Shutdown()
940
    if result:
941
      DevCacheManager.RemoveCache(r_path)
942
  else:
943
    result = True
944
  if disk.children:
945
    for child in disk.children:
946
      result = result and ShutdownBlockDevice(child)
947
  return result
948

    
949

    
950
def MirrorAddChildren(parent_cdev, new_cdevs):
951
  """Extend a mirrored block device.
952

953
  """
954
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
955
  if parent_bdev is None:
956
    logging.error("Can't find parent device")
957
    return False
958
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
959
  if new_bdevs.count(None) > 0:
960
    logging.error("Can't find new device(s) to add: %s:%s",
961
                  new_bdevs, new_cdevs)
962
    return False
963
  parent_bdev.AddChildren(new_bdevs)
964
  return True
965

    
966

    
967
def MirrorRemoveChildren(parent_cdev, new_cdevs):
968
  """Shrink a mirrored block device.
969

970
  """
971
  parent_bdev = _RecursiveFindBD(parent_cdev)
972
  if parent_bdev is None:
973
    logging.error("Can't find parent in remove children: %s", parent_cdev)
974
    return False
975
  devs = []
976
  for disk in new_cdevs:
977
    rpath = disk.StaticDevPath()
978
    if rpath is None:
979
      bd = _RecursiveFindBD(disk)
980
      if bd is None:
981
        logging.error("Can't find dynamic device %s while removing children",
982
                      disk)
983
        return False
984
      else:
985
        devs.append(bd.dev_path)
986
    else:
987
      devs.append(rpath)
988
  parent_bdev.RemoveChildren(devs)
989
  return True
990

    
991

    
992
def GetMirrorStatus(disks):
993
  """Get the mirroring status of a list of devices.
994

995
  Args:
996
    disks: list of `objects.Disk`
997

998
  Returns:
999
    list of (mirror_done, estimated_time) tuples, which
1000
    are the result of bdev.BlockDevice.CombinedSyncStatus()
1001

1002
  """
1003
  stats = []
1004
  for dsk in disks:
1005
    rbd = _RecursiveFindBD(dsk)
1006
    if rbd is None:
1007
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
1008
    stats.append(rbd.CombinedSyncStatus())
1009
  return stats
1010

    
1011

    
1012
def _RecursiveFindBD(disk, allow_partial=False):
1013
  """Check if a device is activated.
1014

1015
  If so, return information about the real device.
1016

1017
  Args:
1018
    disk: the objects.Disk instance
1019
    allow_partial: don't abort the find if a child of the
1020
                   device can't be found; this is intended to be
1021
                   used when repairing mirrors
1022

1023
  Returns:
1024
    None if the device can't be found
1025
    otherwise the device instance
1026

1027
  """
1028
  children = []
1029
  if disk.children:
1030
    for chdisk in disk.children:
1031
      children.append(_RecursiveFindBD(chdisk))
1032

    
1033
  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
1034

    
1035

    
1036
def FindBlockDevice(disk):
1037
  """Check if a device is activated.
1038

1039
  If so, return information about the real device.
1040

1041
  Args:
1042
    disk: the objects.Disk instance
1043
  Returns:
1044
    None if the device can't be found
1045
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)
1046

1047
  """
1048
  rbd = _RecursiveFindBD(disk)
1049
  if rbd is None:
1050
    return rbd
1051
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
1052

    
1053

    
1054
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
1055
  """Write a file to the filesystem.
1056

1057
  This allows the master to overwrite(!) a file. It will only perform
1058
  the operation if the file belongs to a list of configuration files.
1059

1060
  """
1061
  if not os.path.isabs(file_name):
1062
    logging.error("Filename passed to UploadFile is not absolute: '%s'",
1063
                  file_name)
1064
    return False
1065

    
1066
  allowed_files = [
1067
    constants.CLUSTER_CONF_FILE,
1068
    constants.ETC_HOSTS,
1069
    constants.SSH_KNOWN_HOSTS_FILE,
1070
    constants.VNC_PASSWORD_FILE,
1071
    ]
1072

    
1073
  if file_name not in allowed_files:
1074
    logging.error("Filename passed to UploadFile not in allowed"
1075
                 " upload targets: '%s'", file_name)
1076
    return False
1077

    
1078
  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
1079
                  atime=atime, mtime=mtime)
1080
  return True
1081

    
1082

    
1083
def _ErrnoOrStr(err):
1084
  """Format an EnvironmentError exception.
1085

1086
  If the `err` argument has an errno attribute, it will be looked up
1087
  and converted into a textual EXXXX description. Otherwise the string
1088
  representation of the error will be returned.
1089

1090
  """
1091
  if hasattr(err, 'errno'):
1092
    detail = errno.errorcode[err.errno]
1093
  else:
1094
    detail = str(err)
1095
  return detail
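

# Illustrative sketch, not part of the original module: _ErrnoOrStr() maps an
# EnvironmentError to its symbolic errno name when one is available. The
# helper name is made up.
def _ExampleErrnoOrStr():
  """Hypothetical usage example for _ErrnoOrStr()."""
  err = IOError(errno.ENOENT, "No such file or directory")
  # errno.errorcode[errno.ENOENT] == 'ENOENT', so this returns 'ENOENT'
  return _ErrnoOrStr(err)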
1096

    
1097

    
1098
def _OSOndiskVersion(name, os_dir):
  """Compute and return the API versions of a given OS.
1100

1101
  This function will try to read the API version of the os given by
1102
  the 'name' parameter and residing in the 'os_dir' directory.
1103

1104
  The return value is a list of integers denoting the supported API
  versions; an errors.InvalidOS exception is raised if this is not a
  valid OS.
1106

1107
  """
1108
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1109

    
1110
  try:
1111
    st = os.stat(api_file)
1112
  except EnvironmentError, err:
1113
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1114
                           " found (%s)" % _ErrnoOrStr(err))
1115

    
1116
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1117
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1118
                           " a regular file")
1119

    
1120
  try:
1121
    f = open(api_file)
1122
    try:
1123
      api_versions = f.readlines()
1124
    finally:
1125
      f.close()
1126
  except EnvironmentError, err:
1127
    raise errors.InvalidOS(name, os_dir, "error while reading the"
1128
                           " API version (%s)" % _ErrnoOrStr(err))
1129

    
1130
  api_versions = [version.strip() for version in api_versions]
1131
  try:
1132
    api_versions = [int(version) for version in api_versions]
1133
  except (TypeError, ValueError), err:
1134
    raise errors.InvalidOS(name, os_dir,
1135
                           "API version is not integer (%s)" % str(err))
1136

    
1137
  return api_versions
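

# Worked example, not part of the original module: a 'ganeti_api_version'
# file containing the lines "5" and "10" makes _OSOndiskVersion() return
# [5, 10]; OSFromDisk() below then accepts the OS only if
# constants.OS_API_VERSION is among the returned versions.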
1138

    
1139

    
1140
def DiagnoseOS(top_dirs=None):
1141
  """Compute the validity for all OSes.
1142

1143
  Returns an OS object for each name in all the given top directories
1144
  (if not given defaults to constants.OS_SEARCH_PATH)
1145

1146
  Returns:
1147
    list of OS objects
1148

1149
  """
1150
  if top_dirs is None:
1151
    top_dirs = constants.OS_SEARCH_PATH
1152

    
1153
  result = []
1154
  for dir_name in top_dirs:
1155
    if os.path.isdir(dir_name):
1156
      try:
1157
        f_names = utils.ListVisibleFiles(dir_name)
1158
      except EnvironmentError, err:
1159
        logging.exception("Can't list the OS directory %s", dir_name)
1160
        break
1161
      for name in f_names:
1162
        try:
1163
          os_inst = OSFromDisk(name, base_dir=dir_name)
1164
          result.append(os_inst)
1165
        except errors.InvalidOS, err:
1166
          result.append(objects.OS.FromInvalidOS(err))
1167

    
1168
  return result
1169

    
1170

    
1171
def OSFromDisk(name, base_dir=None):
1172
  """Create an OS instance from disk.
1173

1174
  This function will return an OS instance if the given name is a
1175
  valid OS name. Otherwise, it will raise an appropriate
1176
  `errors.InvalidOS` exception, detailing why this is not a valid
1177
  OS.
1178

1179
  @type base_dir: string
1180
  @keyword base_dir: Base directory containing OS installations.
1181
                     Defaults to a search in all the OS_SEARCH_PATH dirs.
1182

1183
  """
1184

    
1185
  if base_dir is None:
1186
    os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1187
    if os_dir is None:
1188
      raise errors.InvalidOS(name, None, "OS dir not found in search path")
1189
  else:
1190
    os_dir = os.path.sep.join([base_dir, name])
1191

    
1192
  api_versions = _OSOndiskVersion(name, os_dir)
1193

    
1194
  if constants.OS_API_VERSION not in api_versions:
1195
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1196
                           " (found %s want %s)"
1197
                           % (api_versions, constants.OS_API_VERSION))
1198

    
1199
  # OS Scripts dictionary, we will populate it with the actual script names
1200
  os_scripts = dict.fromkeys(constants.OS_SCRIPTS)
1201

    
1202
  for script in os_scripts:
1203
    os_scripts[script] = os.path.sep.join([os_dir, script])
1204

    
1205
    try:
1206
      st = os.stat(os_scripts[script])
1207
    except EnvironmentError, err:
1208
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1209
                             (script, _ErrnoOrStr(err)))
1210

    
1211
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1212
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1213
                             script)
1214

    
1215
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1216
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1217
                             script)
1218

    
1219

    
1220
  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1221
                    create_script=os_scripts[constants.OS_SCRIPT_CREATE],
1222
                    export_script=os_scripts[constants.OS_SCRIPT_EXPORT],
1223
                    import_script=os_scripts[constants.OS_SCRIPT_IMPORT],
1224
                    rename_script=os_scripts[constants.OS_SCRIPT_RENAME],
1225
                    api_versions=api_versions)
1226

    
1227
def OSEnvironment(instance, debug=0):
1228
  """Calculate the environment for an os script.
1229

1230
  @type instance: instance object
1231
  @param instance: target instance for the os script run
1232
  @type debug: integer
1233
  @param debug: debug level (0 or 1, for os api 10)
1234
  @rtype: dict
1235
  @return: dict of environment variables
1236

1237
  """
1238
  result = {}
1239
  result['OS_API_VERSION'] = '%d' % constants.OS_API_VERSION
1240
  result['INSTANCE_NAME'] = instance.name
1241
  result['HYPERVISOR'] = instance.hypervisor
1242
  result['DISK_COUNT'] = '%d' % len(instance.disks)
1243
  result['NIC_COUNT'] = '%d' % len(instance.nics)
1244
  result['DEBUG_LEVEL'] = '%d' % debug
1245
  for idx, disk in enumerate(instance.disks):
1246
    real_disk = _RecursiveFindBD(disk)
1247
    if real_disk is None:
1248
      raise errors.BlockDeviceError("Block device '%s' is not set up" %
1249
                                    str(disk))
1250
    real_disk.Open()
1251
    result['DISK_%d_PATH' % idx] = real_disk.dev_path
1252
    # FIXME: When disks will have read-only mode, populate this
1253
    result['DISK_%d_ACCESS' % idx] = 'W'
1254
    if constants.HV_DISK_TYPE in instance.hvparams:
1255
      result['DISK_%d_FRONTEND_TYPE' % idx] = \
1256
        instance.hvparams[constants.HV_DISK_TYPE]
1257
    if disk.dev_type in constants.LDS_BLOCK:
1258
      result['DISK_%d_BACKEND_TYPE' % idx] = 'block'
1259
    elif disk.dev_type == constants.LD_FILE:
1260
      result['DISK_%d_BACKEND_TYPE' % idx] = \
1261
        'file:%s' % disk.physical_id[0]
1262
  for idx, nic in enumerate(instance.nics):
1263
    result['NIC_%d_MAC' % idx] = nic.mac
1264
    if nic.ip:
1265
      result['NIC_%d_IP' % idx] = nic.ip
1266
    result['NIC_%d_BRIDGE' % idx] = nic.bridge
1267
    if constants.HV_NIC_TYPE in instance.hvparams:
1268
      result['NIC_%d_FRONTEND_TYPE' % idx] = \
1269
        instance.hvparams[constants.HV_NIC_TYPE]
1270

    
1271
  return result
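

# Worked example, not part of the original module: for a hypothetical
# instance with one LVM-backed disk and one NIC, OSEnvironment() returns
# something like (values abbreviated and made up):
#
#   {'OS_API_VERSION': '10', 'INSTANCE_NAME': 'inst1.example.com',
#    'HYPERVISOR': 'xen-pvm', 'DISK_COUNT': '1', 'NIC_COUNT': '1',
#    'DEBUG_LEVEL': '0', 'DISK_0_PATH': '/dev/xenvg/...',
#    'DISK_0_ACCESS': 'W', 'DISK_0_BACKEND_TYPE': 'block',
#    'NIC_0_MAC': 'aa:00:00:11:22:33', 'NIC_0_BRIDGE': 'xen-br0'}
#
# All values are strings, since they end up in the process environment of
# the OS scripts.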
1272

    
1273
def GrowBlockDevice(disk, amount):
1274
  """Grow a stack of block devices.
1275

1276
  This function is called recursively, with the children being
  resized first.
1278

1279
  Args:
1280
    disk: the disk to be grown
1281

1282
  Returns: a tuple of (status, result), with:
1283
    status: the result (true/false) of the operation
1284
    result: the error message if the operation failed, otherwise not used
1285

1286
  """
1287
  r_dev = _RecursiveFindBD(disk)
1288
  if r_dev is None:
1289
    return False, "Cannot find block device %s" % (disk,)
1290

    
1291
  try:
1292
    r_dev.Grow(amount)
1293
  except errors.BlockDeviceError, err:
1294
    return False, str(err)
1295

    
1296
  return True, None
1297

    
1298

    
1299
def SnapshotBlockDevice(disk):
1300
  """Create a snapshot copy of a block device.
1301

1302
  This function is called recursively, and the snapshot is actually created
1303
  just for the leaf lvm backend device.
1304

1305
  Args:
1306
    disk: the disk to be snapshotted
1307

1308
  Returns:
1309
    a config entry for the actual lvm device snapshotted.
1310

1311
  """
1312
  if disk.children:
1313
    if len(disk.children) == 1:
1314
      # only one child, let's recurse on it
1315
      return SnapshotBlockDevice(disk.children[0])
1316
    else:
1317
      # more than one child, choose one that matches
1318
      for child in disk.children:
1319
        if child.size == disk.size:
1320
          # return implies breaking the loop
1321
          return SnapshotBlockDevice(child)
1322
  elif disk.dev_type == constants.LD_LV:
1323
    r_dev = _RecursiveFindBD(disk)
1324
    if r_dev is not None:
1325
      # let's stay on the safe side and ask for the full size, for now
1326
      return r_dev.Snapshot(disk.size)
1327
    else:
1328
      return None
1329
  else:
1330
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1331
                                 " '%s' of type '%s'" %
1332
                                 (disk.unique_id, disk.dev_type))
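

# Illustrative sketch, not part of the original module: for a DRBD-backed
# disk whose children are a data LV (same size as the disk) and a small
# metadata LV, the recursion above descends into the size-matching child and
# takes the snapshot on that leaf LVM device; the return value is whatever
# r_dev.Snapshot() produces, i.e. the config entry mentioned in the
# docstring above.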
1333

    
1334

    
1335
def ExportSnapshot(disk, dest_node, instance, cluster_name):
1336
  """Export a block device snapshot to a remote node.
1337

1338
  Args:
1339
    disk: the snapshot block device
1340
    dest_node: the node to send the image to
1341
    instance: instance being exported
1342

1343
  Returns:
1344
    True if successful, False otherwise.
1345

1346
  """
1347
  # TODO(ultrotter): Import/Export still to be converted to OS API 10
1348
  logging.error("Import/Export still to be converted to OS API 10")
1349
  return False
1350

    
1351
  inst_os = OSFromDisk(instance.os)
1352
  export_script = inst_os.export_script
1353

    
1354
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1355
                                     instance.name, int(time.time()))
1356
  if not os.path.exists(constants.LOG_OS_DIR):
1357
    os.mkdir(constants.LOG_OS_DIR, 0750)
1358

    
1359
  real_os_dev = _RecursiveFindBD(disk)
1360
  if real_os_dev is None:
1361
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1362
                                  str(disk))
1363
  real_os_dev.Open()
1364

    
1365
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1366
  destfile = disk.physical_id[1]
1367

    
1368
  # the target command is built out of three individual commands,
1369
  # which are joined by pipes; we check each individual command for
1370
  # valid parameters
1371

    
1372
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1373
                               export_script, instance.name,
1374
                               real_os_dev.dev_path, logfile)
1375

    
1376
  comprcmd = "gzip"
1377

    
1378
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1379
                                destdir, destdir, destfile)
1380
  remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
1381
                                                   constants.GANETI_RUNAS,
1382
                                                   destcmd)
1383

    
1384
  # all commands have been checked, so we're safe to combine them
1385
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1386

    
1387
  result = utils.RunCmd(command)
1388

    
1389
  if result.failed:
1390
    logging.error("os snapshot export command '%s' returned error: %s"
1391
                  " output: %s", command, result.fail_reason, result.output)
1392
    return False
1393

    
1394
  return True
1395

    
1396

    
1397
def FinalizeExport(instance, snap_disks):
1398
  """Write out the export configuration information.
1399

1400
  Args:
1401
    instance: instance configuration
1402
    snap_disks: snapshot block devices
1403

1404
  Returns:
1405
    False in case of error, True otherwise.
1406

1407
  """
1408
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1409
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
1410

    
1411
  config = objects.SerializableConfigParser()
1412

    
1413
  config.add_section(constants.INISECT_EXP)
1414
  config.set(constants.INISECT_EXP, 'version', '0')
1415
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1416
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1417
  config.set(constants.INISECT_EXP, 'os', instance.os)
1418
  config.set(constants.INISECT_EXP, 'compression', 'gzip')
1419

    
1420
  config.add_section(constants.INISECT_INS)
1421
  config.set(constants.INISECT_INS, 'name', instance.name)
1422
  config.set(constants.INISECT_INS, 'memory', '%d' %
1423
             instance.beparams[constants.BE_MEMORY])
1424
  config.set(constants.INISECT_INS, 'vcpus', '%d' %
1425
             instance.beparams[constants.BE_VCPUS])
1426
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1427

    
1428
  nic_count = 0
1429
  for nic_count, nic in enumerate(instance.nics):
1430
    config.set(constants.INISECT_INS, 'nic%d_mac' %
1431
               nic_count, '%s' % nic.mac)
1432
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1433
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
1434
               '%s' % nic.bridge)
1435
  # TODO: redundant: on load we can read NICs until one doesn't exist
1436
  config.set(constants.INISECT_INS, 'nic_count', '%d' % nic_count)
1437

    
1438
  disk_count = 0
1439
  for disk_count, disk in enumerate(snap_disks):
1440
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1441
               ('%s' % disk.iv_name))
1442
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1443
               ('%s' % disk.physical_id[1]))
1444
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1445
               ('%d' % disk.size))
1446
  config.set(constants.INISECT_INS, 'disk_count', '%d' % disk_count)
1447

    
1448
  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1449
  cfo = open(cff, 'w')
1450
  try:
1451
    config.write(cfo)
1452
  finally:
1453
    cfo.close()
1454

    
1455
  shutil.rmtree(finaldestdir, True)
1456
  shutil.move(destdir, finaldestdir)
1457

    
1458
  return True
1459

    
1460

    
1461
def ExportInfo(dest):
1462
  """Get export configuration information.
1463

1464
  Args:
1465
    dest: directory containing the export
1466

1467
  Returns:
1468
    A serializable config file containing the export info.
1469

1470
  """
1471
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1472

    
1473
  config = objects.SerializableConfigParser()
1474
  config.read(cff)
1475

    
1476
  if (not config.has_section(constants.INISECT_EXP) or
1477
      not config.has_section(constants.INISECT_INS)):
1478
    return None
1479

    
1480
  return config
1481

    
1482

    
1483
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image,
1484
                         cluster_name):
1485
  """Import an os image into an instance.
1486

1487
  Args:
1488
    instance: the instance object
1489
    os_disk: the instance-visible name of the os device
1490
    swap_disk: the instance-visible name of the swap device
1491
    src_node: node holding the source image
1492
    src_image: path to the source image on src_node
1493

1494
  Returns:
1495
    False in case of error, True otherwise.
1496

1497
  """
1498
  # TODO(ultrotter): Import/Export still to be converted to OS API 10
1499
  logging.error("Import/Export still to be converted to OS API 10")
1500
  return False
1501

    
1502
  inst_os = OSFromDisk(instance.os)
1503
  import_script = inst_os.import_script
1504

    
1505
  os_device = instance.FindDisk(os_disk)
1506
  if os_device is None:
1507
    logging.error("Can't find this device-visible name '%s'", os_disk)
1508
    return False
1509

    
1510
  swap_device = instance.FindDisk(swap_disk)
1511
  if swap_device is None:
1512
    logging.error("Can't find this device-visible name '%s'", swap_disk)
1513
    return False
1514

    
1515
  real_os_dev = _RecursiveFindBD(os_device)
1516
  if real_os_dev is None:
1517
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1518
                                  str(os_device))
1519
  real_os_dev.Open()
1520

    
1521
  real_swap_dev = _RecursiveFindBD(swap_device)
1522
  if real_swap_dev is None:
1523
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1524
                                  str(swap_device))
1525
  real_swap_dev.Open()
1526

    
1527
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1528
                                        instance.name, int(time.time()))
1529
  if not os.path.exists(constants.LOG_OS_DIR):
1530
    os.mkdir(constants.LOG_OS_DIR, 0750)
1531

    
1532
  destcmd = utils.BuildShellCmd('cat %s', src_image)
1533
  remotecmd = _GetSshRunner(cluster_name).BuildCmd(src_node,
1534
                                                   constants.GANETI_RUNAS,
1535
                                                   destcmd)
1536

    
1537
  comprcmd = "gunzip"
1538
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1539
                               inst_os.path, import_script, instance.name,
1540
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1541
                               logfile)
1542

    
1543
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1544
  env = {'HYPERVISOR': instance.hypervisor}
1545

    
1546
  result = utils.RunCmd(command, env=env)
1547

    
1548
  if result.failed:
1549
    logging.error("os import command '%s' returned error: %s"
1550
                  " output: %s", command, result.fail_reason, result.output)
1551
    return False
1552

    
1553
  return True
1554

    
1555

    
1556
def ListExports():
1557
  """Return a list of exports currently available on this machine.
1558

1559
  """
1560
  if os.path.isdir(constants.EXPORT_DIR):
1561
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
1562
  else:
1563
    return []
1564

    
1565

    
1566
def RemoveExport(export):
1567
  """Remove an existing export from the node.
1568

1569
  Args:
1570
    export: the name of the export to remove
1571

1572
  Returns:
1573
    False in case of error, True otherwise.
1574

1575
  """
1576
  target = os.path.join(constants.EXPORT_DIR, export)
1577

    
1578
  shutil.rmtree(target)
1579
  # TODO: catch some of the relevant exceptions and provide a pretty
1580
  # error message if rmtree fails.
1581

    
1582
  return True
1583

    
1584

    
1585
def RenameBlockDevices(devlist):
1586
  """Rename a list of block devices.
1587

1588
  The devlist argument is a list of (disk, new_unique_id) tuples.
  The return value will be a combined boolean result (True only if
  all renames succeeded).
1591

1592
  """
1593
  result = True
1594
  for disk, unique_id in devlist:
1595
    dev = _RecursiveFindBD(disk)
1596
    if dev is None:
1597
      result = False
1598
      continue
1599
    try:
1600
      old_rpath = dev.dev_path
1601
      dev.Rename(unique_id)
1602
      new_rpath = dev.dev_path
1603
      if old_rpath != new_rpath:
1604
        DevCacheManager.RemoveCache(old_rpath)
1605
        # FIXME: we should add the new cache information here, like:
1606
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1607
        # but we don't have the owner here - maybe parse from existing
1608
        # cache? for now, we only lose lvm data when we rename, which
1609
        # is less critical than DRBD or MD
1610
    except errors.BlockDeviceError, err:
1611
      logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
1612
      result = False
1613
  return result
1614

    
1615

    
1616
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether the given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
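  # Example (illustrative values): with a cluster-wide base directory of
  # /srv/ganeti/file-storage, "/srv/ganeti/file-storage/inst1.example.com"
  # is accepted, while "/tmp/inst1" is rejected and None is returned.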
  cfg = _GetConfig()
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = cfg.GetFileStorageDir()
  if (os.path.commonprefix([file_storage_dir, base_file_storage_dir]) !=
      base_file_storage_dir):
    logging.error("file storage directory '%s' is not under base file"
                  " storage directory '%s'",
                  file_storage_dir, base_file_storage_dir)
    return None
  return file_storage_dir


def CreateFileStorageDir(file_storage_dir):
  """Create file storage directory.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    creation was successful or not

  """
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  result = True,
  if not file_storage_dir:
    result = False,
  else:
    if os.path.exists(file_storage_dir):
      if not os.path.isdir(file_storage_dir):
        logging.error("'%s' is not a directory", file_storage_dir)
        result = False,
    else:
      try:
        os.makedirs(file_storage_dir, 0750)
      except OSError, err:
        logging.error("Cannot create file storage directory '%s': %s",
                      file_storage_dir, err)
        result = False,
  return result


def RemoveFileStorageDir(file_storage_dir):
  """Remove file storage directory.

  Remove it only if it's empty. If it is not, log an error and return.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    removal was successful or not

  """
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  result = True,
  if not file_storage_dir:
    result = False,
  else:
    if os.path.exists(file_storage_dir):
      if not os.path.isdir(file_storage_dir):
        logging.error("'%s' is not a directory", file_storage_dir)
        result = False,
      # deletes dir only if empty, otherwise we want to return False
      try:
        os.rmdir(file_storage_dir)
      except OSError, err:
        logging.exception("Cannot remove file storage directory '%s'",
                          file_storage_dir)
        result = False,
  return result


def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
  """Rename the file storage directory.

  Args:
    old_file_storage_dir: string containing the old path
    new_file_storage_dir: string containing the new path

  Returns:
    tuple with first element a boolean indicating whether dir
    rename was successful or not

  """
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
  result = True,
  if not old_file_storage_dir or not new_file_storage_dir:
    result = False,
  else:
    if not os.path.exists(new_file_storage_dir):
      if os.path.isdir(old_file_storage_dir):
        try:
          os.rename(old_file_storage_dir, new_file_storage_dir)
        except OSError, err:
          logging.exception("Cannot rename '%s' to '%s'",
                            old_file_storage_dir, new_file_storage_dir)
          result = False,
      else:
        logging.error("'%s' is not a directory", old_file_storage_dir)
        result = False,
    else:
      if os.path.exists(old_file_storage_dir):
        logging.error("Cannot rename '%s' to '%s'. Both locations exist.",
                      old_file_storage_dir, new_file_storage_dir)
        result = False,
  return result


def _IsJobQueueFile(file_name):
  """Checks whether the given filename is in the queue directory.

  """
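  # Note: os.path.commonprefix compares strings character by character, not
  # path components, so this is a plain prefix check against the normalized
  # queue directory.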
  queue_dir = os.path.normpath(constants.QUEUE_DIR)
  result = (os.path.commonprefix([queue_dir, file_name]) == queue_dir)

  if not result:
    logging.error("'%s' is not a file in the queue directory",
                  file_name)

  return result


def JobQueueUpdate(file_name, content):
  """Updates a file in the queue directory.

  """
  if not _IsJobQueueFile(file_name):
    return False

  # Write and replace the file atomically
  utils.WriteFile(file_name, data=content)

  return True


def JobQueueRename(old, new):
  """Renames a job queue file.

  """
  if not (_IsJobQueueFile(old) and _IsJobQueueFile(new)):
    return False

  os.rename(old, new)

  return True


def JobQueueSetDrainFlag(drain_flag):
  """Set the drain flag for the queue.

  This will set or unset the queue drain flag.

  @type drain_flag: bool
  @param drain_flag: if True, will set the drain flag, otherwise reset it.

  """
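  # The drain flag is simply the presence of the (empty) drain file: creating
  # it drains the queue, removing it resumes normal operation.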
  if drain_flag:
    utils.WriteFile(constants.JOB_QUEUE_DRAIN_FILE, data="", close=True)
  else:
    utils.RemoveFile(constants.JOB_QUEUE_DRAIN_FILE)

  return True


def CloseBlockDevices(disks):
  """Closes the given block devices.

  This means they will be switched to secondary mode (in case of DRBD).

  """
  bdevs = []
  for cf in disks:
    rd = _RecursiveFindBD(cf)
    if rd is None:
      return (False, "Can't find device %s" % cf)
    bdevs.append(rd)

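  # Try to close every device and collect the errors instead of aborting on
  # the first failure, so that all devices get a chance to be switched to
  # secondary mode.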
  msg = []
  for rd in bdevs:
    try:
      rd.Close()
    except errors.BlockDeviceError, err:
      msg.append(str(err))
  if msg:
    return (False, "Can't make devices secondary: %s" % ",".join(msg))
  else:
    return (True, "All devices secondary")


def ValidateHVParams(hvname, hvparams):
  """Validates the given hypervisor parameters.

  @type hvname: string
  @param hvname: the hypervisor name
  @type hvparams: dict
  @param hvparams: the hypervisor parameters to be validated
  @rtype: tuple (bool, str)
  @return: tuple of (success, message)

  """
  try:
    hv_type = hypervisor.GetHypervisor(hvname)
    hv_type.ValidateParameters(hvparams)
    return (True, "Validation passed")
  except errors.HypervisorError, err:
    return (False, str(err))


class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

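      # Retry the wait if it is interrupted by a signal (EINTR); any other
      # error is propagated to the caller.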
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logging.exception("Error while closing fd %s", fd)
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not usually be overridden by child opcodes.

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

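    # Hooks for an opcode live in a per-phase directory under the hooks base
    # directory, e.g. (illustrative) <hooks_base_dir>/instance-start-pre.d/;
    # only executable files whose names match RE_MASK are run.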
    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr


class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Return value: tuple of:
       - run status (one of the IARUN_ constants)
       - stdout
       - stderr
       - fail reason (as from utils.RunResult)

    """
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

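    # The allocator input is handed to the script through a temporary file;
    # the script is invoked with that file name as its only argument and the
    # file is removed afterwards.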
    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)


class DevCacheManager(object):
  """Simple class for managing a cache of block device information.

  """
  _DEV_PREFIX = "/dev/"
  _ROOT_DIR = constants.BDEV_CACHE_DIR

  @classmethod
  def _ConvertPath(cls, dev_path):
    """Converts a /dev/name path to the cache file name.

    This replaces slashes with underscores and strips the /dev
    prefix. It then returns the full path to the cache file.

    """
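    # Example (illustrative): "/dev/md/1" is mapped to
    # "<_ROOT_DIR>/bdev_md_1".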
    if dev_path.startswith(cls._DEV_PREFIX):
      dev_path = dev_path[len(cls._DEV_PREFIX):]
    dev_path = dev_path.replace("/", "_")
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
    return fpath

  @classmethod
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
    """Updates the cache information for a given device.

    """
    if dev_path is None:
      logging.error("DevCacheManager.UpdateCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    if on_primary:
      state = "primary"
    else:
      state = "secondary"
    if iv_name is None:
      iv_name = "not_visible"
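    # The cache file holds a single line: "<owner> <primary|secondary> <iv_name>"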
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
    try:
      utils.WriteFile(fpath, data=fdata)
    except EnvironmentError, err:
      logging.exception("Can't update bdev cache for %s", dev_path)

  @classmethod
  def RemoveCache(cls, dev_path):
    """Remove data for a dev_path.

    """
    if dev_path is None:
      logging.error("DevCacheManager.RemoveCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    try:
      utils.RemoveFile(fpath)
    except EnvironmentError, err:
      logging.exception("Can't remove the bdev cache file for %s", dev_path)