Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 74c47259

History | View | Annotate | Download (58.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import stat
30
import errno
31
import re
32
import subprocess
33
import random
34
import logging
35
import tempfile
36

    
37
from ganeti import errors
38
from ganeti import utils
39
from ganeti import ssh
40
from ganeti import hypervisor
41
from ganeti import constants
42
from ganeti import bdev
43
from ganeti import objects
44
from ganeti import ssconf
45

    
46

    
47
def _GetConfig():
  """Build and return a cluster configuration reader.

  @rtype: L{ssconf.SimpleConfigReader}
  @return: a fresh reader for the cluster configuration values

  """
  cfg_reader = ssconf.SimpleConfigReader()
  return cfg_reader
def _GetSshRunner(cluster_name):
  """Build an SSH runner for this cluster.

  @type cluster_name: str
  @param cluster_name: the cluster name, passed through to
      L{ssh.SshRunner}

  """
  runner = ssh.SshRunner(cluster_name)
  return runner
def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded; defaults to the empty
      list (the previous mutable default argument ``exclude=[]`` was a
      bug: the same list object would be shared and mutated across
      calls via the normalization below)

  """
  if not os.path.isdir(path):
    return
  if exclude is None:
    exclude = []
  else:
    # Normalize excluded paths (on a fresh list, not the caller's)
    exclude = [os.path.normpath(i) for i in exclude]

  for rel_name in utils.ListVisibleFiles(path):
    full_name = os.path.normpath(os.path.join(path, rel_name))
    if full_name in exclude:
      continue
    # only plain files are removed; directories and symlinks are kept
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)
def JobQueuePurge():
  """Removes job queue files and archived jobs.

  The queue lock file is explicitly excluded from the cleanup.

  """
  _CleanDirectory(constants.QUEUE_DIR, exclude=[constants.JOB_QUEUE_LOCK_FILE])
  _CleanDirectory(constants.JOB_QUEUE_ARCHIVE_DIR)
def GetMasterInfo():
  """Returns master information.

  This is an utility function to compute master information, either
  for consumption here or from the node daemon.

  @rtype: tuple
  @return: (master_netdev, master_ip, master_name); all three elements
      are None if the cluster configuration is incomplete

  """
  try:
    cfg = _GetConfig()
    master_netdev = cfg.GetMasterNetdev()
    master_ip = cfg.GetMasterIP()
    master_node = cfg.GetMasterNode()
  except errors.ConfigurationError:
    logging.exception("Cluster configuration incomplete")
    # Must be a 3-tuple even on error: callers unpack three values
    # ("master_netdev, master_ip, _ = GetMasterInfo()"); the previous
    # 2-tuple return would raise ValueError at the call site.
    return (None, None, None)
  return (master_netdev, master_ip, master_node)
def StartMaster(start_daemons):
  """Activate local node as master node.

  The function always tries to activate the IP address of the master
  (unless someone else already owns it). If the start_daemons
  parameter is True, it also starts the master daemons
  (ganeti-masterd and ganeti-rapi).

  @type start_daemons: boolean
  @param start_daemons: whether to start the master daemons as well
  @rtype: boolean
  @return: True if all steps succeeded, False otherwise

  """
  master_netdev, master_ip, _ = GetMasterInfo()
  if not master_netdev:
    return False

  no_errors = True
  if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
    if utils.OwnIpAddress(master_ip):
      # we already have the ip:
      logging.debug("Already started")
    else:
      logging.error("Someone else has the master ip, not activating")
      no_errors = False
  else:
    ip_add_cmd = ["ip", "address", "add", "%s/32" % master_ip,
                  "dev", master_netdev, "label",
                  "%s:0" % master_netdev]
    result = utils.RunCmd(ip_add_cmd)
    if result.failed:
      logging.error("Can't activate master IP: %s", result.output)
      no_errors = False

    # announce the new address; we'll ignore the exit code of arping
    utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
                  "-s", master_ip, master_ip])

  # and now start the master and rapi daemons
  if start_daemons:
    for daemon in ('ganeti-masterd', 'ganeti-rapi'):
      result = utils.RunCmd([daemon])
      if result.failed:
        logging.error("Can't start daemon %s: %s", daemon, result.output)
        no_errors = False
  return no_errors
def StopMaster(stop_daemons):
  """Deactivate this node as master.

  The function always tries to remove the IP address of the master.
  If the stop_daemons parameter is True, it also stops the master
  daemons (ganeti-masterd and ganeti-rapi).

  @type stop_daemons: boolean
  @param stop_daemons: whether to stop the master daemons as well
  @rtype: boolean
  @return: False if the master information could not be read, True
      otherwise (IP removal failures are only logged)

  """
  master_netdev, master_ip, _ = GetMasterInfo()
  if not master_netdev:
    return False

  ip_del_cmd = ["ip", "address", "del", "%s/32" % master_ip,
                "dev", master_netdev]
  result = utils.RunCmd(ip_del_cmd)
  if result.failed:
    # log, but otherwise ignore the failure
    logging.error("Can't remove the master IP, error: %s", result.output)

  if stop_daemons:
    # stop/kill the rapi and the master daemon
    for daemon in (constants.RAPI_PID, constants.MASTERD_PID):
      pid = utils.ReadPidFile(utils.DaemonPidFileName(daemon))
      utils.KillProcess(pid)

  return True
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  """Joins this node to the cluster.

  This does the following:
      - updates the hostkeys of the machine (rsa and dsa)
      - adds the ssh private key to the user
      - adds the ssh public key to the users' authorized_keys file

  @type dsa: str
  @param dsa: the DSA host private key
  @type dsapub: str
  @param dsapub: the DSA host public key
  @type rsa: str
  @param rsa: the RSA host private key
  @type rsapub: str
  @param rsapub: the RSA host public key
  @type sshkey: str
  @param sshkey: the user's SSH private key
  @type sshpub: str
  @param sshpub: the user's SSH public key
  @rtype: boolean
  @return: True on success, False if the user ssh files could not be
      processed

  """
  # private host keys are written mode 0600, public ones 0644
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
  for name, content, mode in sshd_keys:
    utils.WriteFile(name, data=content, mode=mode)

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
                                                    mkdir=True)
  except errors.OpExecError, err:
    logging.exception("Error while processing user ssh files")
    return False

  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
    utils.WriteFile(name, data=content, mode=0600)

  utils.AddAuthorizedKey(auth_keys, sshpub)

  # restart the ssh service (presumably so the new host keys take
  # effect -- TODO confirm)
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])

  return True
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  Removes the node's data directory and job queue, deletes the user
  SSH key pair and removes our public key from the authorized keys;
  on full success the function does not return but raises
  L{errors.QuitGanetiException} to schedule a daemon shutdown.

  """
  _CleanDirectory(constants.DATA_DIR)
  JobQueuePurge()

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
  except errors.OpExecError:
    logging.exception("Error while processing ssh files")
    return

  pub_key_file = open(pub_key, 'r')
  try:
    # drop our own public key from the authorized_keys file
    utils.RemoveAuthorizedKey(auth_keys, pub_key_file.read(8192))
  finally:
    pub_key_file.close()

  utils.RemoveFile(priv_key)
  utils.RemoveFile(pub_key)

  # Return a reassuring string to the caller, and quit
  raise errors.QuitGanetiException(False, 'Shutdown scheduled')
def GetNodeInfo(vgname, hypervisor_type):
  """Gives back a hash with different informations about the node.

  @type vgname: C{string}
  @param vgname: the name of the volume group to ask for disk space information
  @type hypervisor_type: C{str}
  @param hypervisor_type: the name of the hypervisor to ask for
      memory information
  @rtype: C{dict}
  @return: dictionary with the following keys:
      - vg_size is the size of the configured volume group in MiB
      - vg_free is the free size of the volume group in MiB
      - memory_dom0 is the memory allocated for domain0 in MiB
      - memory_free is the currently available (free) ram in MiB
      - memory_total is the total number of ram in MiB
      - bootid is the kernel's boot_id value

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  # merge in whatever the hypervisor reports (memory figures)
  hyp_info = hypervisor.GetHypervisor(hypervisor_type).GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  # the kernel's boot_id is regenerated on every boot
  boot_id_file = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    node_info["bootid"] = boot_id_file.read(128).rstrip("\n")
  finally:
    boot_id_file.close()

  return node_info
def VerifyNode(what, cluster_name):
  """Verify the status of the local node.

  Based on the input L{what} parameter, various checks are done on the
  local node.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: str
  @param cluster_name: the cluster name, used to build the ssh runner
  @rtype: dict
  @return: one entry per requested check, holding that check's results

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = my_dict = {}
    for hv_name in what['hypervisor']:
      my_dict[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    # NOTE(review): this shuffles the caller's list in place,
    # randomizing the contact order -- presumably to spread load over
    # the cluster; confirm before relying on input order afterwards
    random.shuffle(what['nodelist'])
    for node in what['nodelist']:
      success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node)
      if not success:
        # only failing nodes are recorded
        result['nodelist'][node] = message
  if 'node-net-test' in what:
    result['node-net-test'] = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    # find our own primary/secondary IPs to use as ping sources
    for name, pip, sip in what['node-net-test']:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      result['node-net-test'][my_name] = ("Can't find my own"
                                          " primary/secondary IP"
                                          " in the node list")
    else:
      port = utils.GetNodeDaemonPort()
      for name, pip, sip in what['node-net-test']:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        # only test the secondary IP when it differs from the primary
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          result['node-net-test'][name] = ("failure using the %s"
                                           " interface(s)" %
                                           " and ".join(fail))

  return result
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  @type vg_name: str
  @param vg_name: the volume group to query
  @return: dictionary of all partions (key) with their size (in MiB),
      inactive and online status: {'test1': ('20.06', True, True)};
      on command failure, the error output of lvs is returned instead

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logging.error("Failed to list logical volumes, lvs output: %s",
                  result.output)
    return result.output

  # raw string: the previous plain string contained "\|", an invalid
  # string escape that only worked by accident (and is an error in
  # newer Python versions); the regexp itself is unchanged
  valid_line_re = re.compile(r"^ *([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
  for line in result.stdout.splitlines():
    line = line.strip()
    match = valid_line_re.match(line)
    if not match:
      logging.error("Invalid line returned from lvs output: '%s'", line)
      continue
    name, size, attr = match.groups()
    # lv_attr is a 6-character flag field: position 4 holds the state
    # ('-' meaning inactive) and position 5 the device-open flag ('o')
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
def ListVolumeGroups():
  """List the volume groups and their size.

  Thin wrapper over L{utils.ListVolumeGroups}.

  @rtype: dict
  @return: dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
def NodeVolumes():
  """List all volumes on this node.

  @return: a list of dictionaries, one per logical volume, with the
      keys 'name', 'size', 'dev' and 'vg'; an empty dict if the lvs
      command failed

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logging.error("Failed to list logical volumes, lvs output: %s",
                  result.output)
    return {}

  volumes = []
  for line in result.stdout.splitlines():
    fields = line.split('|')
    # a valid data line has at least four separated fields
    if len(fields) < 4:
      continue
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      # keep only the device path, dropping any "(...)" suffix
      'dev': fields[2].strip().split('(')[0],
      'vg': fields[3].strip(),
      })
  return volumes
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  @param bridges_list: the bridges to check for
  @rtype: boolean
  @return: True if all of them exist, False otherwise

  """
  # stop at the first missing bridge
  for bridge_name in bridges_list:
    if not utils.BridgeExists(bridge_name):
      return False
  return True
def GetInstanceList(hypervisor_list):
431
  """Provides a list of instances.
432

433
  @type hypervisor_list: list
434
  @param hypervisor_list: the list of hypervisors to query information
435

436
  @rtype: list
437
  @return: a list of all running instances on the current node
438
             - instance1.example.com
439
             - instance2.example.com
440

441
  """
442
  results = []
443
  for hname in hypervisor_list:
444
    try:
445
      names = hypervisor.GetHypervisor(hname).ListInstances()
446
      results.extend(names)
447
    except errors.HypervisorError, err:
448
      logging.exception("Error enumerating instances for hypevisor %s", hname)
449
      # FIXME: should we somehow not propagate this to the master?
450
      raise
451

    
452
  return results
453

    
454

    
455
def GetInstanceInfo(instance, hname):
  """Gives back the informations about an instance as a dictionary.

  @type instance: string
  @param instance: the instance name
  @type hname: string
  @param hname: the hypervisor type of the instance

  @rtype: dict
  @return: dictionary with the following keys (empty if the hypervisor
      does not know the instance):
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)

  """
  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance)
  if iinfo is None:
    return {}

  # expose only the fields we need out of the hypervisor info tuple
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
def GetAllInstancesInfo(hypervisor_list):
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  @type hypervisor_list: list
  @param hypervisor_list: list of hypervisors to query for instance data

  @rtype: dict of dicts
  @return: dictionary of instance: data, with data having the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpuus: the number of vcpus

  """
  output = {}

  for hname in hypervisor_list:
    iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo()
    if not iinfo:
      continue
    for name, inst_id, memory, vcpus, state, times in iinfo:
      value = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }
      # the same instance can be reported by more than one hypervisor,
      # but only with identical data
      if name in output and output[name] != value:
        raise errors.HypervisorError("Instance %s running duplicate"
                                     " with different parameters" % name)
      output[name] = value

  return output
def AddOSToInstance(instance):
  """Add an OS to an instance.

  Runs the OS's create script, logging its output to a per-run file
  under constants.LOG_OS_DIR.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @rtype: boolean
  @return: True if the create script succeeded, False otherwise

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script
  create_env = OSEnvironment(instance)

  # log file name embeds the OS, instance name and a timestamp so
  # successive runs do not overwrite each other
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # NOTE(review): "&>" redirection is a bash-ism; this assumes RunCmd
  # executes the string through a shell that supports it -- confirm
  command = utils.BuildShellCmd("cd %s && %s &>%s",
                                inst_os.path, create_script, logfile)

  result = utils.RunCmd(command, env=create_env)
  if result.failed:
    logging.error("os create command '%s' returned error: %s, logfile: %s,"
                  " output: %s", command, result.fail_reason, logfile,
                  result.output)
    return False

  return True
def RunRenameInstance(instance, old_name):
550
  """Run the OS rename script for an instance.
551

552
  @type instance: objects.Instance
553
  @param instance: Instance whose OS is to be installed
554
  @type old_name: string
555
  @param old_name: previous instance name
556

557
  """
558
  inst_os = OSFromDisk(instance.os)
559

    
560
  script = inst_os.rename_script
561
  rename_env = OSEnvironment(instance)
562
  rename_env['OLD_INSTANCE_NAME'] = old_name
563

    
564
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
565
                                           old_name,
566
                                           instance.name, int(time.time()))
567
  if not os.path.exists(constants.LOG_OS_DIR):
568
    os.mkdir(constants.LOG_OS_DIR, 0750)
569

    
570
  command = utils.BuildShellCmd("cd %s && %s &>%s",
571
                                inst_os.path, script, logfile)
572

    
573
  result = utils.RunCmd(command, env=rename_env)
574

    
575
  if result.failed:
576
    logging.error("os create command '%s' returned error: %s output: %s",
577
                  command, result.fail_reason, result.output)
578
    return False
579

    
580
  return True
581

    
582

    
583
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.

  """
  # all-None dict; this is also what we return on any error below
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    logging.error("volume group %s not present", vg_name)
    return retdic
  # strip surrounding whitespace and any trailing separator before
  # splitting into the three expected fields
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError, err:
      # keep the all-None dict when the numbers cannot be parsed
      logging.exception("Fail to parse vgs output")
  else:
    logging.error("vgs output has the wrong number of fields (expected"
                  " three): %s", str(valarr))
  return retdic
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  @param instance: the instance whose disks we gather
  @return: list of (disk, device) pairs, one per instance disk
  @raise errors.BlockDeviceError: if any disk is not set up

  """
  def _open_one(disk):
    # locate the assembled device and open it for use
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    device.Open()
    return (disk, device)

  return [_open_one(disk) for disk in instance.disks]
def StartInstance(instance, extra_args):
  """Start an instance.

  @type instance: instance object
  @param instance: the instance object
  @param extra_args: extra arguments handed over to the hypervisor
  @rtype: boolean
  @return: whether the startup was successful or not

  """
  # already running: nothing to do
  if instance.name in GetInstanceList([instance.hypervisor]):
    return True

  block_devices = _GatherBlockDevs(instance)
  hyper = hypervisor.GetHypervisor(instance.hypervisor)

  try:
    hyper.StartInstance(instance, block_devices, extra_args)
  except errors.HypervisorError:
    logging.exception("Failed to start instance")
    return False

  return True
def ShutdownInstance(instance):
  """Shut an instance down.

  Asks the hypervisor for a clean stop, then polls for up to about two
  minutes; if the instance is still running, falls back to a forced
  stop.

  @type instance: instance object
  @param instance: the instance object
  @rtype: boolean
  @return: whether the shutdown was successful or not

  """
  hv_name = instance.hypervisor
  running_instances = GetInstanceList([hv_name])

  # not running: nothing to do
  if instance.name not in running_instances:
    return True

  hyper = hypervisor.GetHypervisor(hv_name)
  try:
    hyper.StopInstance(instance)
  except errors.HypervisorError, err:
    logging.error("Failed to stop instance")
    return False

  # test every 10secs for 2min
  # NOTE(review): shutdown_ok is assigned but never used afterwards
  shutdown_ok = False

  time.sleep(1)
  for dummy in range(11):
    if instance.name not in GetInstanceList([hv_name]):
      break
    time.sleep(10)
  else:
    # the for-else branch runs only when the loop was never broken,
    # i.e. the instance was still running after all retries:
    # the shutdown did not succeed
    logging.error("shutdown of '%s' unsuccessful, using destroy", instance)

    try:
      hyper.StopInstance(instance, force=True)
    except errors.HypervisorError, err:
      logging.exception("Failed to stop instance")
      return False

    # give the forced stop a moment to take effect before re-checking
    time.sleep(1)
    if instance.name in GetInstanceList([hv_name]):
      logging.error("could not shutdown instance '%s' even by destroy",
                    instance.name)
      return False

  return True
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.

  @param instance: the instance object to reboot
  @param reboot_type: either constants.INSTANCE_REBOOT_SOFT (reboot
      via the hypervisor) or constants.INSTANCE_REBOOT_HARD (shutdown
      followed by startup); any other value raises ParameterError
  @param extra_args: extra arguments passed to StartInstance on a
      hard reboot
  @rtype: boolean
  @return: whether the reboot succeeded

  """
  if instance.name not in GetInstanceList([instance.hypervisor]):
    logging.error("Cannot reboot instance that is not running")
    return False

  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    try:
      hyper.RebootInstance(instance)
    except errors.HypervisorError:
      logging.exception("Failed to soft reboot instance")
      return False
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    try:
      ShutdownInstance(instance)
      StartInstance(instance, extra_args)
    except errors.HypervisorError:
      logging.exception("Failed to hard reboot instance")
      return False
  else:
    raise errors.ParameterError("reboot_type invalid")

  return True
def MigrateInstance(instance, target, live):
752
  """Migrates an instance to another node.
753

754
  @type instance: C{objects.Instance}
755
  @param instance: the instance definition
756
  @type target: string
757
  @param target: the target node name
758
  @type live: boolean
759
  @param live: whether the migration should be done live or not (the
760
      interpretation of this parameter is left to the hypervisor)
761
  @rtype: tuple
762
  @return: a tuple of (success, msg) where:
763
      - succes is a boolean denoting the success/failure of the operation
764
      - msg is a string with details in case of failure
765

766
  """
767
  hyper = hypervisor.GetHypervisor(instance.hypervisor_name)
768

    
769
  try:
770
    hyper.MigrateInstance(instance.name, target, live)
771
  except errors.HypervisorError, err:
772
    msg = "Failed to migrate instance: %s" % str(err)
773
    logging.error(msg)
774
    return (False, msg)
775
  return (True, "Migration successfull")
776

    
777

    
778
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object
   size: the size of the physical underlying device
   owner: a string with the name of the instance
   on_primary: a boolean indicating if it is the primary node or not
   info: string that will be sent to the physical device creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  # recursively assemble the children first; the new device is built
  # on top of them
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      clist.append(crdev)
  try:
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logging.info("removing existing device %s", disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # best-effort cleanup: most likely the device simply does not
    # exist yet, in which case there is nothing to remove
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logging.error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    # record the assembled device in the node-local device cache
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
def RemoveBlockDevice(disk):
835
  """Remove a block device.
836

837
  This is intended to be called recursively.
838

839
  """
840
  try:
841
    # since we are removing the device, allow a partial match
842
    # this allows removal of broken mirrors
843
    rdev = _RecursiveFindBD(disk, allow_partial=True)
844
  except errors.BlockDeviceError, err:
845
    # probably can't attach
846
    logging.info("Can't attach to device %s in remove", disk)
847
    rdev = None
848
  if rdev is not None:
849
    r_path = rdev.dev_path
850
    result = rdev.Remove()
851
    if result:
852
      DevCacheManager.RemoveCache(r_path)
853
  else:
854
    result = True
855
  if disk.children:
856
    for child in disk.children:
857
      result = result and RemoveBlockDevice(child)
858
  return result
859

    
860

    
861
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    owner: the name of the instance the disk belongs to
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device (when the device is assembled on this node)
    or True (on secondary nodes that don't need the device assembled)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    # compute how many failed (None) children we may tolerate
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        # tolerate failures up to the limit computed above; past
        # that, re-raise and fail the whole assembly
        if children.count(None) >= mcn:
          raise
        cdev = None
        logging.debug("Error in child activation: %s", str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    # record the assembled device in the node-local device cache
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    result = True
  return result
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  @return: a /dev path for primary nodes, True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  # on nodes where the device was assembled, expose only its /dev path
  if isinstance(assembled, bdev.BlockDev):
    return assembled.dev_path
  return assembled
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  @rtype: boolean
  @return: True if the device and all children were shut down

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      DevCacheManager.RemoveCache(r_path)
  else:
    # nothing assembled, nothing to shut down
    result = True
  if disk.children:
    for child in disk.children:
      # always recurse into every child; the previous
      # "result = result and ShutdownBlockDevice(child)" form
      # short-circuited and skipped the remaining children after the
      # first failure
      child_result = ShutdownBlockDevice(child)
      result = result and child_result
  return result
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Locates the parent device (partial assembly allowed, as we may be
  repairing it) and every new child on this node, then attaches the
  children. Returns True on success, False if any lookup failed.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logging.error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if None in new_bdevs:
    logging.error("Can't find new device(s) to add: %s:%s",
                  new_bdevs, new_cdevs)
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
965

    
966

    
967
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Resolves each child to a device path (the static path when the device
  declares one, otherwise via a dynamic lookup) and detaches the
  children from the parent. Returns True on success, False if the
  parent or any child cannot be resolved.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logging.error("Can't find parent in remove children: %s", parent_cdev)
    return False
  devs = []
  for disk in new_cdevs:
    rpath = disk.StaticDevPath()
    if rpath is not None:
      devs.append(rpath)
      continue
    # no static path: the device must be assembled to learn its path
    bd = _RecursiveFindBD(disk)
    if bd is None:
      logging.error("Can't find dynamic device %s while removing children",
                    disk)
      return False
    devs.append(bd.dev_path)
  parent_bdev.RemoveChildren(devs)
  return True
990

    
991

    
992
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises errors.BlockDeviceError if any device cannot be found.

  """
  found = []
  for disk in disks:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    found.append(dev)
  return [dev.CombinedSyncStatus() for dev in found]
1010

    
1011

    
1012
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): allow_partial is accepted for interface compatibility
  # but is not consulted in this body — confirm intended behavior
  if disk.children:
    children = [_RecursiveFindBD(chdisk) for chdisk in disk.children]
  else:
    children = []
  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
1034

    
1035

    
1036
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  dev = _RecursiveFindBD(disk)
  if dev is None:
    return None
  static_info = (dev.dev_path, dev.major, dev.minor)
  return static_info + dev.GetSyncStatus()
1052

    
1053

    
1054
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  """
  if not os.path.isabs(file_name):
    logging.error("Filename passed to UploadFile is not absolute: '%s'",
                  file_name)
    return False

  # whitelist of files the master is allowed to overwrite on this node
  allowed_files = (
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.VNC_PASSWORD_FILE,
    )
  if file_name not in allowed_files:
    logging.error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'", file_name)
    return False

  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return True
1081

    
1082

    
1083
def _ErrnoOrStr(err):
1084
  """Format an EnvironmentError exception.
1085

1086
  If the `err` argument has an errno attribute, it will be looked up
1087
  and converted into a textual EXXXX description. Otherwise the string
1088
  representation of the error will be returned.
1089

1090
  """
1091
  if hasattr(err, 'errno'):
1092
    detail = errno.errorcode[err.errno]
1093
  else:
1094
    detail = str(err)
1095
  return detail
1096

    
1097

    
1098
def _OSOndiskVersion(name, os_dir):
  """Compute and return the API version of a given OS.

  This function will try to read the API version of the os given by
  the 'name' parameter and residing in the 'os_dir' directory.

  Despite the historical wording, the return value is the list of
  integer API versions read from the 'ganeti_api_version' file; on any
  problem an errors.InvalidOS exception is raised (this function never
  returns None).

  """
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  # stat first, so that "file missing" and "wrong file type" produce
  # distinct error messages
  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
                           " a regular file")

  try:
    f = open(api_file)
    try:
      api_versions = f.readlines()
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  # one version per line; strip whitespace/newlines before conversion
  api_versions = [version.strip() for version in api_versions]
  try:
    api_versions = [int(version) for version in api_versions]
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, os_dir,
                           "API version is not integer (%s)" % str(err))

  return api_versions
1138

    
1139

    
1140
def DiagnoseOS(top_dirs=None):
1141
  """Compute the validity for all OSes.
1142

1143
  Returns an OS object for each name in all the given top directories
1144
  (if not given defaults to constants.OS_SEARCH_PATH)
1145

1146
  Returns:
1147
    list of OS objects
1148

1149
  """
1150
  if top_dirs is None:
1151
    top_dirs = constants.OS_SEARCH_PATH
1152

    
1153
  result = []
1154
  for dir_name in top_dirs:
1155
    if os.path.isdir(dir_name):
1156
      try:
1157
        f_names = utils.ListVisibleFiles(dir_name)
1158
      except EnvironmentError, err:
1159
        logging.exception("Can't list the OS directory %s", dir_name)
1160
        break
1161
      for name in f_names:
1162
        try:
1163
          os_inst = OSFromDisk(name, base_dir=dir_name)
1164
          result.append(os_inst)
1165
        except errors.InvalidOS, err:
1166
          result.append(objects.OS.FromInvalidOS(err))
1167

    
1168
  return result
1169

    
1170

    
1171
def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  @type base_dir: string
  @keyword base_dir: Base directory containing OS installations.
                     Defaults to a search in all the OS_SEARCH_PATH dirs.

  """

  if base_dir is None:
    os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
    if os_dir is None:
      raise errors.InvalidOS(name, None, "OS dir not found in search path")
  else:
    os_dir = os.path.sep.join([base_dir, name])

  # raises InvalidOS if the version file is missing or malformed
  api_versions = _OSOndiskVersion(name, os_dir)

  if constants.OS_API_VERSION not in api_versions:
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
                           " (found %s want %s)"
                           % (api_versions, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = dict.fromkeys(constants.OS_SCRIPTS)

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    # each script must exist, be a regular file and be executable by
    # the owner, otherwise the OS definition is rejected
    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
                             script)

    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
                             script)

  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                    create_script=os_scripts[constants.OS_SCRIPT_CREATE],
                    export_script=os_scripts[constants.OS_SCRIPT_EXPORT],
                    import_script=os_scripts[constants.OS_SCRIPT_IMPORT],
                    rename_script=os_scripts[constants.OS_SCRIPT_RENAME],
                    api_versions=api_versions)
1226

    
1227
def OSEnvironment(instance, debug=0):
  """Calculate the environment for an os script.

  @type instance: instance object
  @param instance: target instance for the os script run
  @type debug: integer
  @param debug: debug level (0 or 1, for os api 10)
  @rtype: dict
  @return: dict of environment variables
  @raise errors.BlockDeviceError: if a disk of the instance is not
      assembled on this node

  """
  result = {}
  # instance-wide variables
  result['OS_API_VERSION'] = '%d' % constants.OS_API_VERSION
  result['INSTANCE_NAME'] = instance.name
  result['HYPERVISOR'] = instance.hypervisor
  result['DISK_COUNT'] = '%d' % len(instance.disks)
  result['NIC_COUNT'] = '%d' % len(instance.nics)
  result['DEBUG_LEVEL'] = '%d' % debug
  # one DISK_<n>_* set per disk; the block device must already be
  # assembled locally so that its /dev path can be exported
  for idx, disk in enumerate(instance.disks):
    real_disk = _RecursiveFindBD(disk)
    if real_disk is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                    str(disk))
    real_disk.Open()
    result['DISK_%d_PATH' % idx] = real_disk.dev_path
    # FIXME: When disks will have read-only mode, populate this
    result['DISK_%d_ACCESS' % idx] = 'W'
    if constants.HV_DISK_TYPE in instance.hvparams:
      result['DISK_%d_FRONTEND_TYPE' % idx] = \
        instance.hvparams[constants.HV_DISK_TYPE]
    if disk.dev_type in constants.LDS_BLOCK:
      result['DISK_%d_BACKEND_TYPE' % idx] = 'block'
    elif disk.dev_type == constants.LD_FILE:
      result['DISK_%d_BACKEND_TYPE' % idx] = \
        'file:%s' % disk.physical_id[0]
  # one NIC_<n>_* set per network interface
  for idx, nic in enumerate(instance.nics):
    result['NIC_%d_MAC' % idx] = nic.mac
    if nic.ip:
      result['NIC_%d_IP' % idx] = nic.ip
    result['NIC_%d_BRIDGE' % idx] = nic.bridge
    if constants.HV_NIC_TYPE in instance.hvparams:
      result['NIC_%d_FRONTEND_TYPE' % idx] = \
        instance.hvparams[constants.HV_NIC_TYPE]

  return result
1272

    
1273
def GrowBlockDevice(disk, amount):
1274
  """Grow a stack of block devices.
1275

1276
  This function is called recursively, with the childrens being the
1277
  first one resize.
1278

1279
  Args:
1280
    disk: the disk to be grown
1281

1282
  Returns: a tuple of (status, result), with:
1283
    status: the result (true/false) of the operation
1284
    result: the error message if the operation failed, otherwise not used
1285

1286
  """
1287
  r_dev = _RecursiveFindBD(disk)
1288
  if r_dev is None:
1289
    return False, "Cannot find block device %s" % (disk,)
1290

    
1291
  try:
1292
    r_dev.Grow(amount)
1293
  except errors.BlockDeviceError, err:
1294
    return False, str(err)
1295

    
1296
  return True, None
1297

    
1298

    
1299
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  @type disk: L{objects.Disk}
  @param disk: the disk to be snapshotted
  @rtype: string
  @return: snapshot disk path

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, recurse on it
      return SnapshotBlockDevice(disk.children[0])
    # more than one child: recurse on the first whose size matches
    for child in disk.children:
      if child.size == disk.size:
        return SnapshotBlockDevice(child)
    # no child matched: nothing snapshotted (mirrors the original
    # implicit None fall-through)
    return None

  if disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is None:
      return None
    # let's stay on the safe side and ask for the full size, for now
    return r_dev.Snapshot(disk.size)

  raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                               " '%s' of type '%s'" %
                               (disk.unique_id, disk.dev_type))
1332

    
1333

    
1334
def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
  """Export a block device snapshot to a remote node.

  @type disk: L{objects.Disk}
  @param disk: the description of the disk to export
  @type dest_node: str
  @param dest_node: the destination node to export to
  @type instance: L{objects.Instance}
  @param instance: the instance object to whom the disk belongs
  @type cluster_name: str
  @param cluster_name: the cluster name, needed for SSH hostalias
  @type idx: int
  @param idx: the index of the disk in the instance's disk list,
      used to export to the OS scripts environment
  @rtype: bool
  @return: the success of the operation

  """
  export_env = OSEnvironment(instance)

  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-run logfile, named after the OS, instance and timestamp
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)
  # the disk must be assembled locally so the export script can read it
  real_disk = _RecursiveFindBD(disk)
  if real_disk is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_disk.Open()

  export_env['EXPORT_DEVICE'] = real_disk.dev_path
  export_env['EXPORT_INDEX'] = str(idx)

  # data lands in a temporary ".new" directory on the destination node
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters
  expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path,
                               export_script, logfile)

  comprcmd = "gzip"

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
                                                   constants.GANETI_RUNAS,
                                                   destcmd)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command, env=export_env)

  if result.failed:
    logging.error("os snapshot export command '%s' returned error: %s"
                  " output: %s", command, result.fail_reason, result.output)
    return False

  return True
1398

    
1399

    
1400
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' %
             instance.beparams[constants.BE_MEMORY])
  config.set(constants.INISECT_INS, 'vcpus', '%d' %
             instance.beparams[constants.BE_VCPUS])
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  nic_total = 0
  for nic_count, nic in enumerate(instance.nics):
    # bug fix: 'nic_count' used to be written as the last enumerate
    # index (one less than the real number of NICs); track the total
    nic_total = nic_count + 1
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
               '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_total)

  disk_total = 0
  for disk_count, disk in enumerate(snap_disks):
    if disk:
      # same off-by-one fix as for 'nic_count' above
      disk_total = disk_count + 1
      config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
                 ('%s' % disk.iv_name))
      config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
                 ('%s' % disk.physical_id[1]))
      config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
                 ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_total)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # atomically-ish replace any previous export of this instance
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1463

    
1464

    
1465
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info.

  """
  config = objects.SerializableConfigParser()
  config.read(os.path.join(dest, constants.EXPORT_CONF_FILE))

  # both sections must be present for a well-formed export
  has_exp = config.has_section(constants.INISECT_EXP)
  has_ins = config.has_section(constants.INISECT_INS)
  if not (has_exp and has_ins):
    return None

  return config
1485

    
1486

    
1487
def ImportOSIntoInstance(instance, src_node, src_images, cluster_name):
  """Import an os image into an instance.

  @type instance: L{objects.instance}
  @param instance: instance to import the disks into
  @type src_node: string
  @param src_node: source node for the disk images
  @type src_images: list of string
  @param src_images: absolute paths of the disk images
  @type cluster_name: string
  @param cluster_name: the cluster name, needed for the SSH hostalias
  @rtype: list of boolean
  @return: each boolean represent the success of importing the n-th disk

  """
  import_env = OSEnvironment(instance)
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # per-run logfile, named after the OS, instance and timestamp
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # pipeline: remote 'cat' of the image | local gunzip | OS import script
  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s &>%s)", inst_os.path, import_script,
                               logfile)

  final_result = []
  for idx, image in enumerate(src_images):
    if image:
      destcmd = utils.BuildShellCmd('cat %s', image)
      remotecmd = _GetSshRunner(cluster_name).BuildCmd(src_node,
                                                       constants.GANETI_RUNAS,
                                                       destcmd)
      command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
      import_env['IMPORT_DEVICE'] = import_env['DISK_%d_PATH' % idx]
      import_env['IMPORT_INDEX'] = str(idx)
      result = utils.RunCmd(command, env=import_env)
      if result.failed:
        logging.error("disk import command '%s' returned error: %s"
                      " output: %s", command, result.fail_reason, result.output)
        final_result.append(False)
      else:
        final_result.append(True)
    else:
      # no source image for this disk: nothing to do, counted as success
      final_result.append(True)

  return final_result
1534

    
1535

    
1536
def ListExports():
  """Return a list of exports currently available on this machine.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
1544

    
1545

    
1546
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(os.path.join(constants.EXPORT_DIR, export))

  return True
1563

    
1564

    
1565
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of (disk, new_unique_id) pairs (the
  loop below unpacks two elements; the older docs mentioning
  new_logical/new_physical refer to the contents of the unique_id).
  The return value will be a combined boolean result (True only if all
  renames succeeded).

  """
  result = True
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      # device not assembled on this node: count as a failure but keep
      # processing the remaining entries
      result = False
      continue
    try:
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
      result = False
  return result
1594

    
1595

    
1596
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  cfg = _GetConfig()
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = cfg.GetFileStorageDir()
  # bug fix: the old os.path.commonprefix() check compared characters,
  # not path components, so e.g. '/srv/ganeti-evil' passed a check
  # against a base of '/srv/ganeti'; compare whole components instead
  if (file_storage_dir != base_file_storage_dir and
      not file_storage_dir.startswith(base_file_storage_dir + os.sep)):
    logging.error("file storage directory '%s' is not under base file"
                  " storage directory '%s'",
                  file_storage_dir, base_file_storage_dir)
    return None
  return file_storage_dir
1620

    
1621

    
1622
def CreateFileStorageDir(file_storage_dir):
1623
  """Create file storage directory.
1624

1625
  Args:
1626
    file_storage_dir: string containing the path
1627

1628
  Returns:
1629
    tuple with first element a boolean indicating wheter dir
1630
    creation was successful or not
1631

1632
  """
1633
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1634
  result = True,
1635
  if not file_storage_dir:
1636
    result = False,
1637
  else:
1638
    if os.path.exists(file_storage_dir):
1639
      if not os.path.isdir(file_storage_dir):
1640
        logging.error("'%s' is not a directory", file_storage_dir)
1641
        result = False,
1642
    else:
1643
      try:
1644
        os.makedirs(file_storage_dir, 0750)
1645
      except OSError, err:
1646
        logging.error("Cannot create file storage directory '%s': %s",
1647
                      file_storage_dir, err)
1648
        result = False,
1649
  return result
1650

    
1651

    
1652
def RemoveFileStorageDir(file_storage_dir):
1653
  """Remove file storage directory.
1654

1655
  Remove it only if it's empty. If not log an error and return.
1656

1657
  Args:
1658
    file_storage_dir: string containing the path
1659

1660
  Returns:
1661
    tuple with first element a boolean indicating wheter dir
1662
    removal was successful or not
1663

1664
  """
1665
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1666
  result = True,
1667
  if not file_storage_dir:
1668
    result = False,
1669
  else:
1670
    if os.path.exists(file_storage_dir):
1671
      if not os.path.isdir(file_storage_dir):
1672
        logging.error("'%s' is not a directory", file_storage_dir)
1673
        result = False,
1674
      # deletes dir only if empty, otherwise we want to return False
1675
      try:
1676
        os.rmdir(file_storage_dir)
1677
      except OSError, err:
1678
        logging.exception("Cannot remove file storage directory '%s'",
1679
                          file_storage_dir)
1680
        result = False,
1681
  return result
1682

    
1683

    
1684
def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
1685
  """Rename the file storage directory.
1686

1687
  Args:
1688
    old_file_storage_dir: string containing the old path
1689
    new_file_storage_dir: string containing the new path
1690

1691
  Returns:
1692
    tuple with first element a boolean indicating wheter dir
1693
    rename was successful or not
1694

1695
  """
1696
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
1697
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
1698
  result = True,
1699
  if not old_file_storage_dir or not new_file_storage_dir:
1700
    result = False,
1701
  else:
1702
    if not os.path.exists(new_file_storage_dir):
1703
      if os.path.isdir(old_file_storage_dir):
1704
        try:
1705
          os.rename(old_file_storage_dir, new_file_storage_dir)
1706
        except OSError, err:
1707
          logging.exception("Cannot rename '%s' to '%s'",
1708
                            old_file_storage_dir, new_file_storage_dir)
1709
          result =  False,
1710
      else:
1711
        logging.error("'%s' is not a directory", old_file_storage_dir)
1712
        result = False,
1713
    else:
1714
      if os.path.exists(old_file_storage_dir):
1715
        logging.error("Cannot rename '%s' to '%s'. Both locations exist.",
1716
                      old_file_storage_dir, new_file_storage_dir)
1717
        result = False,
1718
  return result
1719

    
1720

    
1721
def _IsJobQueueFile(file_name):
  """Checks whether the given filename is in the queue directory.

  """
  queue_dir = os.path.normpath(constants.QUEUE_DIR)
  normalized = os.path.normpath(file_name)
  # bug fix: the old os.path.commonprefix() check compared characters,
  # not path components, so a path like '<queue_dir>-evil/x' used to
  # pass; require the queue dir itself or a path below it
  result = (normalized == queue_dir or
            normalized.startswith(queue_dir + os.sep))

  if not result:
    logging.error("'%s' is not a file in the queue directory",
                  file_name)

  return result
1733

    
1734

    
1735
def JobQueueUpdate(file_name, content):
  """Updates a file in the queue directory.

  Refuses paths outside the queue directory; returns True on success.

  """
  if _IsJobQueueFile(file_name):
    # Write and replace the file atomically
    utils.WriteFile(file_name, data=content)
    return True
  return False
1746

    
1747

    
1748
def JobQueueRename(old, new):
  """Renames a job queue file.

  Both the source and the destination must live inside the queue
  directory; returns True on success.

  """
  if _IsJobQueueFile(old) and _IsJobQueueFile(new):
    os.rename(old, new)
    return True
  return False
1758

    
1759

    
1760
def JobQueueSetDrainFlag(drain_flag):
  """Set the drain flag for the queue.

  This will set or unset the queue drain flag.

  @type drain_flag: bool
  @param drain_flag: if True, will set the drain flag, otherwise reset it.

  """
  if not drain_flag:
    utils.RemoveFile(constants.JOB_QUEUE_DRAIN_FILE)
  else:
    # an (empty) flag file marks the queue as drained
    utils.WriteFile(constants.JOB_QUEUE_DRAIN_FILE, data="", close=True)

  return True
1775

    
1776

    
1777
def CloseBlockDevices(disks):
1778
  """Closes the given block devices.
1779

1780
  This means they will be switched to secondary mode (in case of DRBD).
1781

1782
  """
1783
  bdevs = []
1784
  for cf in disks:
1785
    rd = _RecursiveFindBD(cf)
1786
    if rd is None:
1787
      return (False, "Can't find device %s" % cf)
1788
    bdevs.append(rd)
1789

    
1790
  msg = []
1791
  for rd in bdevs:
1792
    try:
1793
      rd.Close()
1794
    except errors.BlockDeviceError, err:
1795
      msg.append(str(err))
1796
  if msg:
1797
    return (False, "Can't make devices secondary: %s" % ",".join(msg))
1798
  else:
1799
    return (True, "All devices secondary")
1800

    
1801

    
1802
def ValidateHVParams(hvname, hvparams):
1803
  """Validates the given hypervisor parameters.
1804

1805
  @type hvname: string
1806
  @param hvname: the hypervisor name
1807
  @type hvparams: dict
1808
  @param hvparams: the hypervisor parameters to be validated
1809
  @rtype: tuple (bool, str)
1810
  @return: tuple of (success, message)
1811

1812
  """
1813
  try:
1814
    hv_type = hypervisor.GetHypervisor(hvname)
1815
    hv_type.ValidateParameters(hvparams)
1816
    return (True, "Validation passed")
1817
  except errors.HypervisorError, err:
1818
    return (False, str(err))
1819

    
1820

    
1821
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only scripts whose basename matches this pattern are executed
  # (run-parts style: letters, digits, underscore and dash)
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns a tuple of (success, output), where success is True iff the
    script exited with status 0; only the first 4096 bytes of the
    script's combined stdout/stderr are captured.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      # stderr is folded into stdout so we capture a single stream
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        # read (a bounded amount of) output before waiting, so a chatty
        # script cannot deadlock on a full pipe
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() if it is interrupted by a signal (EINTR)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logging.exception("Error while closing fd %s", fd)
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Scripts in the "<hpath>-<pre|post>.d" subdirectory of the base dir
    are run in sorted order; non-executable files or names not matching
    RE_MASK are reported as skipped.  Returns a list of tuples of
    (relative script path, one of the HKR_ status constants, output).

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # a missing (or unreadable) hooks dir means no hooks to run
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
1926

    
1927

    
1928
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    The input data is written to a temporary file whose name is passed
    to the script as its only argument.

    @type name: str
    @param name: the name of the iallocator script, looked up in
        constants.IALLOCATOR_SEARCH_PATH
    @type idata: str
    @param idata: the allocator input data

    Return value: tuple of:
       - run status (one of the IARUN_ constants)
       - stdout
       - stderr
       - fail reason (as from utils.RunResult)

    """
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      try:
        os.write(fd, idata)
      finally:
        # close the fd even if the write fails, otherwise it would be
        # leaked; the file itself is removed in the outer finally
        os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
1962

    
1963

    
1964
class DevCacheManager(object):
1965
  """Simple class for managing a cache of block device information.
1966

1967
  """
1968
  _DEV_PREFIX = "/dev/"
1969
  _ROOT_DIR = constants.BDEV_CACHE_DIR
1970

    
1971
  @classmethod
1972
  def _ConvertPath(cls, dev_path):
1973
    """Converts a /dev/name path to the cache file name.
1974

1975
    This replaces slashes with underscores and strips the /dev
1976
    prefix. It then returns the full path to the cache file
1977

1978
    """
1979
    if dev_path.startswith(cls._DEV_PREFIX):
1980
      dev_path = dev_path[len(cls._DEV_PREFIX):]
1981
    dev_path = dev_path.replace("/", "_")
1982
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1983
    return fpath
1984

    
1985
  @classmethod
1986
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1987
    """Updates the cache information for a given device.
1988

1989
    """
1990
    if dev_path is None:
1991
      logging.error("DevCacheManager.UpdateCache got a None dev_path")
1992
      return
1993
    fpath = cls._ConvertPath(dev_path)
1994
    if on_primary:
1995
      state = "primary"
1996
    else:
1997
      state = "secondary"
1998
    if iv_name is None:
1999
      iv_name = "not_visible"
2000
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
2001
    try:
2002
      utils.WriteFile(fpath, data=fdata)
2003
    except EnvironmentError, err:
2004
      logging.exception("Can't update bdev cache for %s", dev_path)
2005

    
2006
  @classmethod
2007
  def RemoveCache(cls, dev_path):
2008
    """Remove data for a dev_path.
2009

2010
    """
2011
    if dev_path is None:
2012
      logging.error("DevCacheManager.RemoveCache got a None dev_path")
2013
      return
2014
    fpath = cls._ConvertPath(dev_path)
2015
    try:
2016
      utils.RemoveFile(fpath)
2017
    except EnvironmentError, err:
2018
      logging.exception("Can't update bdev cache for %s", dev_path)