Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ e873317a

History | View | Annotate | Download (53.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import stat
30
import errno
31
import re
32
import subprocess
33
import random
34
import logging
35

    
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def _GetSshRunner():
  """Build and return a fresh SshRunner instance."""
  return ssh.SshRunner()
48

    
49

    
50
def _GetMasterInfo():
51
  """Return the master ip and netdev.
52

53
  """
54
  try:
55
    ss = ssconf.SimpleStore()
56
    master_netdev = ss.GetMasterNetdev()
57
    master_ip = ss.GetMasterIP()
58
  except errors.ConfigurationError, err:
59
    logging.exception("Cluster configuration incomplete")
60
    return (None, None)
61
  return (master_netdev, master_ip)
62

    
63

    
64
def StartMaster(start_daemons):
  """Activate local node as master node.

  The function will always try activate the IP address of the master
  (if someone else has it, then it won't). Then, if the start_daemons
  parameter is True, it will also start the master daemons
  (ganet-masterd and ganeti-rapi).

  Args:
    start_daemons: boolean; when True, also launch the ganeti-masterd
      and ganeti-rapi daemons after the IP activation attempt

  Returns:
    True if every attempted step succeeded, False otherwise

  """
  ok = True
  master_netdev, master_ip = _GetMasterInfo()
  if not master_netdev:
    # incomplete cluster configuration, already logged by _GetMasterInfo
    return False

  if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
    if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT,
                     source=constants.LOCALHOST_IP_ADDRESS):
      # we already have the ip:
      logging.debug("Already started")
    else:
      # the IP answers, but not from localhost: some other node holds it
      logging.error("Someone else has the master ip, not activating")
      ok = False
  else:
    # nobody answers on the master IP: claim it as an alias on our netdev
    result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip,
                           "dev", master_netdev, "label",
                           "%s:0" % master_netdev])
    if result.failed:
      logging.error("Can't activate master IP: %s", result.output)
      ok = False

    # gratuitous ARP so neighbours refresh their caches for the moved IP
    result = utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
                           "-s", master_ip, master_ip])
    # we'll ignore the exit code of arping

  # and now start the master and rapi daemons
  if start_daemons:
    for daemon in 'ganeti-masterd', 'ganeti-rapi':
      result = utils.RunCmd([daemon])
      if result.failed:
        logging.error("Can't start daemon %s: %s", daemon, result.output)
        ok = False
  return ok
106

    
107

    
108
def StopMaster(stop_daemons):
  """Deactivate this node as master.

  The function will always try to deactivate the IP address of the
  master. Then, if the stop_daemons parameter is True, it will also
  stop the master daemons (ganet-masterd and ganeti-rapi).

  Args:
    stop_daemons: boolean; when True, also kill the rapi and masterd
      daemons via their pid files

  Returns:
    False when the cluster configuration is incomplete, True otherwise
    (a failure to remove the master IP is logged but not fatal)

  """
  master_netdev, master_ip = _GetMasterInfo()
  if not master_netdev:
    return False

  result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip,
                         "dev", master_netdev])
  if result.failed:
    # FIX: was 'logger.error(...)' but no name 'logger' exists in this
    # module (everything else uses the stdlib 'logging' module), so the
    # error path itself raised NameError
    logging.error("Can't remove the master IP, error: %s", result.output)
    # but otherwise ignore the failure

  if stop_daemons:
    # stop/kill the rapi and the master daemon
    for daemon in constants.RAPI_PID, constants.MASTERD_PID:
      utils.KillProcess(utils.ReadPidFile(utils.DaemonPidFileName(daemon)))

  return True
132

    
133

    
134
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
135
  """Joins this node to the cluster.
136

137
  This does the following:
138
      - updates the hostkeys of the machine (rsa and dsa)
139
      - adds the ssh private key to the user
140
      - adds the ssh public key to the users' authorized_keys file
141

142
  """
143
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
144
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
145
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
146
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
147
  for name, content, mode in sshd_keys:
148
    utils.WriteFile(name, data=content, mode=mode)
149

    
150
  try:
151
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
152
                                                    mkdir=True)
153
  except errors.OpExecError, err:
154
    logging.exception("Error while processing user ssh files")
155
    return False
156

    
157
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
158
    utils.WriteFile(name, data=content, mode=0600)
159

    
160
  utils.AddAuthorizedKey(auth_keys, sshpub)
161

    
162
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
163

    
164
  return True
165

    
166

    
167
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  Removes the regular files under DATA_DIR, drops this node's public
  key from the ganeti user's authorized_keys file, deletes the user's
  keypair, and finally raises QuitGanetiException so the calling node
  daemon shuts itself down.

  Raises:
    errors.QuitGanetiException: always, on the success path

  """
  if os.path.isdir(constants.DATA_DIR):
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
      full_name = os.path.join(constants.DATA_DIR, rel_name)
      # only plain files are removed; directories and symlinks are kept
      if os.path.isfile(full_name) and not os.path.islink(full_name):
        utils.RemoveFile(full_name)

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
  except errors.OpExecError:
    logging.exception("Error while processing ssh files")
    return

  f = open(pub_key, 'r')
  try:
    # remove our own public key from authorized_keys (read capped at 8k)
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
  finally:
    f.close()

  utils.RemoveFile(priv_key)
  utils.RemoveFile(pub_key)

  # Return a reassuring string to the caller, and quit
  raise errors.QuitGanetiException(False, 'Shutdown scheduled')
194

    
195

    
196
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Returns:
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  # merge in whatever memory data the hypervisor reports, if any
  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  # the kernel boot id lets callers detect node reboots
  boot_id_file = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    node_info["bootid"] = boot_id_file.read(128).rstrip("\n")
  finally:
    boot_id_file.close()

  return node_info
227

    
228

    
229
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify
      'node-net-test': list of (name, primary_ip, secondary_ip) tuples
        to reach over TCP from this node's own addresses

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  Returns:
    a dict keyed by check name, holding only entries for the checks
    that were requested (and, for per-node checks, only failures)

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    # NOTE: shuffling mutates the caller-supplied list in place;
    # presumably done so all nodes don't hammer the same targets in order
    random.shuffle(what['nodelist'])
    for node in what['nodelist']:
      success, message = _GetSshRunner().VerifyNodeHostname(node)
      if not success:
        result['nodelist'][node] = message
  if 'node-net-test' in what:
    result['node-net-test'] = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    # find our own primary/secondary IPs in the supplied node list
    for name, pip, sip in what['node-net-test']:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      result['node-net-test'][my_name] = ("Can't find my own"
                                          " primary/secondary IP"
                                          " in the node list")
    else:
      port = ssconf.SimpleStore().GetNodeDaemonPort()
      # ping every node (including ourselves) on primary and, when
      # distinct, secondary addresses; record only the failures
      for name, pip, sip in what['node-net-test']:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          result['node-net-test'][name] = ("failure using the %s"
                                           " interface(s)" %
                                           " and ".join(fail))

  return result
290

    
291

    
292
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Args:
    vg_name: the volume group whose logical volumes to list

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

    NOTE(review): on lvs command failure this returns the raw command
    output (a string) rather than a dict — callers may depend on this,
    so it is preserved here; confirm before normalizing.

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logging.error("Failed to list logical volumes, lvs output: %s",
                  result.output)
    return result.output

  # FIX: regex written as a raw string; the previous plain string relied
  # on "\|" happening to pass through unchanged
  valid_line_re = re.compile(r"^ *([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
  for line in result.stdout.splitlines():
    line = line.strip()
    match = valid_line_re.match(line)
    if not match:
      logging.error("Invalid line returned from lvs output: '%s'", line)
      continue
    name, size, attr = match.groups()
    # lv_attr positions: [4] is the state flag, [5] the device-open flag
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
324

    
325

    
326
def ListVolumeGroups():
  """List the volume groups and their size.

  Thin wrapper over the utils implementation, kept so the node daemon
  exposes a uniform backend API.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
334

    
335

    
336
def NodeVolumes():
  """List all volumes on this node.

  Returns a list of dicts, one per logical volume, with the keys
  'name', 'size', 'dev' and 'vg'; on lvs failure an empty dict is
  returned.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logging.error("Failed to list logical volumes, lvs output: %s",
                  result.output)
    return {}

  def _strip_extents(dev_field):
    # lvs appends "(extent)" to device names; keep only the path part
    if '(' in dev_field:
      return dev_field.split('(')[0]
    return dev_field

  volumes = []
  for line in result.stdout.splitlines():
    # a well-formed data line has at least four |-separated fields
    if line.count('|') < 3:
      continue
    fields = line.split('|')
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': _strip_extents(fields[2].strip()),
      'vg': fields[3].strip(),
      })
  return volumes
364

    
365

    
366
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Stops at the first missing bridge.

  Returns:
    True if all of them exist, false otherwise

  """
  for bridge_name in bridges_list:
    if not utils.BridgeExists(bridge_name):
      # one missing bridge is enough to fail the whole check
      return False
  return True
378

    
379

    
380
def GetInstanceList():
381
  """Provides a list of instances.
382

383
  Returns:
384
    A list of all running instances on the current node
385
    - instance1.example.com
386
    - instance2.example.com
387

388
  """
389
  try:
390
    names = hypervisor.GetHypervisor().ListInstances()
391
  except errors.HypervisorError, err:
392
    logging.exception("Error enumerating instances")
393
    raise
394

    
395
  return names
396

    
397

    
398
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    # unknown/stopped instance: empty dict
    return {}
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
421

    
422

    
423
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  all_info = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if all_info:
    for entry in all_info:
      # each entry is (name, id, memory, vcpus, state, cpu_time)
      name, inst_id, memory, vcpus, state, times = entry
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
453

    
454

    
455
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.

  Resolves the instance-visible disk names to real block devices,
  opens them, and runs the OS definition's create script, logging its
  output to a per-run file under LOG_OS_DIR.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False when a disk can't be found or the create
    script fails

  Raises:
    errors.BlockDeviceError: if a resolved disk is not set up

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logging.error("Can't find this device-visible name '%s'", os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logging.error("Can't find this device-visible name '%s'", swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file named after os, instance and timestamp
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # the script's stdout/stderr are redirected to the log file
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)
  if result.failed:
    logging.error("os create command '%s' returned error: %s, logfile: %s,"
                  " output: %s", command, result.fail_reason, logfile,
                  result.output)
    return False

  return True
508

    
509

    
510
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
  """Run the OS rename script for an instance.

  Resolves the instance-visible disk names to real block devices,
  opens them, and runs the OS definition's rename script, logging its
  output to a per-run file under LOG_OS_DIR.

  Args:
    instance: the instance object
    old_name: the old name of the instance
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False when a disk can't be found or the rename
    script fails

  Raises:
    errors.BlockDeviceError: if a resolved disk is not set up

  """
  inst_os = OSFromDisk(instance.os)

  script = inst_os.rename_script

  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logging.error("Can't find this device-visible name '%s'", os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logging.error("Can't find this device-visible name '%s'", swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file carrying both the old and the new instance name
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                           old_name,
                                           instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # the script's stdout/stderr are redirected to the log file
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
                                inst_os.path, script, old_name, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)

  if result.failed:
    logging.error("os create command '%s' returned error: %s output: %s",
                  command, result.fail_reason, result.output)
    return False

  return True
565

    
566

    
567
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.

  """
  # default answer: all keys present but None, used on every error path
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    logging.error("volume group %s not present", vg_name)
    return retdic
  # strip a possible trailing separator before splitting the three fields
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      # sizes come back as floats in MiB; round to whole MiB
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError, err:
      # unparseable numbers: keep the all-None dict
      logging.exception("Fail to parse vgs output")
  else:
    logging.error("vgs output has the wrong number of fields (expected"
                  " three): %s", str(valarr))
  return retdic
606

    
607

    
608
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns a list of (disk, device) pairs, one per instance disk.

  """
  gathered = []
  for disk in instance.disks:
    real_device = _RecursiveFindBD(disk)
    if real_device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    # make the device ready for use by the hypervisor
    real_device.Open()
    gathered.append((disk, real_device))
  return gathered
624

    
625

    
626
def StartInstance(instance, extra_args):
627
  """Start an instance.
628

629
  Args:
630
    instance - name of instance to start.
631

632
  """
633
  running_instances = GetInstanceList()
634

    
635
  if instance.name in running_instances:
636
    return True
637

    
638
  block_devices = _GatherBlockDevs(instance)
639
  hyper = hypervisor.GetHypervisor()
640

    
641
  try:
642
    hyper.StartInstance(instance, block_devices, extra_args)
643
  except errors.HypervisorError, err:
644
    logging.exception("Failed to start instance")
645
    return False
646

    
647
  return True
648

    
649

    
650
def ShutdownInstance(instance):
  """Shut an instance down.

  Asks the hypervisor for a clean stop, then polls for up to ~2
  minutes; if the instance is still running, falls back to a forced
  stop (destroy).

  Args:
    instance - name of instance to shutdown.

  Returns:
    True when the instance is gone (or was not running), False when
    neither the clean stop nor the forced stop worked

  """
  running_instances = GetInstanceList()

  if instance.name not in running_instances:
    # not running: treat as already shut down
    return True

  hyper = hypervisor.GetHypervisor()
  try:
    hyper.StopInstance(instance)
  except errors.HypervisorError, err:
    logging.error("Failed to stop instance")
    return False

  # test every 10secs for 2min
  # NOTE(review): shutdown_ok is assigned but never read — confirm it
  # can be dropped
  shutdown_ok = False

  time.sleep(1)
  for dummy in range(11):
    if instance.name not in GetInstanceList():
      break
    time.sleep(10)
  else:
    # for/else: we get here only when the loop ran out without breaking,
    # i.e. the shutdown did not succeed
    logging.error("shutdown of '%s' unsuccessful, using destroy", instance)

    try:
      hyper.StopInstance(instance, force=True)
    except errors.HypervisorError, err:
      logging.exception("Failed to stop instance")
      return False

    time.sleep(1)
    if instance.name in GetInstanceList():
      logging.error("could not shutdown instance '%s' even by destroy",
                    instance.name)
      return False

  return True
694

    
695

    
696
def RebootInstance(instance, reboot_type, extra_args):
697
  """Reboot an instance.
698

699
  Args:
700
    instance    - name of instance to reboot
701
    reboot_type - how to reboot [soft,hard,full]
702

703
  """
704
  running_instances = GetInstanceList()
705

    
706
  if instance.name not in running_instances:
707
    logging.error("Cannot reboot instance that is not running")
708
    return False
709

    
710
  hyper = hypervisor.GetHypervisor()
711
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
712
    try:
713
      hyper.RebootInstance(instance)
714
    except errors.HypervisorError, err:
715
      logging.exception("Failed to soft reboot instance")
716
      return False
717
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
718
    try:
719
      ShutdownInstance(instance)
720
      StartInstance(instance, extra_args)
721
    except errors.HypervisorError, err:
722
      logging.exception("Failed to hard reboot instance")
723
      return False
724
  else:
725
    raise errors.ParameterError("reboot_type invalid")
726

    
727

    
728
  return True
729

    
730

    
731
def MigrateInstance(instance, target, live):
732
  """Migrates an instance to another node.
733

734
  """
735
  hyper = hypervisor.GetHypervisor()
736

    
737
  try:
738
    hyper.MigrateInstance(instance, target, live)
739
  except errors.HypervisorError, err:
740
    msg = "Failed to migrate instance: %s" % str(err)
741
    logging.error(msg)
742
    return (False, msg)
743
  return (True, "Migration successfull")
744

    
745

    
746
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object
   size: the size of the physical underlying device
   owner: a string with the name of the instance
   on_primary: a boolean indicating if it is the primary node or not
   info: string that will be sent to the physical device creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  # first assemble (recursively) all children of the device
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      clist.append(crdev)
  try:
    # best-effort removal of a leftover device with the same identity
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logging.info("removing existing device %s", disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # deliberate: a failure to find/remove a stale device is not fatal
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logging.error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    # record the assembled device in the node-local device cache
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
800

    
801

    
802
def RemoveBlockDevice(disk):
803
  """Remove a block device.
804

805
  This is intended to be called recursively.
806

807
  """
808
  try:
809
    # since we are removing the device, allow a partial match
810
    # this allows removal of broken mirrors
811
    rdev = _RecursiveFindBD(disk, allow_partial=True)
812
  except errors.BlockDeviceError, err:
813
    # probably can't attach
814
    logging.info("Can't attach to device %s in remove", disk)
815
    rdev = None
816
  if rdev is not None:
817
    r_path = rdev.dev_path
818
    result = rdev.Remove()
819
    if result:
820
      DevCacheManager.RemoveCache(r_path)
821
  else:
822
    result = True
823
  if disk.children:
824
    for child in disk.children:
825
      result = result and RemoveBlockDevice(child)
826
  return result
827

    
828

    
829
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    owner: the instance name, used for the device cache
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    # mcn: how many children are allowed to fail assembly (appended as
    # None) before we give up and re-raise
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        if children.count(None) >= mcn:
          # too many failed children already: propagate the error
          raise
        cdev = None
        logging.debug("Error in child activation: %s", str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    # remember the assembled device in the node-local device cache
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    # nothing to assemble on this node; signal success without a device
    result = True
  return result
875

    
876

    
877
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  if isinstance(assembled, bdev.BlockDev):
    # a real device was assembled: hand back its /dev path
    return assembled.dev_path
  return assembled
891

    
892

    
893
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is None:
    # not assembled: nothing to shut down at this level
    result = True
  else:
    dev_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      DevCacheManager.RemoveCache(dev_path)

  # recurse into children; once a shutdown failed, further children
  # are skipped (short-circuit)
  if disk.children:
    for child in disk.children:
      result = result and ShutdownBlockDevice(child)
  return result
916

    
917

    
918
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Resolves the parent (allowing a partial match, so broken mirrors
  can still be extended) and all new child devices, then attaches
  the children to the parent.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logging.error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if new_bdevs.count(None) > 0:
    # at least one child could not be resolved: abort
    logging.error("Can't find new device(s) to add: %s:%s",
                  new_bdevs, new_cdevs)
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
933

    
934

    
935
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Resolves each child to a device path (static path when the device
  declares one, dynamic lookup otherwise) and detaches them from the
  parent.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logging.error("Can't find parent in remove children: %s", parent_cdev)
    return False
  child_paths = []
  for disk in new_cdevs:
    static_path = disk.StaticDevPath()
    if static_path is not None:
      child_paths.append(static_path)
      continue
    # no static path: the device has to be found dynamically
    found = _RecursiveFindBD(disk)
    if found is None:
      logging.error("Can't find dynamic device %s while removing children",
                    disk)
      return False
    child_paths.append(found.dev_path)
  parent_bdev.RemoveChildren(child_paths)
  return True
958

    
959

    
960
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, as produced by
    bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any of the devices cannot be found

  """
  result = []
  for disk in disks:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    result.append(dev.CombinedSyncStatus())
  return result
978

    
979

    
980
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): allow_partial is accepted but never referenced in
  # this body; the recursive child lookups below always use the
  # default -- confirm whether bdev.FindDevice handles partial children
  if disk.children:
    children = [_RecursiveFindBD(chdisk) for chdisk in disk.children]
  else:
    children = []
  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
1002

    
1003

    
1004
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  dev = _RecursiveFindBD(disk)
  if dev is None:
    return None
  return (dev.dev_path, dev.major, dev.minor) + dev.GetSyncStatus()
1020

    
1021

    
1022
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: file contents
    mode, uid, gid: permissions and ownership for the written file
    atime, mtime: timestamps to set on the written file

  Returns:
    True on success, False if the name is relative or not whitelisted.

  """
  if not os.path.isabs(file_name):
    logging.error("Filename passed to UploadFile is not absolute: '%s'",
                  file_name)
    return False

  # only a fixed whitelist of configuration files may be overwritten
  permitted = set([
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.VNC_PASSWORD_FILE,
    constants.JOB_QUEUE_SERIAL_FILE,
    ])
  permitted.update(ssconf.SimpleStore().GetFileList())
  if file_name not in permitted:
    logging.error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'", file_name)
    return False

  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return True
1050

    
1051

    
1052
def _ErrnoOrStr(err):
1053
  """Format an EnvironmentError exception.
1054

1055
  If the `err` argument has an errno attribute, it will be looked up
1056
  and converted into a textual EXXXX description. Otherwise the string
1057
  representation of the error will be returned.
1058

1059
  """
1060
  if hasattr(err, 'errno'):
1061
    detail = errno.errorcode[err.errno]
1062
  else:
1063
    detail = str(err)
1064
  return detail
1065

    
1066

    
1067
def _OSOndiskVersion(name, os_dir):
1068
  """Compute and return the API version of a given OS.
1069

1070
  This function will try to read the API version of the os given by
1071
  the 'name' parameter and residing in the 'os_dir' directory.
1072

1073
  Return value will be either an integer denoting the version or None in the
1074
  case when this is not a valid OS name.
1075

1076
  """
1077
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1078

    
1079
  try:
1080
    st = os.stat(api_file)
1081
  except EnvironmentError, err:
1082
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1083
                           " found (%s)" % _ErrnoOrStr(err))
1084

    
1085
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1086
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1087
                           " a regular file")
1088

    
1089
  try:
1090
    f = open(api_file)
1091
    try:
1092
      api_version = f.read(256)
1093
    finally:
1094
      f.close()
1095
  except EnvironmentError, err:
1096
    raise errors.InvalidOS(name, os_dir, "error while reading the"
1097
                           " API version (%s)" % _ErrnoOrStr(err))
1098

    
1099
  api_version = api_version.strip()
1100
  try:
1101
    api_version = int(api_version)
1102
  except (TypeError, ValueError), err:
1103
    raise errors.InvalidOS(name, os_dir,
1104
                           "API version is not integer (%s)" % str(err))
1105

    
1106
  return api_version
1107

    
1108

    
1109
def DiagnoseOS(top_dirs=None):
  """Compute the validity for all OSes.

  Returns an OS object for each name in all the given top directories
  (if not given defaults to constants.OS_SEARCH_PATH)

  Returns:
    list of OS objects

  """
  if top_dirs is None:
    top_dirs = constants.OS_SEARCH_PATH

  result = []
  for dir_name in top_dirs:
    if os.path.isdir(dir_name):
      try:
        f_names = utils.ListVisibleFiles(dir_name)
      except EnvironmentError, err:
        logging.exception("Can't list the OS directory %s", dir_name)
        # NOTE(review): 'break' stops scanning the remaining search
        # directories after one unreadable one; 'continue' looks like
        # the intended behaviour -- confirm
        break
      for name in f_names:
        try:
          os_inst = OSFromDisk(name, base_dir=dir_name)
          result.append(os_inst)
        except errors.InvalidOS, err:
          # invalid OSes are still reported, wrapped as "invalid" OS
          # objects rather than dropped
          result.append(objects.OS.FromInvalidOS(err))

  return result
1138

    
1139

    
1140
def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name to look up
    base_dir: Directory containing the OS scripts. Defaults to a search
              in all the OS_SEARCH_PATH directories.

  Returns:
    an objects.OS instance with status OS_VALID_STATUS

  Raises:
    errors.InvalidOS: if the directory, API version or any of the
                      required scripts is missing or invalid

  """

  if base_dir is None:
    os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
    if os_dir is None:
      raise errors.InvalidOS(name, None, "OS dir not found in search path")
  else:
    os_dir = os.path.sep.join([base_dir, name])

  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
                           " (found %s want %s)"
                           % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    # each script must exist, be executable by the owner and be a
    # regular file
    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
                             script)

    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
                             script)


  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    rename_script=os_scripts['rename'],
                    api_version=api_version)
1195

    
1196

    
1197
def GrowBlockDevice(disk, amount):
1198
  """Grow a stack of block devices.
1199

1200
  This function is called recursively, with the childrens being the
1201
  first one resize.
1202

1203
  Args:
1204
    disk: the disk to be grown
1205

1206
  Returns: a tuple of (status, result), with:
1207
    status: the result (true/false) of the operation
1208
    result: the error message if the operation failed, otherwise not used
1209

1210
  """
1211
  r_dev = _RecursiveFindBD(disk)
1212
  if r_dev is None:
1213
    return False, "Cannot find block device %s" % (disk,)
1214

    
1215
  try:
1216
    r_dev.Grow(amount)
1217
  except errors.BlockDeviceError, err:
1218
    return False, str(err)
1219

    
1220
  return True, None
1221

    
1222

    
1223
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if
    no suitable child/device was found.

  Raises:
    errors.ProgrammerError: for a childless device that is not LVM-based

  """
  if disk.children:
    if len(disk.children) == 1:
      # a single child: snapshot it directly
      return SnapshotBlockDevice(disk.children[0])
    # several children: snapshot the first one whose size matches the
    # parent's
    for child in disk.children:
      if child.size == disk.size:
        return SnapshotBlockDevice(child)
    return None
  if disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is None:
      return None
    # let's stay on the safe side and ask for the full size, for now
    return r_dev.Snapshot(disk.size)
  raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                               " '%s' of type '%s'" %
                               (disk.unique_id, disk.dev_type))
1257

    
1258

    
1259
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  Runs the instance OS's export script over the snapshot device,
  compresses the result with gzip and pipes it over ssh into a staging
  directory ("<name>.new") on the destination node.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-export log file for the OS script's stderr
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
                                       destcmd)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  if result.failed:
    logging.error("os snapshot export command '%s' returned error: %s"
                  " output: %s", command, result.fail_reason, result.output)
    return False

  return True
1314

    
1315

    
1316
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Serializes the instance's configuration (and per-NIC/per-disk data)
  into the EXPORT_CONF_FILE inside the "<name>.new" staging directory,
  then atomically moves the staging directory over any previous export
  of the same instance.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): nic_count here is the index of the last NIC (i.e.
  # len(instance.nics) - 1, or 0 for an empty list), not the count --
  # confirm the import side expects this value
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  # NOTE(review): same last-index-vs-count concern as nic_count above
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export of this instance with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1375

    
1376

    
1377
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    either of the mandatory sections is missing.

  """
  config = objects.SerializableConfigParser()
  config.read(os.path.join(dest, constants.EXPORT_CONF_FILE))

  has_exp = config.has_section(constants.INISECT_EXP)
  has_ins = config.has_section(constants.INISECT_INS)
  if not (has_exp and has_ins):
    return None

  return config
1397

    
1398

    
1399
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  Streams the (gzip-compressed) image from the source node over ssh,
  decompresses it and feeds it to the instance OS's import script
  against the activated OS and swap block devices.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  Raises:
    errors.BlockDeviceError: if the OS or swap device is not activated

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logging.error("Can't find this device-visible name '%s'", os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logging.error("Can't find this device-visible name '%s'", swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-import log file for the OS script's output
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
                                       destcmd)

  # remote cat | gunzip | import script, joined into one shell pipeline
  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    logging.error("os import command '%s' returned error: %s"
                  " output: %s", command, result.fail_reason, result.output)
    return False

  return True
1463

    
1464

    
1465
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns an empty list when the export directory does not exist.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
1473

    
1474

    
1475
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(os.path.join(constants.EXPORT_DIR, export))
  return True
1492

    
1493

    
1494
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of tuples (disk, new_logical,
  new_physical). The return value will be a combined boolean result
  (True only if all renames succeeded).

  """
  # NOTE(review): the docstring above describes 3-tuples but the loop
  # unpacks (disk, unique_id) pairs -- confirm which one the callers
  # actually pass
  result = True
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      # a missing device fails the combined result but does not stop
      # the remaining renames
      result = False
      continue
    try:
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        # the device path changed, so the old cache entry is stale
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
      result = False
  return result
1523

    
1524

    
1525
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
  # os.path.commonprefix compares character-wise, so it would accept
  # e.g. '/srv/ganeti2' under a base of '/srv/ganeti'; compare on path
  # component boundaries instead
  if (file_storage_dir != base_file_storage_dir and
      not file_storage_dir.startswith(base_file_storage_dir + os.sep)):
    logging.error("file storage directory '%s' is not under base file"
                  " storage directory '%s'",
                  file_storage_dir, base_file_storage_dir)
    return None
  return file_storage_dir
1548

    
1549

    
1550
def CreateFileStorageDir(file_storage_dir):
  """Create file storage directory.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    creation was successful or not

  """
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  # the trailing commas below build one-element tuples, matching the
  # documented return shape
  result = True,
  if not file_storage_dir:
    # path was rejected by _TransformFileStorageDir
    result = False,
  else:
    if os.path.exists(file_storage_dir):
      if not os.path.isdir(file_storage_dir):
        logging.error("'%s' is not a directory", file_storage_dir)
        result = False,
    else:
      try:
        os.makedirs(file_storage_dir, 0750)
      except OSError, err:
        logging.error("Cannot create file storage directory '%s': %s",
                      file_storage_dir, err)
        result = False,
  return result
1578

    
1579

    
1580
def RemoveFileStorageDir(file_storage_dir):
  """Remove file storage directory.

  Remove it only if it's empty. If not log an error and return.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    removal was successful or not

  """
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  # the trailing commas below build one-element tuples, matching the
  # documented return shape
  result = True,
  if not file_storage_dir:
    result = False,
  else:
    if os.path.exists(file_storage_dir):
      if not os.path.isdir(file_storage_dir):
        logging.error("'%s' is not a directory", file_storage_dir)
        result = False,
      # deletes dir only if empty, otherwise we want to return False
      # NOTE(review): there is no 'else', so rmdir is attempted even
      # when the path is not a directory, producing a second logged
      # error -- confirm whether this is intended
      try:
        os.rmdir(file_storage_dir)
      except OSError, err:
        logging.exception("Cannot remove file storage directory '%s'",
                          file_storage_dir)
        result = False,
  return result
1610

    
1611

    
1612
def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
  """Rename the file storage directory.

  Args:
    old_file_storage_dir: string containing the old path
    new_file_storage_dir: string containing the new path

  Returns:
    tuple with first element a boolean indicating whether dir
    rename was successful or not

  """
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
  # the trailing commas below build one-element tuples, matching the
  # documented return shape
  result = True,
  if not old_file_storage_dir or not new_file_storage_dir:
    # one of the paths was rejected by _TransformFileStorageDir
    result = False,
  else:
    if not os.path.exists(new_file_storage_dir):
      if os.path.isdir(old_file_storage_dir):
        try:
          os.rename(old_file_storage_dir, new_file_storage_dir)
        except OSError, err:
          logging.exception("Cannot rename '%s' to '%s'",
                            old_file_storage_dir, new_file_storage_dir)
          result =  False,
      else:
        logging.error("'%s' is not a directory", old_file_storage_dir)
        result = False,
    else:
      # refuse to rename when the destination already exists
      if os.path.exists(old_file_storage_dir):
        logging.error("Cannot rename '%s' to '%s'. Both locations exist.",
                      old_file_storage_dir, new_file_storage_dir)
        result = False,
  return result
1647

    
1648

    
1649
def CloseBlockDevices(disks):
1650
  """Closes the given block devices.
1651

1652
  This means they will be switched to secondary mode (in case of DRBD).
1653

1654
  """
1655
  bdevs = []
1656
  for cf in disks:
1657
    rd = _RecursiveFindBD(cf)
1658
    if rd is None:
1659
      return (False, "Can't find device %s" % cf)
1660
    bdevs.append(rd)
1661

    
1662
  msg = []
1663
  for rd in bdevs:
1664
    try:
1665
      rd.Close()
1666
    except errors.BlockDeviceError, err:
1667
      msg.append(str(err))
1668
  if msg:
1669
    return (False, "Can't make devices secondary: %s" % ",".join(msg))
1670
  else:
1671
    return (True, "All devices secondary")
1672

    
1673

    
1674
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # hook scripts may only be named with these characters to be run
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      tuple of (success boolean, captured output string)

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        # only the first 4096 bytes of combined stdout/stderr are kept
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry the wait when interrupted by a signal (EINTR)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logging.exception("Error while closing fd %s", fd)
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: hooks path component, used to build the directory name
      - phase: constants.HOOKS_PHASE_PRE or constants.HOOKS_PHASE_POST
      - env: environment passed to each hook script

    Returns:
      list of ("subdir/script", status, output) tuples; an empty list
      if the hooks directory cannot be listed

    Raises:
      errors.ProgrammerError: for an unknown phase

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # non-regular, non-executable or oddly-named entries are only
      # reported as skipped, not executed
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
1779

    
1780

    
1781
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Args:
      - name: name of the allocator script to look up and run
      - idata: allocator input data (a string), written to a temporary
        file which is passed to the script as its only argument

    Return value: tuple of:
       - run status (one of the IARUN_ constants)
       - stdout
       - stderr
       - fail reason (as from utils.RunResult)

    """
    # tempfile is imported locally: the module-level imports do not
    # include it, which made the mkstemp call below raise NameError
    import tempfile

    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      # the input file is always cleaned up, even on failure
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
1815

    
1816

    
1817
class DevCacheManager(object):
1818
  """Simple class for managing a cache of block device information.
1819

1820
  """
1821
  _DEV_PREFIX = "/dev/"
1822
  _ROOT_DIR = constants.BDEV_CACHE_DIR
1823

    
1824
  @classmethod
1825
  def _ConvertPath(cls, dev_path):
1826
    """Converts a /dev/name path to the cache file name.
1827

1828
    This replaces slashes with underscores and strips the /dev
1829
    prefix. It then returns the full path to the cache file
1830

1831
    """
1832
    if dev_path.startswith(cls._DEV_PREFIX):
1833
      dev_path = dev_path[len(cls._DEV_PREFIX):]
1834
    dev_path = dev_path.replace("/", "_")
1835
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1836
    return fpath
1837

    
1838
  @classmethod
1839
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1840
    """Updates the cache information for a given device.
1841

1842
    """
1843
    if dev_path is None:
1844
      logging.error("DevCacheManager.UpdateCache got a None dev_path")
1845
      return
1846
    fpath = cls._ConvertPath(dev_path)
1847
    if on_primary:
1848
      state = "primary"
1849
    else:
1850
      state = "secondary"
1851
    if iv_name is None:
1852
      iv_name = "not_visible"
1853
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1854
    try:
1855
      utils.WriteFile(fpath, data=fdata)
1856
    except EnvironmentError, err:
1857
      logging.exception("Can't update bdev cache for %s", dev_path)
1858

    
1859
  @classmethod
1860
  def RemoveCache(cls, dev_path):
1861
    """Remove data for a dev_path.
1862

1863
    """
1864
    if dev_path is None:
1865
      logging.error("DevCacheManager.RemoveCache got a None dev_path")
1866
      return
1867
    fpath = cls._ConvertPath(dev_path)
1868
    try:
1869
      utils.RemoveFile(fpath)
1870
    except EnvironmentError, err:
1871
      logging.exception("Can't update bdev cache for %s", dev_path)