Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ d61cbe76

History | View | Annotate | Download (51.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import stat
30
import errno
31
import re
32
import subprocess
33
import random
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def _GetSshRunner():
  """Return a new SshRunner instance for talking to other nodes."""
  return ssh.SshRunner()
48

    
49

    
50
def StartMaster():
  """Activate local node as master node.

  This runs the master start script; failures are logged.

  Returns:
    True on success, False otherwise

  """
  start_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])

  if not start_result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (start_result.cmd, start_result.output))
  return False
66

    
67

    
68
def StopMaster():
  """Deactivate this node as master.

  This runs the master stop script; failures are logged.

  Returns:
    True on success, False otherwise

  """
  stop_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])

  if not stop_result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (stop_result.cmd, stop_result.output))
  return False
82

    
83

    
84
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  """Joins this node to the cluster.

  This does the following:
      - updates the hostkeys of the machine (rsa and dsa)
      - adds the ssh private key to the user
      - adds the ssh public key to the users' authorized_keys file

  Args:
    dsa, dsapub: private and public parts of the host DSA key
    rsa, rsapub: private and public parts of the host RSA key
    sshkey, sshpub: private and public parts of the cluster user ssh key

  Returns:
    True on success, False if the user ssh files can't be processed

  """
  # overwrite the host ssh keys; private keys must not be world-readable
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
  for name, content, mode in sshd_keys:
    utils.WriteFile(name, data=content, mode=mode)

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
                                                    mkdir=True)
  except errors.OpExecError, err:
    logger.Error("Error while processing user ssh files: %s" % err)
    return False

  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
    utils.WriteFile(name, data=content, mode=0600)

  utils.AddAuthorizedKey(auth_keys, sshpub)

  # restart sshd so the new host keys take effect; result deliberately
  # not checked (best-effort)
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])

  return True
115

    
116

    
117
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  This removes the regular files in the ganeti data directory, removes
  the cluster user's ssh key pair and deregisters its public key from
  authorized_keys.  Errors reading the ssh files are logged and cause
  an early return.

  """
  if os.path.isdir(constants.DATA_DIR):
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
      full_name = os.path.join(constants.DATA_DIR, rel_name)
      # only plain files are removed; directories and symlinks are kept
      if os.path.isfile(full_name) and not os.path.islink(full_name):
        utils.RemoveFile(full_name)

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
  except errors.OpExecError, err:
    logger.Error("Error while processing ssh files: %s" % err)
    return

  f = open(pub_key, 'r')
  try:
    # 8 kB is more than enough for a single public key
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
  finally:
    f.close()

  utils.RemoveFile(priv_key)
  utils.RemoveFile(pub_key)
141

    
142

    
143
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Args:
    vgname: the name of the volume group to query

  Returns:
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx, 'bootid': xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB
    bootid is an identifier that changes on every node reboot

  """
  outputarray = {}
  vginfo = _GetVGInfo(vgname)
  outputarray['vg_size'] = vginfo['vg_size']
  outputarray['vg_free'] = vginfo['vg_free']

  # merge in the hypervisor-specific node information (memory figures)
  hyper = hypervisor.GetHypervisor()
  hyp_info = hyper.GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  # the kernel boot id is unique per boot, so callers can use it to
  # detect node restarts
  f = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    outputarray["bootid"] = f.read(128).rstrip("\n")
  finally:
    f.close()

  return outputarray
174

    
175

    
176
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify
      'node-net-test': list of (name, primary_ip, secondary_ip) tuples
                       for node daemon connectivity checks

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  Returns:
    a dict with one entry per requested check, holding its results

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    # NOTE(review): this shuffles the caller's list in place; presumably
    # intended to spread the ssh checks over the cluster -- confirm
    random.shuffle(what['nodelist'])
    for node in what['nodelist']:
      success, message = _GetSshRunner().VerifyNodeHostname(node)
      if not success:
        # only failures are reported back
        result['nodelist'][node] = message
  if 'node-net-test' in what:
    result['node-net-test'] = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    # find our own entry to learn the source addresses for the pings
    for name, pip, sip in what['node-net-test']:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      result['node-net-test'][my_name] = ("Can't find my own"
                                          " primary/secondary IP"
                                          " in the node list")
    else:
      port = ssconf.SimpleStore().GetNodeDaemonPort()
      # ping each node's primary IP and, when different, its secondary IP
      for name, pip, sip in what['node-net-test']:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          result['node-net-test'][name] = ("failure using the %s"
                                           " interface(s)" %
                                           " and ".join(fail))

  return result
237

    
238

    
239
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Args:
    vg_name: the volume group whose logical volumes should be listed

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    # NOTE(review): the failure path returns the raw output string while
    # the success path returns a dict -- verify callers handle both
    return result.output

  for line in result.stdout.splitlines():
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    # lv_attr is a 6-character flag string; normalize short values so
    # the positional tests below cannot raise IndexError
    if len(attr) != 6:
      attr = '------'
    # flag position 4 is the state ('-' meaning inactive), position 5
    # the open status ('o' meaning open/online)
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
268

    
269

    
270
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  # thin wrapper so the node daemon RPC layer has a stable entry point
  return utils.ListVolumeGroups()
278

    
279

    
280
def NodeVolumes():
  """List all volumes on this node.

  Returns:
    A list of dicts with the keys 'name', 'size', 'dev' and 'vg', one
    entry per logical volume.
    NOTE(review): the failure path returns {} while success returns a
    list -- verify callers handle both.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  def parse_dev(dev):
    # the 'devices' field can look like '/dev/sda(0)'; strip the
    # parenthesized extent information
    if '(' in dev:
      return dev.split('(')[0]
    else:
      return dev

  def map_line(line):
    # line is the already-split list of the four requested lvs fields
    return {
      'name': line[0].strip(),
      'size': line[1].strip(),
      'dev': parse_dev(line[2].strip()),
      'vg': line[3].strip(),
    }

  return [map_line(line.split('|')) for line in result.stdout.splitlines()]
307

    
308

    
309
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Args:
    bridges_list: list of bridge names to check

  Returns:
    True if all of them exist, false otherwise

  """
  # short-circuits on the first missing bridge, like the explicit loop
  return all(utils.BridgeExists(brname) for brname in bridges_list)
321

    
322

    
323
def GetInstanceList():
324
  """Provides a list of instances.
325

326
  Returns:
327
    A list of all running instances on the current node
328
    - instance1.example.com
329
    - instance2.example.com
330

331
  """
332
  try:
333
    names = hypervisor.GetHypervisor().ListInstances()
334
  except errors.HypervisorError, err:
335
    logger.Error("error enumerating instances: %s" % str(err))
336
    raise
337

    
338
  return names
339

    
340

    
341
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    (an empty dict if the hypervisor does not know the instance)

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}
  # positions 2, 4 and 5 of the hypervisor info tuple hold the memory
  # size (int), the state string and the cpu time (float)
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
364

    
365

    
366
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, 'vcpus': 1 }

  """
  all_info = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if not all_info:
    return {}
  output = {}
  for (name, dummy_id, memory, vcpus, state, times) in all_info:
    output[name] = {
      'memory': memory,
      'vcpus': vcpus,
      'state': state,
      'time': times,
      }
  return output
396

    
397

    
398
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False if a disk can't be found or the create
    script fails

  Raises:
    BlockDeviceError: if a required block device is not set up

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # the OS scripts operate on the real attached devices, so resolve and
  # open them before invoking the script
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file so failed installs can be debugged afterwards
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # NOTE(review): '&>' redirection is a bash-ism; presumably RunCmd uses
  # a shell that supports it -- confirm
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)
  if result.failed:
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
                 " output: %s" %
                 (command, result.fail_reason, logfile, result.output))
    return False

  return True
451

    
452

    
453
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
454
  """Run the OS rename script for an instance.
455

456
  Args:
457
    instance: the instance object
458
    old_name: the old name of the instance
459
    os_disk: the instance-visible name of the os device
460
    swap_disk: the instance-visible name of the swap device
461

462
  """
463
  inst_os = OSFromDisk(instance.os)
464

    
465
  script = inst_os.rename_script
466

    
467
  os_device = instance.FindDisk(os_disk)
468
  if os_device is None:
469
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
470
    return False
471

    
472
  swap_device = instance.FindDisk(swap_disk)
473
  if swap_device is None:
474
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
475
    return False
476

    
477
  real_os_dev = _RecursiveFindBD(os_device)
478
  if real_os_dev is None:
479
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
480
                                  str(os_device))
481
  real_os_dev.Open()
482

    
483
  real_swap_dev = _RecursiveFindBD(swap_device)
484
  if real_swap_dev is None:
485
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
486
                                  str(swap_device))
487
  real_swap_dev.Open()
488

    
489
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
490
                                           old_name,
491
                                           instance.name, int(time.time()))
492
  if not os.path.exists(constants.LOG_OS_DIR):
493
    os.mkdir(constants.LOG_OS_DIR, 0750)
494

    
495
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
496
                                inst_os.path, script, old_name, instance.name,
497
                                real_os_dev.dev_path, real_swap_dev.dev_path,
498
                                logfile)
499

    
500
  result = utils.RunCmd(command)
501

    
502
  if result.failed:
503
    logger.Error("os create command '%s' returned error: %s"
504
                 " output: %s" %
505
                 (command, result.fail_reason, result.output))
506
    return False
507

    
508
  return True
509

    
510

    
511
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.

  """
  # default result: every key None; only overwritten on a full parse
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    return retdic
  # vgs emits one line like ' 1234.00:567.00:2'; drop a trailing
  # separator before splitting into the three fields
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError, err:
      logger.Error("Fail to parse vgs output: %s" % str(err))
  else:
    logger.Error("vgs output has the wrong number of fields (expected"
                 " three): %s" % str(valarr))
  return retdic
551

    
552

    
553
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Args:
    instance: the instance whose disks should be looked up and opened

  Returns:
    list of (disk, device) pairs, one per instance disk

  Raises:
    BlockDeviceError: if any disk cannot be found

  """
  devlist = []
  for disk in instance.disks:
    found_dev = _RecursiveFindBD(disk)
    if found_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found_dev.Open()
    devlist.append((disk, found_dev))
  return devlist
569

    
570

    
571
def StartInstance(instance, extra_args):
572
  """Start an instance.
573

574
  Args:
575
    instance - name of instance to start.
576

577
  """
578
  running_instances = GetInstanceList()
579

    
580
  if instance.name in running_instances:
581
    return True
582

    
583
  block_devices = _GatherBlockDevs(instance)
584
  hyper = hypervisor.GetHypervisor()
585

    
586
  try:
587
    hyper.StartInstance(instance, block_devices, extra_args)
588
  except errors.HypervisorError, err:
589
    logger.Error("Failed to start instance: %s" % err)
590
    return False
591

    
592
  return True
593

    
594

    
595
def ShutdownInstance(instance):
596
  """Shut an instance down.
597

598
  Args:
599
    instance - name of instance to shutdown.
600

601
  """
602
  running_instances = GetInstanceList()
603

    
604
  if instance.name not in running_instances:
605
    return True
606

    
607
  hyper = hypervisor.GetHypervisor()
608
  try:
609
    hyper.StopInstance(instance)
610
  except errors.HypervisorError, err:
611
    logger.Error("Failed to stop instance: %s" % err)
612
    return False
613

    
614
  # test every 10secs for 2min
615
  shutdown_ok = False
616

    
617
  time.sleep(1)
618
  for dummy in range(11):
619
    if instance.name not in GetInstanceList():
620
      break
621
    time.sleep(10)
622
  else:
623
    # the shutdown did not succeed
624
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
625

    
626
    try:
627
      hyper.StopInstance(instance, force=True)
628
    except errors.HypervisorError, err:
629
      logger.Error("Failed to stop instance: %s" % err)
630
      return False
631

    
632
    time.sleep(1)
633
    if instance.name in GetInstanceList():
634
      logger.Error("could not shutdown instance '%s' even by destroy")
635
      return False
636

    
637
  return True
638

    
639

    
640
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.

  Args:
    instance    - instance object to reboot
    reboot_type - how to reboot [soft,hard,full]
    extra_args  - extra arguments passed to StartInstance on hard
                  reboots

  Returns:
    True on success, False on failure

  Raises:
    ParameterError: if reboot_type is not a known reboot type

  """
  running_instances = GetInstanceList()

  if instance.name not in running_instances:
    logger.Error("Cannot reboot instance that is not running")
    return False

  hyper = hypervisor.GetHypervisor()
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    # soft reboot is delegated entirely to the hypervisor
    try:
      hyper.RebootInstance(instance)
    except errors.HypervisorError, err:
      logger.Error("Failed to soft reboot instance: %s" % err)
      return False
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    # hard reboot is implemented as a full stop/start cycle
    try:
      ShutdownInstance(instance)
      StartInstance(instance, extra_args)
    except errors.HypervisorError, err:
      logger.Error("Failed to hard reboot instance: %s" % err)
      return False
  else:
    raise errors.ParameterError("reboot_type invalid")


  return True
673

    
674

    
675
def MigrateInstance(instance, target, live):
676
  """Migrates an instance to another node.
677

678
  """
679
  hyper = hypervisor.GetHypervisor()
680

    
681
  try:
682
    hyper.MigrateInstance(instance, target, live)
683
  except errors.HypervisorError, err:
684
    msg = "Failed to migrate instance: %s" % str(err)
685
    logger.Error(msg)
686
    return (False, msg)
687
  return (True, "Migration successfull")
688

    
689

    
690
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object
   size: the size of the physical underlying device
   owner: a string with the name of the instance
   on_primary: a boolean indicating if it is the primary node or not
   info: string that will be sent to the physical device creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      clist.append(crdev)
  try:
    # if a device with the same unique_id already exists, remove it
    # first so the creation below starts from a clean slate
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # deliberate best-effort: failure to attach simply means there is
    # nothing to remove
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    # record the assembled device in the node-local device cache
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
744

    
745

    
746
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively.

  Args:
    disk: the objects.Disk describing the device to remove

  Returns:
    True if the device (and all its children) were removed or were not
    attached in the first place, False otherwise

  """
  try:
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
    rdev = None
  if rdev is not None:
    r_path = rdev.dev_path
    result = rdev.Remove()
    if result:
      # the device is gone, drop it from the node-local device cache
      DevCacheManager.RemoveCache(r_path)
  else:
    result = True
  if disk.children:
    for child in disk.children:
      result = result and RemoveBlockDevice(child)
  return result
771

    
772

    
773
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    owner: the instance name, used to tag the device cache entry
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        # tolerate failed children up to the redundancy margin computed
        # above; past that, re-raise the failure
        if children.count(None) >= mcn:
          raise
        cdev = None
        logger.Debug("Error in child activation: %s" % str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    # record the assembled device in the node-local device cache
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    result = True
  return result
819

    
820

    
821
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    return assembled
  # primary (or openable secondary): hand back the device path
  return assembled.dev_path
835

    
836

    
837
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Args:
    disk: the objects.Disk describing the device to shut down

  Returns:
    True if the device (and all its children) were shut down or were
    not attached, False otherwise

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      # the device is no longer active, drop it from the device cache
      DevCacheManager.RemoveCache(r_path)
  else:
    result = True
  if disk.children:
    for child in disk.children:
      result = result and ShutdownBlockDevice(child)
  return result
860

    
861

    
862
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Args:
    parent_cdev: objects.Disk of the parent mirror device
    new_cdevs: list of objects.Disk describing the children to add

  Returns:
    True on success, False if the parent or a child can't be found

  """
  # allow a partial match: we are extending a possibly-degraded mirror
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if None in new_bdevs:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
877

    
878

    
879
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Args:
    parent_cdev: objects.Disk of the parent mirror device
    new_cdevs: list of objects.Disk describing the children to remove

  Returns:
    True on success, False if the parent or a child device can't be
    found

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  devs = []
  for disk in new_cdevs:
    # use the static device path when the disk type has one...
    rpath = disk.StaticDevPath()
    if rpath is None:
      # ...otherwise the path must be looked up dynamically
      bd = _RecursiveFindBD(disk)
      if bd is None:
        logger.Error("Can't find dynamic device %s while removing children" %
                     disk)
        return False
      else:
        devs.append(bd.dev_path)
    else:
      devs.append(rpath)
  parent_bdev.RemoveChildren(devs)
  return True
902

    
903

    
904
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    BlockDeviceError: if any of the devices cannot be found

  """
  status = []
  for disk in disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    status.append(found.CombinedSyncStatus())
  return status
922

    
923

    
924
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): allow_partial is accepted but never referenced in this
  # body (it is not passed down the recursion either) -- verify callers'
  # expectations
  if disk.children:
    children = [_RecursiveFindBD(chdisk) for chdisk in disk.children]
  else:
    children = []

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
946

    
947

    
948
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    return None
  # prepend the identification fields to the sync status tuple
  return (found.dev_path, found.major, found.minor) + found.GetSyncStatus()
964

    
965

    
966
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. The operation is
  refused unless the target is one of the known configuration files.

  Args:
    file_name: absolute path of the file to write
    data: the contents to write
    mode, uid, gid: permissions/ownership for the file
    atime, mtime: timestamps to set on the file

  Returns:
    True on success, False if the name is relative or not whitelisted

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of overwritable files: static config files plus whatever
  # the simple store manages
  permitted = set([
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    ])
  permitted.update(ssconf.SimpleStore().GetFileList())

  if file_name not in permitted:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return True


def _ErrnoOrStr(err):
995
  """Format an EnvironmentError exception.
996

997
  If the `err` argument has an errno attribute, it will be looked up
998
  and converted into a textual EXXXX description. Otherwise the string
999
  representation of the error will be returned.
1000

1001
  """
1002
  if hasattr(err, 'errno'):
1003
    detail = errno.errorcode[err.errno]
1004
  else:
1005
    detail = str(err)
1006
  return detail
1007

    
1008

    
1009
def _OSOndiskVersion(name, os_dir):
  """Compute and return the API version of a given OS.

  This function will try to read the API version of the os given by
  the 'name' parameter and residing in the 'os_dir' directory.

  Args:
    name: the OS name (used only in error messages)
    os_dir: the directory holding this OS on disk

  Returns:
    the OS API version as an integer

  Raises:
    errors.InvalidOS: if the version file is missing, not a regular
      file, unreadable, or does not contain an integer

  """
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  # reject anything that is not a plain file (directory, device, ...)
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
                           " a regular file")

  try:
    f = open(api_file)
    try:
      # the version file is expected to be tiny; 256 bytes is plenty
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, os_dir,
                           "API version is not integer (%s)" % str(err))

  return api_version


def DiagnoseOS(top_dirs=None):
1052
  """Compute the validity for all OSes.
1053

1054
  Returns an OS object for each name in all the given top directories
1055
  (if not given defaults to constants.OS_SEARCH_PATH)
1056

1057
  Returns:
1058
    list of OS objects
1059

1060
  """
1061
  if top_dirs is None:
1062
    top_dirs = constants.OS_SEARCH_PATH
1063

    
1064
  result = []
1065
  for dir_name in top_dirs:
1066
    if os.path.isdir(dir_name):
1067
      try:
1068
        f_names = utils.ListVisibleFiles(dir_name)
1069
      except EnvironmentError, err:
1070
        logger.Error("Can't list the OS directory %s: %s" %
1071
                     (dir_name, str(err)))
1072
        break
1073
      for name in f_names:
1074
        try:
1075
          os_inst = OSFromDisk(name, base_dir=dir_name)
1076
          result.append(os_inst)
1077
        except errors.InvalidOS, err:
1078
          result.append(objects.OS.FromInvalidOS(err))
1079

    
1080
  return result
1081

    
1082

    
1083
def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name
    base_dir: directory containing the OS scripts. Defaults to a
              search in all the OS_SEARCH_PATH directories.

  Returns:
    an objects.OS instance with status OS_VALID_STATUS

  Raises:
    errors.InvalidOS: if the OS directory cannot be located, the API
      version is missing/mismatched, or a required script is missing,
      not executable, or not a regular file

  """

  if base_dir is None:
    os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
    if os_dir is None:
      raise errors.InvalidOS(name, None, "OS dir not found in search path")
  else:
    os_dir = os.path.sep.join([base_dir, name])

  # raises InvalidOS if the version file is bad or missing
  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
                           " (found %s want %s)"
                           % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    # each script must be executable by its owner...
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
                             script)

    # ...and a regular file (not a directory, device, etc.)
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
                             script)

  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    rename_script=os_scripts['rename'],
                    api_version=api_version)


def GrowBlockDevice(disk, amount):
1141
  """Grow a stack of block devices.
1142

1143
  This function is called recursively, with the childrens being the
1144
  first one resize.
1145

1146
  Args:
1147
    disk: the disk to be grown
1148

1149
  Returns: a tuple of (status, result), with:
1150
    status: the result (true/false) of the operation
1151
    result: the error message if the operation failed, otherwise not used
1152

1153
  """
1154
  r_dev = _RecursiveFindBD(disk)
1155
  if r_dev is None:
1156
    return False, "Cannot find block device %s" % (disk,)
1157

    
1158
  try:
1159
    r_dev.Grow(amount)
1160
  except errors.BlockDeviceError, err:
1161
    return False, str(err)
1162

    
1163
  return True, None
1164

    
1165

    
1166
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if
    the device is not set up (or no suitable child exists)

  Raises:
    errors.ProgrammerError: on non-lvm leaf devices

  """
  if disk.children:
    if len(disk.children) == 1:
      # a single child mirrors the parent, recurse into it directly
      return SnapshotBlockDevice(disk.children[0])
    # several children: pick the one whose size matches the parent's
    for child in disk.children:
      if child.size == disk.size:
        return SnapshotBlockDevice(child)
    # no matching child: same implicit-None fall-through as before
    return None
  if disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is None:
      return None
    # let's stay on the safe side and ask for the full size, for now
    return r_dev.Snapshot(disk.size)
  raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                               " '%s' of type '%s'" %
                               (disk.unique_id, disk.dev_type))


def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  Raises:
    errors.BlockDeviceError: if the snapshot device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-export logfile, named after os/instance/timestamp
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  # run the OS export script, with its stderr going to the logfile
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  # on the destination node, store the compressed stream under destdir
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
                                       destcmd)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os snapshot export command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True


def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # NOTE(review): nic_count ends up as the *last* enumerate index,
  # i.e. len(nics) - 1, and 0 for an empty list; the importer
  # presumably compensates - confirm before changing this convention
  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  # same last-index convention as nic_count above
  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export for this instance with the fresh one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True


def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    a mandatory section is missing.

  """
  config_path = os.path.join(dest, constants.EXPORT_CONF_FILE)

  exp_info = objects.SerializableConfigParser()
  exp_info.read(config_path)

  # both the export and the instance sections are mandatory
  has_export = exp_info.has_section(constants.INISECT_EXP)
  has_instance = exp_info.has_section(constants.INISECT_INS)
  if not (has_export and has_instance):
    return None
  return exp_info


def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  Raises:
    errors.BlockDeviceError: if the os or swap device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-import logfile, named after os/instance/timestamp
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # stream the image from the source node over ssh...
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
                                       destcmd)

  # ...decompress it locally and feed it to the OS import script
  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os import command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True


def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    list of visible entries in the export directory (empty if the
    directory does not exist)

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)


def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(os.path.join(constants.EXPORT_DIR, export))
  return True


def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of tuples (disk, unique_id), where
  unique_id is the new logical/physical id for the disk. The return
  value will be a combined boolean result (True only if all renames
  succeeded).

  """
  result = True
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      # a missing device fails the batch but does not abort it
      result = False
      continue
    try:
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        # the device path changed, so the old cache entry is stale
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
      result = False
  return result


def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
  # os.path.commonprefix compares characters, not path components, so
  # e.g. '/srv/ganeti-evil' would pass a check against '/srv/ganeti';
  # accept only the base itself or a true subdirectory of it
  if (file_storage_dir != base_file_storage_dir and
      not file_storage_dir.startswith(base_file_storage_dir + os.sep)):
    logger.Error("file storage directory '%s' is not under base file"
                 " storage directory '%s'" %
                 (file_storage_dir, base_file_storage_dir))
    return None
  return file_storage_dir


def CreateFileStorageDir(file_storage_dir):
  """Create file storage directory.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    creation was successful or not

  """
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  # note: 'True,' is a one-element tuple, matching the documented
  # tuple return type
  result = True,
  if not file_storage_dir:
    # the path was rejected by _TransformFileStorageDir
    result = False,
  else:
    if os.path.exists(file_storage_dir):
      if not os.path.isdir(file_storage_dir):
        logger.Error("'%s' is not a directory" % file_storage_dir)
        result = False,
    else:
      try:
        os.makedirs(file_storage_dir, 0750)
      except OSError, err:
        logger.Error("Cannot create file storage directory '%s': %s" %
                     (file_storage_dir, err))
        result = False,
  return result


def RemoveFileStorageDir(file_storage_dir):
  """Remove file storage directory.

  Remove it only if it's empty. If not log an error and return.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    removal was successful or not

  """
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  # note: 'True,' is a one-element tuple, matching the documented
  # tuple return type
  result = True,
  if not file_storage_dir:
    # the path was rejected by _TransformFileStorageDir
    result = False,
  else:
    if os.path.exists(file_storage_dir):
      if not os.path.isdir(file_storage_dir):
        logger.Error("'%s' is not a directory" % file_storage_dir)
        result = False,
      # deletes dir only if empty, otherwise we want to return False
      # NOTE(review): rmdir is attempted even when the path is not a
      # directory; the resulting OSError is caught below, so a second
      # error is logged but the final result stays False
      try:
        os.rmdir(file_storage_dir)
      except OSError, err:
        logger.Error("Cannot remove file storage directory '%s': %s" %
                     (file_storage_dir, err))
        result = False,
  return result


def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
1559
  """Rename the file storage directory.
1560

1561
  Args:
1562
    old_file_storage_dir: string containing the old path
1563
    new_file_storage_dir: string containing the new path
1564

1565
  Returns:
1566
    tuple with first element a boolean indicating wheter dir
1567
    rename was successful or not
1568

1569
  """
1570
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
1571
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
1572
  result = True,
1573
  if not old_file_storage_dir or not new_file_storage_dir:
1574
    result = False,
1575
  else:
1576
    if not os.path.exists(new_file_storage_dir):
1577
      if os.path.isdir(old_file_storage_dir):
1578
        try:
1579
          os.rename(old_file_storage_dir, new_file_storage_dir)
1580
        except OSError, err:
1581
          logger.Error("Cannot rename '%s' to '%s': %s"
1582
                       % (old_file_storage_dir, new_file_storage_dir, err))
1583
          result =  False,
1584
      else:
1585
        logger.Error("'%s' is not a directory" % old_file_storage_dir)
1586
        result = False,
1587
    else:
1588
      if os.path.exists(old_file_storage_dir):
1589
        logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
1590
                     old_file_storage_dir, new_file_storage_dir)
1591
        result = False,
1592
  return result
1593

    
1594

    
1595
def CloseBlockDevices(disks):
1596
  """Closes the given block devices.
1597

1598
  This means they will be switched to secondary mode (in case of DRBD).
1599

1600
  """
1601
  bdevs = []
1602
  for cf in disks:
1603
    rd = _RecursiveFindBD(cf)
1604
    if rd is None:
1605
      return (False, "Can't find device %s" % cf)
1606
    bdevs.append(rd)
1607

    
1608
  msg = []
1609
  for rd in bdevs:
1610
    try:
1611
      rd.Close()
1612
    except errors.BlockDeviceError, err:
1613
      msg.append(str(err))
1614
  if msg:
1615
    return (False, "Can't make devices secondary: %s" % ",".join(msg))
1616
  else:
1617
    return (True, "All devices secondary")
1618

    
1619

    
1620
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only scripts whose basenames match this are executed (no dots,
  # spaces or other shell-unfriendly characters)
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      a (success, output) tuple; success is True iff the script exited
      with status zero, output is (at most) the first 4 kB of its
      combined stdout/stderr

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        # only the first 4096 bytes of output are captured
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          # retry the wait if it was interrupted by a signal
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: the hooks path component (names the per-opcode subdir)
      - phase: one of the constants.HOOKS_PHASE_* values
      - env: environment dict passed to every hook script

    Returns:
      a list of ("subdir/script", HKR_* status, output) tuples

    Raises:
      errors.ProgrammerError: on an unknown phase value

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # must log
      # a missing or unreadable hooks dir simply means no hooks to run
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        # non-executable or oddly-named entries are reported as skipped
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr


class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Args:
      - name: the iallocator script name
      - idata: string with the allocator request data

    Return value: tuple of:
       - run status (one of the IARUN_ constants)
       - stdout
       - stderr
       - fail reason (as from utils.RunResult)

    """
    # BUGFIX: tempfile is not in the module-level import list, so the
    # mkstemp call below raised NameError; the allocator path is cold,
    # so a function-scope import is harmless
    import tempfile

    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    # the script expects its input as a file path, so stage the request
    # data in a temporary file that is always removed afterwards
    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)


class DevCacheManager(object):
  """Simple class for managing a cache of block device information.

  The cache is kept as one small text file per device under
  constants.BDEV_CACHE_DIR; all operations are best-effort and only
  log on failure.

  """
  _DEV_PREFIX = "/dev/"
  _ROOT_DIR = constants.BDEV_CACHE_DIR

  @classmethod
  def _ConvertPath(cls, dev_path):
    """Converts a /dev/name path to the cache file name.

    This replaces slashes with underscores and strips the /dev
    prefix. It then returns the full path to the cache file.

    """
    if dev_path.startswith(cls._DEV_PREFIX):
      dev_path = dev_path[len(cls._DEV_PREFIX):]
    dev_path = dev_path.replace("/", "_")
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
    return fpath

  @classmethod
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
    """Updates the cache information for a given device.

    Args:
      dev_path: path of the device to record (None is rejected)
      owner: the owning entity, stringified into the cache file
      on_primary: whether the device lives on the primary node
      iv_name: instance-visible name, or None if not visible

    """
    if dev_path is None:
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    if on_primary:
      state = "primary"
    else:
      state = "secondary"
    if iv_name is None:
      iv_name = "not_visible"
    # cache file format: "<owner> <state> <iv_name>\n"
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
    try:
      utils.WriteFile(fpath, data=fdata)
    except EnvironmentError, err:
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))

  @classmethod
  def RemoveCache(cls, dev_path):
    """Remove data for a dev_path.

    Args:
      dev_path: path of the device whose cache entry to delete

    """
    if dev_path is None:
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    try:
      utils.RemoveFile(fpath)
    except EnvironmentError, err:
      # NOTE(review): the message says "update" but this is a removal -
      # looks copy-pasted from UpdateCache
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))
                   (dev_path, str(err)))