Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ c9673d92

History | View | Annotate | Download (50.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import stat
30
import errno
31
import re
32
import subprocess
33
import random
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def _GetSshRunner():
  """Return a fresh SshRunner instance.

  """
  runner = ssh.SshRunner()
  return runner
48

    
49

    
50
def StartMaster():
  """Activate local node as master node.

  There are two needed steps for this:
    - run the master script
    - register the cron script

  """
  start_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])

  # a zero exit code means the cluster interface came up fine
  if not start_result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (start_result.cmd, start_result.output))
  return False
66

    
67

    
68
def StopMaster():
  """Deactivate this node as master.

  This runs the master stop script.

  """
  stop_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])

  # a zero exit code means the cluster interface went down fine
  if not stop_result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (stop_result.cmd, stop_result.output))
  return False
82

    
83

    
84
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
85
  """Joins this node to the cluster.
86

87
  This does the following:
88
      - updates the hostkeys of the machine (rsa and dsa)
89
      - adds the ssh private key to the user
90
      - adds the ssh public key to the users' authorized_keys file
91

92
  """
93
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
94
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
95
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
96
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
97
  for name, content, mode in sshd_keys:
98
    utils.WriteFile(name, data=content, mode=mode)
99

    
100
  try:
101
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
102
                                                    mkdir=True)
103
  except errors.OpExecError, err:
104
    logger.Error("Error while processing user ssh files: %s" % err)
105
    return False
106

    
107
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
108
    utils.WriteFile(name, data=content, mode=0600)
109

    
110
  utils.AddAuthorizedKey(auth_keys, sshpub)
111

    
112
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
113

    
114
  return True
115

    
116

    
117
def LeaveCluster():
118
  """Cleans up the current node and prepares it to be removed from the cluster.
119

120
  """
121
  if os.path.isdir(constants.DATA_DIR):
122
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
123
      full_name = os.path.join(constants.DATA_DIR, rel_name)
124
      if os.path.isfile(full_name) and not os.path.islink(full_name):
125
        utils.RemoveFile(full_name)
126

    
127
  try:
128
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
129
  except errors.OpExecError, err:
130
    logger.Error("Error while processing ssh files: %s" % err)
131
    return
132

    
133
  f = open(pub_key, 'r')
134
  try:
135
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
136
  finally:
137
    f.close()
138

    
139
  utils.RemoveFile(priv_key)
140
  utils.RemoveFile(pub_key)
141

    
142

    
143
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Returns:
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  # merge in the hypervisor-reported data, if any
  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  # the kernel boot id changes on every reboot, so it lets callers
  # detect that this node has been rebooted
  boot_id_file = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    node_info["bootid"] = boot_id_file.read(128).rstrip("\n")
  finally:
    boot_id_file.close()

  return node_info
174

    
175

    
176
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify
      'node-net-test': list of (name, primary_ip, secondary_ip) tuples;
                       tcp connectivity to each node's daemon port is
                       tested from this node's own addresses

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  Returns:
    a dict with one entry per requested check; for 'nodelist' and
    'node-net-test' only failures are recorded (absence means success)

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    # randomize the order so repeated cluster verifies don't always
    # probe nodes in the same sequence; NOTE: this mutates the
    # caller-supplied list in place
    random.shuffle(what['nodelist'])
    for node in what['nodelist']:
      success, message = _GetSshRunner().VerifyNodeHostname(node)
      if not success:
        # only failures are recorded
        result['nodelist'][node] = message
  if 'node-net-test' in what:
    result['node-net-test'] = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    # find our own entry to learn which source addresses to ping from
    for name, pip, sip in what['node-net-test']:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      result['node-net-test'][my_name] = ("Can't find my own"
                                          " primary/secondary IP"
                                          " in the node list")
    else:
      port = ssconf.SimpleStore().GetNodeDaemonPort()
      for name, pip, sip in what['node-net-test']:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        # only test the secondary interface when it is distinct from
        # the primary one
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          result['node-net-test'][name] = ("failure using the %s"
                                           " interface(s)" %
                                           " and ".join(fail))

  return result
237

    
238

    
239
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Args:
    vg_name: name of the volume group whose LVs should be listed

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

  NOTE(review): on command failure this returns the raw `lvs` error
  output (a string) rather than a dict, which contradicts the
  documented return type -- callers must account for this.

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return result.output

  for line in result.stdout.splitlines():
    # lvs pads its fields and may emit a trailing separator
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    if len(attr) != 6:
      # normalize malformed attribute strings so the indexing below
      # is always safe
      attr = '------'
    # lv_attr position 4 is the state ('-' = inactive), position 5 is
    # the device open status ('o' = open/online)
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
268

    
269

    
270
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  # this is a plain delegation to the shared utils implementation
  return utils.ListVolumeGroups()
278

    
279

    
280
def NodeVolumes():
  """List all volumes on this node.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  def strip_dev(dev):
    # device entries look like "/dev/sda(5)"; keep only the path part
    if '(' in dev:
      return dev.split('(')[0]
    return dev

  volumes = []
  for line in result.stdout.splitlines():
    fields = line.split('|')
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': strip_dev(fields[2].strip()),
      'vg': fields[3].strip(),
      })
  return volumes
307

    
308

    
309
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns:
    True if all of them exist, false otherwise

  """
  for bridge in bridges_list:
    if utils.BridgeExists(bridge):
      continue
    # short-circuit on the first missing bridge
    return False
  return True
321

    
322

    
323
def GetInstanceList():
324
  """Provides a list of instances.
325

326
  Returns:
327
    A list of all running instances on the current node
328
    - instance1.example.com
329
    - instance2.example.com
330

331
  """
332
  try:
333
    names = hypervisor.GetHypervisor().ListInstances()
334
  except errors.HypervisorError, err:
335
    logger.Error("error enumerating instances: %s" % str(err))
336
    raise
337

    
338
  return names
339

    
340

    
341
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    # unknown instance: empty dict, same keys absent as before
    return {}
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
364

    
365

    
366
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  all_info = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for entry in iinfo:
      # each entry is (name, id, memory, vcpus, state, cpu_time)
      name, dummy_id, memory, vcpus, state, times = entry
      all_info[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return all_info
396

    
397

    
398
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False if a device can't be found or the OS create
    script fails

  Raises:
    errors.BlockDeviceError: if a disk is known but not set up

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # translate the instance-visible device names into disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # resolve the disk objects into actual (activated) block devices and
  # open them read/write for the create script
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # the create script's output is captured in a per-run logfile
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)
  if result.failed:
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
                 " output: %s" %
                 (command, result.fail_reason, logfile, result.output))
    return False

  return True
451

    
452

    
453
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
  """Run the OS rename script for an instance.

  Args:
    instance: the instance object
    old_name: the old name of the instance
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False if a device can't be found or the rename
    script fails

  Raises:
    errors.BlockDeviceError: if a disk is known but not set up

  """
  inst_os = OSFromDisk(instance.os)

  script = inst_os.rename_script

  # translate the instance-visible device names into disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # resolve the disk objects into actual (activated) block devices and
  # open them read/write for the rename script
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # the rename script's output is captured in a per-run logfile
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                           old_name,
                                           instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
                                inst_os.path, script, old_name, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)

  if result.failed:
    # NOTE(review): the message says "os create" but this runs the
    # rename script -- likely a copy-paste leftover
    logger.Error("os create command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
509

    
510

    
511
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.

  """
  # default result: all values None; returned unchanged on any failure
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    return retdic
  # vgs pads its output and may emit a trailing separator; strip both
  # before splitting into the three expected fields
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      # sizes are reported as floats in MiB; round to whole MiB
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError, err:
      logger.Error("Fail to parse vgs output: %s" % str(err))
  else:
    logger.Error("vgs output has the wrong number of fields (expected"
                 " three): %s" % str(valarr))
  return retdic
551

    
552

    
553
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  """
  devices = []
  for disk in instance.disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    # open read/write since this runs on the primary node
    found.Open()
    devices.append((disk, found))
  return devices
569

    
570

    
571
def StartInstance(instance, extra_args):
572
  """Start an instance.
573

574
  Args:
575
    instance - name of instance to start.
576

577
  """
578
  running_instances = GetInstanceList()
579

    
580
  if instance.name in running_instances:
581
    return True
582

    
583
  block_devices = _GatherBlockDevs(instance)
584
  hyper = hypervisor.GetHypervisor()
585

    
586
  try:
587
    hyper.StartInstance(instance, block_devices, extra_args)
588
  except errors.HypervisorError, err:
589
    logger.Error("Failed to start instance: %s" % err)
590
    return False
591

    
592
  return True
593

    
594

    
595
def ShutdownInstance(instance):
596
  """Shut an instance down.
597

598
  Args:
599
    instance - name of instance to shutdown.
600

601
  """
602
  running_instances = GetInstanceList()
603

    
604
  if instance.name not in running_instances:
605
    return True
606

    
607
  hyper = hypervisor.GetHypervisor()
608
  try:
609
    hyper.StopInstance(instance)
610
  except errors.HypervisorError, err:
611
    logger.Error("Failed to stop instance: %s" % err)
612
    return False
613

    
614
  # test every 10secs for 2min
615
  shutdown_ok = False
616

    
617
  time.sleep(1)
618
  for dummy in range(11):
619
    if instance.name not in GetInstanceList():
620
      break
621
    time.sleep(10)
622
  else:
623
    # the shutdown did not succeed
624
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
625

    
626
    try:
627
      hyper.StopInstance(instance, force=True)
628
    except errors.HypervisorError, err:
629
      logger.Error("Failed to stop instance: %s" % err)
630
      return False
631

    
632
    time.sleep(1)
633
    if instance.name in GetInstanceList():
634
      logger.Error("could not shutdown instance '%s' even by destroy")
635
      return False
636

    
637
  return True
638

    
639

    
640
def RebootInstance(instance, reboot_type, extra_args):
641
  """Reboot an instance.
642

643
  Args:
644
    instance    - name of instance to reboot
645
    reboot_type - how to reboot [soft,hard,full]
646

647
  """
648
  running_instances = GetInstanceList()
649

    
650
  if instance.name not in running_instances:
651
    logger.Error("Cannot reboot instance that is not running")
652
    return False
653

    
654
  hyper = hypervisor.GetHypervisor()
655
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
656
    try:
657
      hyper.RebootInstance(instance)
658
    except errors.HypervisorError, err:
659
      logger.Error("Failed to soft reboot instance: %s" % err)
660
      return False
661
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
662
    try:
663
      ShutdownInstance(instance)
664
      StartInstance(instance, extra_args)
665
    except errors.HypervisorError, err:
666
      logger.Error("Failed to hard reboot instance: %s" % err)
667
      return False
668
  else:
669
    raise errors.ParameterError("reboot_type invalid")
670

    
671

    
672
  return True
673

    
674

    
675
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object
   size: the size of the physical underlying device
   owner: a string with the name of the instance
   on_primary: a boolean indicating if it is the primary node or not
   info: string that will be sent to the physical device creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  Raises:
    ValueError: if the device creation itself fails
    errors.BlockDeviceError: if the created device can't be assembled

  """
  clist = []
  if disk.children:
    # assemble all children first, bottom-up
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      clist.append(crdev)
  try:
    # if a device with this id already exists, remove it before
    # re-creating
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # best-effort cleanup: a failed lookup/removal is not fatal here
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      # open read/write only where the device will actually be used
      device.Open(force=True)
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
729

    
730

    
731
def RemoveBlockDevice(disk):
732
  """Remove a block device.
733

734
  This is intended to be called recursively.
735

736
  """
737
  try:
738
    # since we are removing the device, allow a partial match
739
    # this allows removal of broken mirrors
740
    rdev = _RecursiveFindBD(disk, allow_partial=True)
741
  except errors.BlockDeviceError, err:
742
    # probably can't attach
743
    logger.Info("Can't attach to device %s in remove" % disk)
744
    rdev = None
745
  if rdev is not None:
746
    r_path = rdev.dev_path
747
    result = rdev.Remove()
748
    if result:
749
      DevCacheManager.RemoveCache(r_path)
750
  else:
751
    result = True
752
  if disk.children:
753
    for child in disk.children:
754
      result = result and RemoveBlockDevice(child)
755
  return result
756

    
757

    
758
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    owner: a string with the name of the instance
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    # mcn becomes the maximum number of children that may fail to
    # assemble while the parent can still work (its redundancy margin)
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        # tolerate failed children up to the redundancy margin; past
        # that the parent can't work, so re-raise
        if children.count(None) >= mcn:
          raise
        cdev = None
        logger.Debug("Error in child activation: %s" % str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      # open read/write only where the device will actually be used
      r_dev.Open()
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    result = True
  return result
804

    
805

    
806
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    # secondary node case: pass the boolean through unchanged
    return assembled
  # a real device was assembled; callers want its /dev path
  return assembled.dev_path
820

    
821

    
822
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is None:
    # nothing attached, so nothing to shut down at this level
    result = True
  else:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      DevCacheManager.RemoveCache(r_path)

  # shut down the children regardless of the outcome above
  for child in (disk.children or []):
    result = result and ShutdownBlockDevice(child)
  return result
845

    
846

    
847
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  """
  # a partial match is fine here: we may be repairing a broken mirror
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = []
  for cdev in new_cdevs:
    new_bdevs.append(_RecursiveFindBD(cdev))
  if None in new_bdevs:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
862

    
863

    
864
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  devs = []
  for cdev in new_cdevs:
    static_path = cdev.StaticDevPath()
    if static_path is not None:
      devs.append(static_path)
      continue
    # no static path: the device has to be looked up dynamically
    found = _RecursiveFindBD(cdev)
    if found is None:
      logger.Error("Can't find dynamic device %s while removing children" %
                   cdev)
      return False
    devs.append(found.dev_path)
  parent_bdev.RemoveChildren(devs)
  return True
887

    
888

    
889
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  """
  status = []
  for disk in disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    status.append(found.CombinedSyncStatus())
  return status
907

    
908

    
909
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # resolve all children first, then look the device itself up
  if disk.children:
    children = [_RecursiveFindBD(chdisk) for chdisk in disk.children]
  else:
    children = []

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
931

    
932

    
933
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    return None
  # flatten identity and sync status into one tuple for the caller
  return (found.dev_path, found.major, found.minor) + found.GetSyncStatus()
949

    
950

    
951
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of files the master is allowed to overwrite
  allowed_files = [
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    ]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())

  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # replicate ownership, permissions and timestamps from the master
  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return True
977

    
978

    
979
def _ErrnoOrStr(err):
980
  """Format an EnvironmentError exception.
981

982
  If the `err` argument has an errno attribute, it will be looked up
983
  and converted into a textual EXXXX description. Otherwise the string
984
  representation of the error will be returned.
985

986
  """
987
  if hasattr(err, 'errno'):
988
    detail = errno.errorcode[err.errno]
989
  else:
990
    detail = str(err)
991
  return detail
992

    
993

    
994
def _OSOndiskVersion(name, os_dir):
  """Compute and return the API version of a given OS.

  This function will try to read the API version of the os given by
  the 'name' parameter and residing in the 'os_dir' directory.

  Args:
    name: the OS name, used only for error reporting
    os_dir: the directory holding the 'ganeti_api_version' file

  Returns:
    the API version as an integer

  Raises:
    errors.InvalidOS: if the version file is missing, not a regular
      file, unreadable, or does not contain an integer

  """
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  # a missing version file means this is not a valid OS directory
  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  # os.stat follows symlinks, so this rejects directories, devices etc.
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
                           " a regular file")

  try:
    f = open(api_file)
    try:
      # the version string is expected to be tiny; 256 bytes is plenty
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  # tolerate surrounding whitespace/newlines in the file
  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, os_dir,
                           "API version is not integer (%s)" % str(err))

  return api_version
1034

    
1035

    
1036
def DiagnoseOS(top_dirs=None):
  """Compute the validity for all OSes.

  Returns an OS object for each name in all the given top directories
  (if not given defaults to constants.OS_SEARCH_PATH)

  Args:
    top_dirs: optional list of directories to scan for OS definitions

  Returns:
    list of OS objects; invalid OSes are reported as entries built via
    objects.OS.FromInvalidOS rather than raised

  """
  if top_dirs is None:
    top_dirs = constants.OS_SEARCH_PATH

  result = []
  for dir_name in top_dirs:
    # silently skip entries that are not directories
    if os.path.isdir(dir_name):
      try:
        f_names = utils.ListVisibleFiles(dir_name)
      except EnvironmentError, err:
        logger.Error("Can't list the OS directory %s: %s" %
                     (dir_name, str(err)))
        # NOTE(review): 'break' aborts scanning ALL remaining top dirs on
        # a single listing failure; 'continue' (skip only this dir) may
        # have been intended - confirm before changing
        break
      for name in f_names:
        try:
          os_inst = OSFromDisk(name, base_dir=dir_name)
          result.append(os_inst)
        except errors.InvalidOS, err:
          # invalid OSes are still reported, wrapped in an OS object
          result.append(objects.OS.FromInvalidOS(err))

  return result
1066

    
1067

    
1068
def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name to look up
    base_dir: directory containing the OS (the OS lives in
              base_dir/name). Defaults to a search in all the
              OS_SEARCH_PATH directories.

  Returns:
    an objects.OS instance with the script paths and API version filled in

  Raises:
    errors.InvalidOS: if the OS directory, API version or any of the
      required scripts is missing or invalid

  """

  if base_dir is None:
    os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
    if os_dir is None:
      raise errors.InvalidOS(name, None, "OS dir not found in search path")
  else:
    os_dir = os.path.sep.join([base_dir, name])

  api_version = _OSOndiskVersion(name, os_dir)

  # only an exact API version match is accepted
  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
                           " (found %s want %s)"
                           % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    # each script must exist, be a regular file, and be executable
    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
                             script)

    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
                             script)


  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    rename_script=os_scripts['rename'],
                    api_version=api_version)
1123

    
1124

    
1125
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if no
    suitable device was found

  """
  if disk.children:
    if len(disk.children) == 1:
      # a single child: descend into it directly
      return SnapshotBlockDevice(disk.children[0])
    # several children: descend into the first one whose size matches
    # the parent's
    for candidate in disk.children:
      if candidate.size == disk.size:
        return SnapshotBlockDevice(candidate)
    # no child matched the parent size; nothing to snapshot
    return None
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is None:
      return None
    # let's stay on the safe side and ask for the full size, for now
    return r_dev.Snapshot(disk.size)
  else:
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
1159

    
1160

    
1161
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  Raises:
    errors.BlockDeviceError: if the snapshot device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-run log file, named after OS, instance and timestamp
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  # the export is staged into a ".new" directory; FinalizeExport later
  # moves it to its final place
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
                                       destcmd)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os snapshot export command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1217

    
1218

    
1219
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  # the ".new" staging dir is where ExportSnapshot placed the data;
  # once the config file is written we move it to its final name
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  # export-level metadata
  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  # instance-level metadata
  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): 'nic_count' stores the LAST enumerate index (count-1),
  # not the number of nics; the import side must use the same convention
  # - confirm before changing
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  # NOTE(review): same last-index convention as 'nic_count' above
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export of this instance with the staged one;
  # the rmtree ignores errors (e.g. no previous export)
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1278

    
1279

    
1280
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config parser holding the export info, or None if
    the mandatory sections are missing.

  """
  cfg_path = os.path.join(dest, constants.EXPORT_CONF_FILE)

  cfg = objects.SerializableConfigParser()
  cfg.read(cfg_path)

  # both the export and the instance sections must be present
  for section in (constants.INISECT_EXP, constants.INISECT_INS):
    if not cfg.has_section(section):
      return None

  return cfg
1300

    
1301

    
1302
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  Raises:
    errors.BlockDeviceError: if the os or swap device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # translate the instance-visible disk names into disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # both devices must already be assembled on this node
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file, named after OS, instance and timestamp
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # each pipeline stage is quoted individually before being joined below
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
                                       destcmd)

  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  # ssh-cat the remote image, decompress it locally, feed the import script
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os import command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1367

    
1368

    
1369
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    list of visible file names in the export directory, or the empty
    list if the directory does not exist

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
1377

    
1378

    
1379
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)

  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(target)

  return True
1396

    
1397

    
1398
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of tuples (disk, new_unique_id). The
  return value will be a combined boolean result (True only if all
  renames succeeded).

  """
  result = True
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      # device not found: record the failure but keep renaming the rest
      result = False
      continue
    try:
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        # the device path changed, so the old cache entry is now stale
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
      result = False
  return result
1428

    
1429

    
1430
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = os.path.normpath(
    ssconf.SimpleStore().GetFileStorageDir())
  # bug fix: os.path.commonprefix compares character by character, so a
  # sibling such as "/srv/ganeti-evil" would wrongly pass the check
  # against base "/srv/ganeti"; require an exact match or a real
  # path-separator boundary instead
  if not (file_storage_dir == base_file_storage_dir or
          file_storage_dir.startswith(base_file_storage_dir + os.sep)):
    logger.Error("file storage directory '%s' is not under base file"
                 " storage directory '%s'" %
                 (file_storage_dir, base_file_storage_dir))
    return None
  return file_storage_dir
1453

    
1454

    
1455
def CreateFileStorageDir(file_storage_dir):
1456
  """Create file storage directory.
1457

1458
  Args:
1459
    file_storage_dir: string containing the path
1460

1461
  Returns:
1462
    tuple with first element a boolean indicating wheter dir
1463
    creation was successful or not
1464

1465
  """
1466
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1467
  result = True,
1468
  if not file_storage_dir:
1469
    result = False,
1470
  else:
1471
    if os.path.exists(file_storage_dir):
1472
      if not os.path.isdir(file_storage_dir):
1473
        logger.Error("'%s' is not a directory" % file_storage_dir)
1474
        result = False,
1475
    else:
1476
      try:
1477
        os.makedirs(file_storage_dir, 0750)
1478
      except OSError, err:
1479
        logger.Error("Cannot create file storage directory '%s': %s" %
1480
                     (file_storage_dir, err))
1481
        result = False,
1482
  return result
1483

    
1484

    
1485
def RemoveFileStorageDir(file_storage_dir):
1486
  """Remove file storage directory.
1487

1488
  Remove it only if it's empty. If not log an error and return.
1489

1490
  Args:
1491
    file_storage_dir: string containing the path
1492

1493
  Returns:
1494
    tuple with first element a boolean indicating wheter dir
1495
    removal was successful or not
1496

1497
  """
1498
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1499
  result = True,
1500
  if not file_storage_dir:
1501
    result = False,
1502
  else:
1503
    if os.path.exists(file_storage_dir):
1504
      if not os.path.isdir(file_storage_dir):
1505
        logger.Error("'%s' is not a directory" % file_storage_dir)
1506
        result = False,
1507
      # deletes dir only if empty, otherwise we want to return False
1508
      try:
1509
        os.rmdir(file_storage_dir)
1510
      except OSError, err:
1511
        logger.Error("Cannot remove file storage directory '%s': %s" %
1512
                     (file_storage_dir, err))
1513
        result = False,
1514
  return result
1515

    
1516

    
1517
def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
1518
  """Rename the file storage directory.
1519

1520
  Args:
1521
    old_file_storage_dir: string containing the old path
1522
    new_file_storage_dir: string containing the new path
1523

1524
  Returns:
1525
    tuple with first element a boolean indicating wheter dir
1526
    rename was successful or not
1527

1528
  """
1529
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
1530
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
1531
  result = True,
1532
  if not old_file_storage_dir or not new_file_storage_dir:
1533
    result = False,
1534
  else:
1535
    if not os.path.exists(new_file_storage_dir):
1536
      if os.path.isdir(old_file_storage_dir):
1537
        try:
1538
          os.rename(old_file_storage_dir, new_file_storage_dir)
1539
        except OSError, err:
1540
          logger.Error("Cannot rename '%s' to '%s': %s"
1541
                       % (old_file_storage_dir, new_file_storage_dir, err))
1542
          result =  False,
1543
      else:
1544
        logger.Error("'%s' is not a directory" % old_file_storage_dir)
1545
        result = False,
1546
    else:
1547
      if os.path.exists(old_file_storage_dir):
1548
        logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
1549
                     old_file_storage_dir, new_file_storage_dir)
1550
        result = False,
1551
  return result
1552

    
1553

    
1554
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only files whose names match this pattern are run as hooks
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      tuple (success_boolean, captured_output); only the first 4096
      bytes of the script's combined stdout/stderr are captured

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() on EINTR (interrupted by a signal)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: the hooks path component (subdir name prefix)
      - phase: constants.HOOKS_PHASE_PRE or constants.HOOKS_PHASE_POST
      - env: environment passed to each hook script

    Returns:
      list of (relative_script_path, status, output) tuples, where
      status is one of the constants.HKR_* values

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # must log
      # a missing/unreadable hooks dir means no hooks to run
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # non-files, non-executables and badly-named entries are skipped,
      # not failed
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
1659

    
1660

    
1661
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Args:
      - name: the name of the allocator script to look up
      - idata: the allocator input (string), written to a temporary
        file and passed to the script as its only argument

    Return value: tuple of:
       - run status (one of the IARUN_ constants)
       - stdout
       - stderr
       - fail reason (as from utils.RunResult)

    """
    # bug fix: 'tempfile' is not in the module-level import list, which
    # would make this method raise NameError; import it locally
    # (TODO: move to the top-level imports)
    import tempfile

    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    # write the input data to a temp file that the script receives as argv[1]
    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      # always clean up the temporary input file
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
1695

    
1696

    
1697
class DevCacheManager(object):
  """Simple class for managing a cache of block device information.

  The cache is a set of small files under _ROOT_DIR, one per device,
  holding "owner state iv_name" lines; all methods are classmethods and
  all errors are logged rather than raised.

  """
  # prefix stripped from device paths when building cache file names
  _DEV_PREFIX = "/dev/"
  _ROOT_DIR = constants.BDEV_CACHE_DIR

  @classmethod
  def _ConvertPath(cls, dev_path):
    """Converts a /dev/name path to the cache file name.

    This replaces slashes with underscores and strips the /dev
    prefix. It then returns the full path to the cache file

    """
    if dev_path.startswith(cls._DEV_PREFIX):
      dev_path = dev_path[len(cls._DEV_PREFIX):]
    dev_path = dev_path.replace("/", "_")
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
    return fpath

  @classmethod
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
    """Updates the cache information for a given device.

    Writes "owner state iv_name" to the device's cache file, where
    state is "primary" or "secondary" depending on on_primary; write
    failures are logged, not raised.

    """
    if dev_path is None:
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    if on_primary:
      state = "primary"
    else:
      state = "secondary"
    if iv_name is None:
      iv_name = "not_visible"
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
    try:
      utils.WriteFile(fpath, data=fdata)
    except EnvironmentError, err:
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))

  @classmethod
  def RemoveCache(cls, dev_path):
    """Remove data for a dev_path.

    Removal failures (including a missing cache file) are logged, not
    raised.

    """
    if dev_path is None:
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    try:
      utils.RemoveFile(fpath)
    except EnvironmentError, err:
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))