Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 40a03283

History | View | Annotate | Download (45.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Promote the local node to cluster master.

  Runs the master startup script in daemon mode; failures are logged.

  Returns:
    True on success, False otherwise

  """
  cmd_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])

  if not cmd_result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (cmd_result.cmd, cmd_result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Demote the local node from the master role.

  Runs the master stop script in daemon mode; failures are logged.

  Returns:
    True on success, False otherwise

  """
  cmd_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])

  if not cmd_result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (cmd_result.cmd, cmd_result.output))
  return False
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
83
  """Joins this node to the cluster.
84

85
  This does the following:
86
      - updates the hostkeys of the machine (rsa and dsa)
87
      - adds the ssh private key to the user
88
      - adds the ssh public key to the users' authorized_keys file
89

90
  """
91
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
92
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
93
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
94
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
95
  for name, content, mode in sshd_keys:
96
    utils.WriteFile(name, data=content, mode=mode)
97

    
98
  try:
99
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
100
                                                    mkdir=True)
101
  except errors.OpExecError, err:
102
    logger.Error("Error while processing user ssh files: %s" % err)
103
    return False
104

    
105
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
106
    utils.WriteFile(name, data=content, mode=0600)
107

    
108
  utils.AddAuthorizedKey(auth_keys, sshpub)
109

    
110
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
111

    
112
  return True
113

    
114

    
115
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  Removes all regular files from the ganeti data directory, then
  revokes and deletes this node's cluster ssh key pair.  Unlike most
  functions in this module it returns nothing: a failure to locate the
  user ssh files is only logged.

  """
  if os.path.isdir(constants.DATA_DIR):
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
      full_name = os.path.join(constants.DATA_DIR, rel_name)
      # only plain files are removed; symlinks and subdirectories are
      # deliberately left alone
      if os.path.isfile(full_name) and not os.path.islink(full_name):
        utils.RemoveFile(full_name)

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
  except errors.OpExecError, err:
    logger.Error("Error while processing ssh files: %s" % err)
    return

  f = open(pub_key, 'r')
  try:
    # revoke the public key from authorized_keys before deleting the
    # key pair itself; 8192 bytes is enough to hold any ssh public key
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
  finally:
    f.close()

  utils.RemoveFile(priv_key)
  utils.RemoveFile(pub_key)
139

    
140

    
141
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Args:
    vgname: the name of the volume group to query

  Returns:
    a dictionary with at least the keys 'vg_size' and 'vg_free' (sizes
    of the volume group in MiB, or None if it cannot be queried), a
    'bootid' key with the kernel boot id, plus whatever keys the
    hypervisor's GetNodeInfo() provides (e.g. memory information)

  """
  node_info = {}

  vginfo = _GetVGInfo(vgname)
  node_info['vg_size'] = vginfo['vg_size']
  node_info['vg_free'] = vginfo['vg_free']

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  # the boot id changes on every reboot, which lets the master detect
  # unnoticed node restarts
  boot_id_file = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    node_info["bootid"] = boot_id_file.read(128).rstrip("\n")
  finally:
    boot_id_file.close()

  return node_info
172

    
173

    
174
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Returns:
    a dictionary with one entry per requested check; for 'nodelist'
    only the nodes that failed verification are included, mapped to
    the failure message.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    failed_nodes = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failed_nodes[node] = message
    result['nodelist'] = failed_nodes

  return result
207

    
208

    
209
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Args:
    vg_name: the volume group to list

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

    NOTE(review): on lvs failure this returns the raw command output (a
    string) instead of a dictionary -- callers must be prepared for
    either type; confirm whether this asymmetry is intentional.

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return result.output

  for line in result.stdout.splitlines():
    # lvs pads fields with whitespace and may emit a trailing separator
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    # lv_attr is a 6-character flag string; normalize short values so
    # the positional tests below are safe
    if len(attr) != 6:
      attr = '------'
    # position 4 is the state flag ('-' means inactive), position 5 is
    # the device-open flag ('o' means open/online)
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
238

    
239

    
240
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  # delegate to the shared implementation in utils
  return utils.ListVolumeGroups()
248

    
249

    
250
def NodeVolumes():
  """List all volumes on this node.

  Returns:
    a list of dictionaries with keys 'name', 'size', 'dev' and 'vg'
    (one per logical volume), or an empty dict if lvs fails.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = []
  for line in result.stdout.splitlines():
    fields = line.split('|')
    # the devices field looks like "/dev/sda1(0)"; keep only the path
    device = fields[2].strip()
    if '(' in device:
      device = device.split('(')[0]
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': device,
      'vg': fields[3].strip(),
      })
  return volumes
277

    
278

    
279
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Args:
    bridges_list: list of bridge names to check

  Returns:
    True if all of them exist, false otherwise

  """
  for bridge_name in bridges_list:
    if utils.BridgeExists(bridge_name):
      continue
    # stop at the first missing bridge
    return False

  return True
291

    
292

    
293
def GetInstanceList():
294
  """Provides a list of instances.
295

296
  Returns:
297
    A list of all running instances on the current node
298
    - instance1.example.com
299
    - instance2.example.com
300

301
  """
302
  try:
303
    names = hypervisor.GetHypervisor().ListInstances()
304
  except errors.HypervisorError, err:
305
    logger.Error("error enumerating instances: %s" % str(err))
306
    raise
307

    
308
  return names
309

    
310

    
311
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    or an empty dictionary when the hypervisor has no data for the
    instance.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}

  # the hypervisor tuple is (name, id, memory, vcpus, state, times)
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
334

    
335

    
336
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  all_info = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for entry in iinfo:
      # each entry is (name, id, memory, vcpus, state, times); the
      # hypervisor-internal id is not exposed to callers
      name, dummy_id, memory, vcpus, state, times = entry
      all_info[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return all_info
366

    
367

    
368
def AddOSToInstance(instance, os_disk, swap_disk):
369
  """Add an OS to an instance.
370

371
  Args:
372
    instance: the instance object
373
    os_disk: the instance-visible name of the os device
374
    swap_disk: the instance-visible name of the swap device
375

376
  """
377
  inst_os = OSFromDisk(instance.os)
378

    
379
  create_script = inst_os.create_script
380

    
381
  os_device = instance.FindDisk(os_disk)
382
  if os_device is None:
383
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
384
    return False
385

    
386
  swap_device = instance.FindDisk(swap_disk)
387
  if swap_device is None:
388
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
389
    return False
390

    
391
  real_os_dev = _RecursiveFindBD(os_device)
392
  if real_os_dev is None:
393
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
394
                                  str(os_device))
395
  real_os_dev.Open()
396

    
397
  real_swap_dev = _RecursiveFindBD(swap_device)
398
  if real_swap_dev is None:
399
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
400
                                  str(swap_device))
401
  real_swap_dev.Open()
402

    
403
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
404
                                     instance.name, int(time.time()))
405
  if not os.path.exists(constants.LOG_OS_DIR):
406
    os.mkdir(constants.LOG_OS_DIR, 0750)
407

    
408
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
409
                                inst_os.path, create_script, instance.name,
410
                                real_os_dev.dev_path, real_swap_dev.dev_path,
411
                                logfile)
412

    
413
  result = utils.RunCmd(command)
414
  if result.failed:
415
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
416
                 " output: %s" %
417
                 (command, result.fail_reason, logfile, result.output))
418
    return False
419

    
420
  return True
421

    
422

    
423
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
424
  """Run the OS rename script for an instance.
425

426
  Args:
427
    instance: the instance object
428
    old_name: the old name of the instance
429
    os_disk: the instance-visible name of the os device
430
    swap_disk: the instance-visible name of the swap device
431

432
  """
433
  inst_os = OSFromDisk(instance.os)
434

    
435
  script = inst_os.rename_script
436

    
437
  os_device = instance.FindDisk(os_disk)
438
  if os_device is None:
439
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
440
    return False
441

    
442
  swap_device = instance.FindDisk(swap_disk)
443
  if swap_device is None:
444
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
445
    return False
446

    
447
  real_os_dev = _RecursiveFindBD(os_device)
448
  if real_os_dev is None:
449
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
450
                                  str(os_device))
451
  real_os_dev.Open()
452

    
453
  real_swap_dev = _RecursiveFindBD(swap_device)
454
  if real_swap_dev is None:
455
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
456
                                  str(swap_device))
457
  real_swap_dev.Open()
458

    
459
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
460
                                           old_name,
461
                                           instance.name, int(time.time()))
462
  if not os.path.exists(constants.LOG_OS_DIR):
463
    os.mkdir(constants.LOG_OS_DIR, 0750)
464

    
465
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
466
                                inst_os.path, script, old_name, instance.name,
467
                                real_os_dev.dev_path, real_swap_dev.dev_path,
468
                                logfile)
469

    
470
  result = utils.RunCmd(command)
471

    
472
  if result.failed:
473
    logger.Error("os create command '%s' returned error: %s"
474
                 " output: %s" %
475
                 (command, result.fail_reason, result.output))
476
    return False
477

    
478
  return True
479

    
480

    
481
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.

  """
  # all-None values double as the error return value
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    return retdic
  # vgs emits a single line such as " 1234.00:567.00:2"; strip padding
  # and a possible trailing separator before splitting into fields
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      # sizes are reported as floats in MiB; round to whole MiB
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError, err:
      # keep the all-None dict on unparseable output
      logger.Error("Fail to parse vgs output: %s" % str(err))
  else:
    logger.Error("vgs output has the wrong number of fields (expected"
                 " three): %s" % str(valarr))
  return retdic
521

    
522

    
523
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns:
    a list of (disk, device) pairs, where device is the activated
    block device object for the corresponding objects.Disk.

  Raises:
    errors.BlockDeviceError: if any disk cannot be found

  """
  gathered = []
  for disk in instance.disks:
    found_dev = _RecursiveFindBD(disk)
    if found_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found_dev.Open()
    gathered.append((disk, found_dev))
  return gathered
539

    
540

    
541
def StartInstance(instance, extra_args):
542
  """Start an instance.
543

544
  Args:
545
    instance - name of instance to start.
546

547
  """
548
  running_instances = GetInstanceList()
549

    
550
  if instance.name in running_instances:
551
    return True
552

    
553
  block_devices = _GatherBlockDevs(instance)
554
  hyper = hypervisor.GetHypervisor()
555

    
556
  try:
557
    hyper.StartInstance(instance, block_devices, extra_args)
558
  except errors.HypervisorError, err:
559
    logger.Error("Failed to start instance: %s" % err)
560
    return False
561

    
562
  return True
563

    
564

    
565
def ShutdownInstance(instance):
566
  """Shut an instance down.
567

568
  Args:
569
    instance - name of instance to shutdown.
570

571
  """
572
  running_instances = GetInstanceList()
573

    
574
  if instance.name not in running_instances:
575
    return True
576

    
577
  hyper = hypervisor.GetHypervisor()
578
  try:
579
    hyper.StopInstance(instance)
580
  except errors.HypervisorError, err:
581
    logger.Error("Failed to stop instance: %s" % err)
582
    return False
583

    
584
  # test every 10secs for 2min
585
  shutdown_ok = False
586

    
587
  time.sleep(1)
588
  for dummy in range(11):
589
    if instance.name not in GetInstanceList():
590
      break
591
    time.sleep(10)
592
  else:
593
    # the shutdown did not succeed
594
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
595

    
596
    try:
597
      hyper.StopInstance(instance, force=True)
598
    except errors.HypervisorError, err:
599
      logger.Error("Failed to stop instance: %s" % err)
600
      return False
601

    
602
    time.sleep(1)
603
    if instance.name in GetInstanceList():
604
      logger.Error("could not shutdown instance '%s' even by destroy")
605
      return False
606

    
607
  return True
608

    
609

    
610
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.

  Args:
    instance    - name of instance to reboot
    reboot_type - how to reboot [soft,hard,full]

  Returns:
    True on success, False on failure; raises errors.ParameterError
    for an unknown reboot_type.

  NOTE(review): the docstring advertises a 'full' reboot type, but no
  branch below handles constants.INSTANCE_REBOOT_FULL -- it would fall
  through to the ParameterError; confirm whether this is intentional.

  """
  running_instances = GetInstanceList()

  if instance.name not in running_instances:
    logger.Error("Cannot reboot instance that is not running")
    return False

  hyper = hypervisor.GetHypervisor()
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    # soft reboot: ask the hypervisor to reboot the instance in place
    try:
      hyper.RebootInstance(instance)
    except errors.HypervisorError, err:
      logger.Error("Failed to soft reboot instance: %s" % err)
      return False
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    # hard reboot: full stop/start cycle via this module's own helpers
    try:
      ShutdownInstance(instance)
      StartInstance(instance, extra_args)
    except errors.HypervisorError, err:
      logger.Error("Failed to hard reboot instance: %s" % err)
      return False
  else:
    raise errors.ParameterError("reboot_type invalid")


  return True
643

    
644

    
645
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    size: the size of the physical underlying devices
    owner: the instance name owning the device (recorded in the device
           cache)
    on_primary: whether this node is the instance's primary; if so (or
                if the disk type requires assembly on secondaries) the
                device is also assembled and possibly opened
    info: data passed to the device via SetInfo() after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      clist.append(crdev)
  try:
    # if a device with this unique_id already exists, remove it first
    # so creation starts from a clean slate
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # most likely the device simply does not exist yet, so there is
    # nothing to remove; real problems will surface in Create() below
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    # record the assembled device so it can be found/cleaned up later
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  # the unique_id may only be known after actual creation
  physical_id = device.unique_id
  return physical_id
698

    
699

    
700
def RemoveBlockDevice(disk):
701
  """Remove a block device.
702

703
  This is intended to be called recursively.
704

705
  """
706
  try:
707
    # since we are removing the device, allow a partial match
708
    # this allows removal of broken mirrors
709
    rdev = _RecursiveFindBD(disk, allow_partial=True)
710
  except errors.BlockDeviceError, err:
711
    # probably can't attach
712
    logger.Info("Can't attach to device %s in remove" % disk)
713
    rdev = None
714
  if rdev is not None:
715
    r_path = rdev.dev_path
716
    result = rdev.Remove()
717
    if result:
718
      DevCacheManager.RemoveCache(r_path)
719
  else:
720
    result = True
721
  if disk.children:
722
    for child in disk.children:
723
      result = result and RemoveBlockDevice(child)
724
  return result
725

    
726

    
727
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    owner: the instance name owning the device (recorded in the
           device cache)
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    # mcn below is the maximum number of children that are allowed to
    # fail assembly (i.e. be None) while the device is still usable
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        # re-raise only once more children failed than the device can
        # tolerate; otherwise record the failure as a None child
        if children.count(None) >= mcn:
          raise
        cdev = None
        logger.Debug("Error in child activation: %s" % str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    # remember the assembled device for later lookup/cleanup
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    # nothing to assemble on this node for this disk type
    result = True
  return result
773

    
774

    
775
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    # secondary nodes get the plain boolean result
    return assembled
  return assembled.dev_path
789

    
790

    
791
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Returns:
    True if the device and all of its children were shut down (or were
    not attached), False otherwise.

  """
  found_dev = _RecursiveFindBD(disk)

  if found_dev is None:
    # not attached: nothing to shut down at this level
    success = True
  else:
    dev_path = found_dev.dev_path
    success = found_dev.Shutdown()
    if success:
      DevCacheManager.RemoveCache(dev_path)

  if disk.children:
    for child in disk.children:
      success = success and ShutdownBlockDevice(child)
  return success
814

    
815

    
816
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Args:
    parent_cdev: the objects.Disk of the mirror to extend
    new_cdevs: list of objects.Disk to add as children

  Returns:
    True on success, False if the parent or any new device cannot be
    found.

  """
  # the parent may be degraded, so accept a partial match
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if None in new_bdevs:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
831

    
832

    
833
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Args:
    parent_cdev: the objects.Disk of the mirror to shrink
    new_cdevs: list of objects.Disk children to remove

  Returns:
    True on success, False if the parent or any child cannot be found.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  devs = []
  for disk in new_cdevs:
    # prefer the disk's static device path; fall back to a dynamic
    # lookup only when the disk type has no fixed path
    rpath = disk.StaticDevPath()
    if rpath is not None:
      devs.append(rpath)
      continue
    bd = _RecursiveFindBD(disk)
    if bd is None:
      logger.Error("Can't find dynamic device %s while removing children" %
                   disk)
      return False
    devs.append(bd.dev_path)
  parent_bdev.RemoveChildren(devs)
  return True
856

    
857

    
858
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any of the devices cannot be found

  """
  sync_status = []
  for disk in disks:
    found_dev = _RecursiveFindBD(disk)
    if found_dev is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    sync_status.append(found_dev.CombinedSyncStatus())
  return sync_status
876

    
877

    
878
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): allow_partial is accepted but never used in this body
  # and is not forwarded to the recursive calls below, although callers
  # (e.g. RemoveBlockDevice) pass allow_partial=True -- confirm the
  # intended semantics before relying on it
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
900

    
901

    
902
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  found_dev = _RecursiveFindBD(disk)
  if found_dev is None:
    return None
  # combine the static identification with the live sync status
  return (found_dev.dev_path, found_dev.major,
          found_dev.minor) + found_dev.GetSyncStatus()
918

    
919

    
920
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the new file contents
    mode: the permission bits to set
    uid, gid: the owner to set
    atime, mtime: the timestamps to set

  Returns:
    True on success, False if the file name is not absolute or not in
    the allowed upload targets.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of files the master may overwrite
  allowed_files = [
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    ]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temporary file in the same directory and rename it into
  # place, so readers never observe a half-written file
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    # flush to disk before the rename makes the new contents visible
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename this is a no-op; otherwise it cleans up
    # the leftover temporary file
    utils.RemoveFile(new_name)
  return True
958

    
959

    
960
def _ErrnoOrStr(err):
961
  """Format an EnvironmentError exception.
962

963
  If the `err` argument has an errno attribute, it will be looked up
964
  and converted into a textual EXXXX description. Otherwise the string
965
  representation of the error will be returned.
966

967
  """
968
  if hasattr(err, 'errno'):
969
    detail = errno.errorcode[err.errno]
970
  else:
971
    detail = str(err)
972
  return detail
973

    
974

    
975
def _OSSearch(name, search_path=None):
976
  """Search for OSes with the given name in the search_path.
977

978
  Args:
979
    name: The name of the OS to look for
980
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
981

982
  Returns:
983
    The base_dir the OS resides in
984

985
  """
986
  if search_path is None:
987
    search_path = constants.OS_SEARCH_PATH
988

    
989
  for dir_name in search_path:
990
    t_os_dir = os.path.sep.join([dir_name, name])
991
    if os.path.isdir(t_os_dir):
992
      return dir_name
993

    
994
  return None
995

    
996

    
997
def _OSOndiskVersion(name, os_dir):
998
  """Compute and return the API version of a given OS.
999

1000
  This function will try to read the API version of the os given by
1001
  the 'name' parameter and residing in the 'os_dir' directory.
1002

1003
  Return value will be either an integer denoting the version or None in the
1004
  case when this is not a valid OS name.
1005

1006
  """
1007
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1008

    
1009
  try:
1010
    st = os.stat(api_file)
1011
  except EnvironmentError, err:
1012
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1013
                           " found (%s)" % _ErrnoOrStr(err))
1014

    
1015
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1016
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1017
                           " a regular file")
1018

    
1019
  try:
1020
    f = open(api_file)
1021
    try:
1022
      api_version = f.read(256)
1023
    finally:
1024
      f.close()
1025
  except EnvironmentError, err:
1026
    raise errors.InvalidOS(name, os_dir, "error while reading the"
1027
                           " API version (%s)" % _ErrnoOrStr(err))
1028

    
1029
  api_version = api_version.strip()
1030
  try:
1031
    api_version = int(api_version)
1032
  except (TypeError, ValueError), err:
1033
    raise errors.InvalidOS(name, os_dir,
1034
                           "API version is not integer (%s)" % str(err))
1035

    
1036
  return api_version
1037

    
1038

    
1039
def DiagnoseOS(top_dirs=None):
1040
  """Compute the validity for all OSes.
1041

1042
  Returns an OS object for each name in all the given top directories
1043
  (if not given defaults to constants.OS_SEARCH_PATH)
1044

1045
  Returns:
1046
    list of OS objects
1047

1048
  """
1049
  if top_dirs is None:
1050
    top_dirs = constants.OS_SEARCH_PATH
1051

    
1052
  result = []
1053
  for dir_name in top_dirs:
1054
    if os.path.isdir(dir_name):
1055
      try:
1056
        f_names = utils.ListVisibleFiles(dir_name)
1057
      except EnvironmentError, err:
1058
        logger.Error("Can't list the OS directory %s: %s" %
1059
                     (dir_name, str(err)))
1060
        break
1061
      for name in f_names:
1062
        try:
1063
          os_inst = OSFromDisk(name, base_dir=dir_name)
1064
          result.append(os_inst)
1065
        except errors.InvalidOS, err:
1066
          result.append(objects.OS.FromInvalidOS(err))
1067

    
1068
  return result
1069

    
1070

    
1071
def OSFromDisk(name, base_dir=None):
1072
  """Create an OS instance from disk.
1073

1074
  This function will return an OS instance if the given name is a
1075
  valid OS name. Otherwise, it will raise an appropriate
1076
  `errors.InvalidOS` exception, detailing why this is not a valid
1077
  OS.
1078

1079
  Args:
1080
    os_dir: Directory containing the OS scripts. Defaults to a search
1081
            in all the OS_SEARCH_PATH directories.
1082

1083
  """
1084

    
1085
  if base_dir is None:
1086
    base_dir = _OSSearch(name)
1087

    
1088
  if base_dir is None:
1089
    raise errors.InvalidOS(name, None, "OS dir not found in search path")
1090

    
1091
  os_dir = os.path.sep.join([base_dir, name])
1092
  api_version = _OSOndiskVersion(name, os_dir)
1093

    
1094
  if api_version != constants.OS_API_VERSION:
1095
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1096
                           " (found %s want %s)"
1097
                           % (api_version, constants.OS_API_VERSION))
1098

    
1099
  # OS Scripts dictionary, we will populate it with the actual script names
1100
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1101

    
1102
  for script in os_scripts:
1103
    os_scripts[script] = os.path.sep.join([os_dir, script])
1104

    
1105
    try:
1106
      st = os.stat(os_scripts[script])
1107
    except EnvironmentError, err:
1108
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1109
                             (script, _ErrnoOrStr(err)))
1110

    
1111
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1112
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1113
                             script)
1114

    
1115
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1116
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1117
                             script)
1118

    
1119

    
1120
  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1121
                    create_script=os_scripts['create'],
1122
                    export_script=os_scripts['export'],
1123
                    import_script=os_scripts['import'],
1124
                    rename_script=os_scripts['rename'],
1125
                    api_version=api_version)
1126

    
1127

    
1128
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None when
    the backend device is not found (note: a multi-child disk with no
    child matching its size also falls through to None)

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, recurse directly into it
      return SnapshotBlockDevice(disk.children[0])
    # more than one child: recurse into the first one matching the
    # parent's size; no match falls through to the implicit None
    for child in disk.children:
      if child.size == disk.size:
        return SnapshotBlockDevice(child)
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is None:
      return None
    # let's stay on the safe side and ask for the full size, for now
    return r_dev.Snapshot(disk.size)
  else:
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
1162

    
1163

    
1164
def ExportSnapshot(disk, dest_node, instance):
1165
  """Export a block device snapshot to a remote node.
1166

1167
  Args:
1168
    disk: the snapshot block device
1169
    dest_node: the node to send the image to
1170
    instance: instance being exported
1171

1172
  Returns:
1173
    True if successful, False otherwise.
1174

1175
  """
1176
  inst_os = OSFromDisk(instance.os)
1177
  export_script = inst_os.export_script
1178

    
1179
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1180
                                     instance.name, int(time.time()))
1181
  if not os.path.exists(constants.LOG_OS_DIR):
1182
    os.mkdir(constants.LOG_OS_DIR, 0750)
1183

    
1184
  real_os_dev = _RecursiveFindBD(disk)
1185
  if real_os_dev is None:
1186
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1187
                                  str(disk))
1188
  real_os_dev.Open()
1189

    
1190
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1191
  destfile = disk.physical_id[1]
1192

    
1193
  # the target command is built out of three individual commands,
1194
  # which are joined by pipes; we check each individual command for
1195
  # valid parameters
1196

    
1197
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1198
                               export_script, instance.name,
1199
                               real_os_dev.dev_path, logfile)
1200

    
1201
  comprcmd = "gzip"
1202

    
1203
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1204
                                destdir, destdir, destfile)
1205
  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)
1206

    
1207

    
1208

    
1209
  # all commands have been checked, so we're safe to combine them
1210
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1211

    
1212
  result = utils.RunCmd(command)
1213

    
1214
  if result.failed:
1215
    logger.Error("os snapshot export command '%s' returned error: %s"
1216
                 " output: %s" %
1217
                 (command, result.fail_reason, result.output))
1218
    return False
1219

    
1220
  return True
1221

    
1222

    
1223
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Builds the export's ini-style config file in the '<name>.new'
  staging directory and then atomically moves it over the final
  export directory.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
  # pre-initialize the counter so an instance without nics doesn't hit
  # a NameError below; the stored value is the *last index* (count - 1),
  # kept as-is for compatibility with the existing import code
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
               '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count', '%d' % nic_count)

  # same guard as for the nics above
  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count', '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export of the same name with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1279

    
1280

    
1281
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None
    when the mandatory sections are missing.

  """
  config = objects.SerializableConfigParser()
  config.read(os.path.join(dest, constants.EXPORT_CONF_FILE))

  # both the export and the instance sections must be present
  has_exp = config.has_section(constants.INISECT_EXP)
  has_ins = config.has_section(constants.INISECT_INS)
  if has_exp and has_ins:
    return config
  return None
1301

    
1302

    
1303
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  The image is fetched from src_node over ssh, decompressed with
  gunzip and piped into the OS-specific import script, which writes it
  onto the instance's os and swap block devices.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  Raises:
    errors.BlockDeviceError: if the os or swap block device is not
      set up on this node

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # resolve the instance-visible names into disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # map the disk objects to the local block devices and make sure both
  # are activated before handing their paths to the import script
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file for the import script's output
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)

  comprcmd = "gunzip"
  # NOTE(review): '&>' is a bash-ism; this assumes RunCmd executes the
  # pipeline under a bash-compatible shell — confirm
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  # pipeline: ssh-fetch | gunzip | import-script
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os import command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1367

    
1368

    
1369
def ListExports():
  """Return a list of exports currently available on this machine.

  A missing export directory yields an empty list.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
1377

    
1378

    
1379
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)

  # previously rmtree errors propagated to the caller (see the old
  # TODO); report them via the documented False return value instead
  try:
    shutil.rmtree(target)
  except EnvironmentError:
    logger.Error("Can't remove export directory %s" % target)
    return False

  return True
1396

    
1397

    
1398
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of (disk, unique_id) tuples: the disk
  to rename and the new unique_id to give it. [NOTE(review): the
  previous docstring described 3-tuples (disk, new_logical,
  new_physical), but the loop below unpacks 2-tuples.] The return
  value will be a combined boolean result (True only if all renames
  succeeded).

  """
  result = True
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      # device not found locally; record the failure but keep going
      result = False
      continue
    try:
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        # the device path changed, so the old cache entry is now stale
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
      result = False
  return result
1428

    
1429

    
1430
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # hook script names must match this mask, otherwise they're skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      a (success, output) tuple: success is True when the script exited
      with status zero; output holds at most the first 4 KB of the
      script's combined stdout/stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        # only the first 4 KB of output is captured; stdout is closed
        # right after, so a chattier script will hit a broken pipe on
        # its next write
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() if it is interrupted by a signal (EINTR)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: hook path, used to build the "<hpath>-<suffix>.d"
        subdirectory name under the hooks base dir
      - phase: constants.HOOKS_PHASE_PRE or HOOKS_PHASE_POST, selecting
        the "pre" or "post" suffix
      - env: environment passed to each hook script

    Returns:
      a list of ("subdir/script", status, output) tuples, where status
      is one of constants.HKR_SKIP, HKR_FAIL or HKR_SUCCESS

    Raises:
      errors.ProgrammerError: for an unknown phase value

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # a missing/unreadable hooks dir simply means no hooks to run
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # skip entries that are not regular executable files or whose
      # name does not match RE_MASK (e.g. editor backups)
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
1539

    
1540

    
1541
class DevCacheManager(object):
1542
  """Simple class for managing a chache of block device information.
1543

1544
  """
1545
  _DEV_PREFIX = "/dev/"
1546
  _ROOT_DIR = constants.BDEV_CACHE_DIR
1547

    
1548
  @classmethod
1549
  def _ConvertPath(cls, dev_path):
1550
    """Converts a /dev/name path to the cache file name.
1551

1552
    This replaces slashes with underscores and strips the /dev
1553
    prefix. It then returns the full path to the cache file
1554

1555
    """
1556
    if dev_path.startswith(cls._DEV_PREFIX):
1557
      dev_path = dev_path[len(cls._DEV_PREFIX):]
1558
    dev_path = dev_path.replace("/", "_")
1559
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1560
    return fpath
1561

    
1562
  @classmethod
1563
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1564
    """Updates the cache information for a given device.
1565

1566
    """
1567
    if dev_path is None:
1568
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1569
      return
1570
    fpath = cls._ConvertPath(dev_path)
1571
    if on_primary:
1572
      state = "primary"
1573
    else:
1574
      state = "secondary"
1575
    if iv_name is None:
1576
      iv_name = "not_visible"
1577
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1578
    try:
1579
      utils.WriteFile(fpath, data=fdata)
1580
    except EnvironmentError, err:
1581
      logger.Error("Can't update bdev cache for %s, error %s" %
1582
                   (dev_path, str(err)))
1583

    
1584
  @classmethod
1585
  def RemoveCache(cls, dev_path):
1586
    """Remove data for a dev_path.
1587

1588
    """
1589
    if dev_path is None:
1590
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1591
      return
1592
    fpath = cls._ConvertPath(dev_path)
1593
    try:
1594
      utils.RemoveFile(fpath)
1595
    except EnvironmentError, err:
1596
      logger.Error("Can't update bdev cache for %s, error %s" %
1597
                   (dev_path, str(err)))