Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 6c8af3d0

History | View | Annotate | Download (45.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate local node as master node.

  There are two needed steps for this:
    - run the master script
    - register the cron script

  """
  cmd_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not cmd_result.failed:
    return True
  # the master script did not come up cleanly; report and signal failure
  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (cmd_result.cmd, cmd_result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate this node as master.

  This does two things:
    - run the master stop script
    - remove link to master cron script.

  """
  cmd_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not cmd_result.failed:
    return True
  # stopping failed; log the command output for diagnosis
  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (cmd_result.cmd, cmd_result.output))
  return False
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
83
  """Joins this node to the cluster.
84

85
  This does the following:
86
      - updates the hostkeys of the machine (rsa and dsa)
87
      - adds the ssh private key to the user
88
      - adds the ssh public key to the users' authorized_keys file
89

90
  """
91
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
92
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
93
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
94
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
95
  for name, content, mode in sshd_keys:
96
    utils.WriteFile(name, data=content, mode=mode)
97

    
98
  try:
99
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
100
                                                    mkdir=True)
101
  except errors.OpExecError, err:
102
    logger.Error("Error while processing user ssh files: %s" % err)
103
    return False
104

    
105
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
106
    utils.WriteFile(name, data=content, mode=0600)
107

    
108
  utils.AddAuthorizedKey(auth_keys, sshpub)
109

    
110
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
111

    
112
  return True
113

    
114

    
115
def LeaveCluster():
116
  """Cleans up the current node and prepares it to be removed from the cluster.
117

118
  """
119
  if os.path.isdir(constants.DATA_DIR):
120
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
121
      full_name = os.path.join(constants.DATA_DIR, rel_name)
122
      if os.path.isfile(full_name) and not os.path.islink(full_name):
123
        utils.RemoveFile(full_name)
124

    
125
  try:
126
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
127
  except errors.OpExecError, err:
128
    logger.Error("Error while processing ssh files: %s" % err)
129
    return
130

    
131
  f = open(pub_key, 'r')
132
  try:
133
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
134
  finally:
135
    f.close()
136

    
137
  utils.RemoveFile(priv_key)
138
  utils.RemoveFile(pub_key)
139

    
140

    
141
def GetNodeInfo(vgname):
  """Collect resource information about this node.

  Returns a dict containing at least:
    vg_size: size of the configured volume group, in MiB
    vg_free: free space in the volume group, in MiB
    bootid: the kernel's current boot id
  plus whatever keys the hypervisor's GetNodeInfo reports (memory
  figures and the like).

  """
  vginfo = _GetVGInfo(vgname)
  outputarray = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  # the boot id lets callers detect node reboots between queries
  bootid_file = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    outputarray["bootid"] = bootid_file.read(128).rstrip("\n")
  finally:
    bootid_file.close()

  return outputarray
172

    
173

    
174
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    # only nodes that fail the hostname check get an entry
    node_failures = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        node_failures[node] = message
    result['nodelist'] = node_failures
  return result
207

    
208

    
209
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Args:
    vg_name: the volume group whose logical volumes should be listed

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

    NOTE(review): on `lvs` failure this returns the raw command output
    (a string) instead of a dictionary; callers presumably detect the
    error case by type -- confirm before changing this contract.

  """
  lvs = {}
  sep = '|'
  # machine-readable lvs invocation: no headings, MiB units without a
  # suffix, '|'-separated name/size/attribute fields
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return result.output

  for line in result.stdout.splitlines():
    # drop leading whitespace and any trailing separator before splitting
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    if len(attr) != 6:
      # malformed attribute field: substitute a neutral value so the
      # positional tests below cannot raise IndexError
      attr = '------'
    # positions 5 and 6 of lv_attr are interpreted as the state and
    # open ("o") flags respectively
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
238

    
239

    
240
def ListVolumeGroups():
  """Return the volume groups present on this node.

  Returns:
    Dictionary mapping each volume group name to its size; this is a
    thin delegation to the utils module.

  """
  return utils.ListVolumeGroups()
248

    
249

    
250
def NodeVolumes():
  """List all volumes on this node.

  Returns a list of dicts with 'name', 'size', 'dev' and 'vg' keys,
  one per logical volume, or an empty dict if `lvs` fails.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  def _BuildEntry(fields):
    # the devices field looks like "/dev/sda(0)"; keep only the device
    # path, stripping the extent information in parentheses
    dev_field = fields[2].strip()
    paren = dev_field.find('(')
    if paren >= 0:
      dev_field = dev_field[:paren]
    return {
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': dev_field,
      'vg': fields[3].strip(),
      }

  return [_BuildEntry(line.split('|')) for line in result.stdout.splitlines()]
277

    
278

    
279
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns:
    True if all of them exist, false otherwise

  """
  # bail out on the first missing bridge
  for brname in bridges_list:
    if not utils.BridgeExists(brname):
      return False
  return True
291

    
292

    
293
def GetInstanceList():
294
  """Provides a list of instances.
295

296
  Returns:
297
    A list of all running instances on the current node
298
    - instance1.example.com
299
    - instance2.example.com
300

301
  """
302
  try:
303
    names = hypervisor.GetHypervisor().ListInstances()
304
  except errors.HypervisorError, err:
305
    logger.Error("error enumerating instances: %s" % str(err))
306
    raise
307

    
308
  return names
309

    
310

    
311
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is not None:
    # pick the fields of interest out of the hypervisor tuple
    for key, idx in (('memory', 2), ('state', 4), ('time', 5)):
      output[key] = iinfo[idx]

  return output
334

    
335

    
336
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for entry in iinfo:
      # entry layout: (name, id, memory, vcpus, state, cpu_time)
      output[entry[0]] = {
        'memory': entry[2],
        'vcpus': entry[3],
        'state': entry[4],
        'time': entry[5],
        }

  return output
366

    
367

    
368
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.

  Runs the OS-specific create script against the instance's os and swap
  block devices, capturing the script output under constants.LOG_OS_DIR.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success; False if a device name can't be resolved or the
    create script fails.

  Raises:
    errors.BlockDeviceError: if a backing block device is not assembled

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # translate the instance-visible disk names into disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # resolve and open the real block devices; they must already be
  # assembled on this node at this point
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run logfile named after the OS, instance and current timestamp
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # the create script receives the instance name (-i) and the os/swap
  # device paths (-b/-s); stdout+stderr are redirected into the logfile
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)
  if result.failed:
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
                 " output: %s" %
                 (command, result.fail_reason, logfile, result.output))
    return False

  return True
421

    
422

    
423
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
424
  """Run the OS rename script for an instance.
425

426
  Args:
427
    instance: the instance object
428
    old_name: the old name of the instance
429
    os_disk: the instance-visible name of the os device
430
    swap_disk: the instance-visible name of the swap device
431

432
  """
433
  inst_os = OSFromDisk(instance.os)
434

    
435
  script = inst_os.rename_script
436

    
437
  os_device = instance.FindDisk(os_disk)
438
  if os_device is None:
439
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
440
    return False
441

    
442
  swap_device = instance.FindDisk(swap_disk)
443
  if swap_device is None:
444
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
445
    return False
446

    
447
  real_os_dev = _RecursiveFindBD(os_device)
448
  if real_os_dev is None:
449
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
450
                                  str(os_device))
451
  real_os_dev.Open()
452

    
453
  real_swap_dev = _RecursiveFindBD(swap_device)
454
  if real_swap_dev is None:
455
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
456
                                  str(swap_device))
457
  real_swap_dev.Open()
458

    
459
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
460
                                           old_name,
461
                                           instance.name, int(time.time()))
462
  if not os.path.exists(constants.LOG_OS_DIR):
463
    os.mkdir(constants.LOG_OS_DIR, 0750)
464

    
465
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
466
                                inst_os.path, script, old_name, instance.name,
467
                                real_os_dev.dev_path, real_swap_dev.dev_path,
468
                                logfile)
469

    
470
  result = utils.RunCmd(command)
471

    
472
  if result.failed:
473
    logger.Error("os create command '%s' returned error: %s"
474
                 " output: %s" %
475
                 (command, result.fail_reason, result.output))
476
    return False
477

    
478
  return True
479

    
480

    
481
def _GetVGInfo(vg_name):
482
  """Get informations about the volume group.
483

484
  Args:
485
    vg_name: the volume group
486

487
  Returns:
488
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
489
    where
490
    vg_size is the total size of the volume group in MiB
491
    vg_free is the free size of the volume group in MiB
492
    pv_count are the number of physical disks in that vg
493

494
  If an error occurs during gathering of data, we return the same dict
495
  with keys all set to None.
496

497
  """
498
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
499

    
500
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
501
                         "--nosuffix", "--units=m", "--separator=:", vg_name])
502

    
503
  if retval.failed:
504
    errmsg = "volume group %s not present" % vg_name
505
    logger.Error(errmsg)
506
    return retdic
507
  valarr = retval.stdout.strip().rstrip(':').split(':')
508
  if len(valarr) == 3:
509
    try:
510
      retdic = {
511
        "vg_size": int(round(float(valarr[0]), 0)),
512
        "vg_free": int(round(float(valarr[1]), 0)),
513
        "pv_count": int(valarr[2]),
514
        }
515
    except ValueError, err:
516
      logger.Error("Fail to parse vgs output: %s" % str(err))
517
  else:
518
    logger.Error("vgs output has the wrong number of fields (expected"
519
                 " three): %s" % str(valarr))
520
  return retdic
521

    
522

    
523
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  """
  def _OpenOne(disk):
    # find the real device backing this disk and open it for use
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    device.Open()
    return (disk, device)

  return [_OpenOne(disk) for disk in instance.disks]
539

    
540

    
541
def StartInstance(instance, extra_args):
542
  """Start an instance.
543

544
  Args:
545
    instance - name of instance to start.
546

547
  """
548
  running_instances = GetInstanceList()
549

    
550
  if instance.name in running_instances:
551
    return True
552

    
553
  block_devices = _GatherBlockDevs(instance)
554
  hyper = hypervisor.GetHypervisor()
555

    
556
  try:
557
    hyper.StartInstance(instance, block_devices, extra_args)
558
  except errors.HypervisorError, err:
559
    logger.Error("Failed to start instance: %s" % err)
560
    return False
561

    
562
  return True
563

    
564

    
565
def ShutdownInstance(instance):
566
  """Shut an instance down.
567

568
  Args:
569
    instance - name of instance to shutdown.
570

571
  """
572
  running_instances = GetInstanceList()
573

    
574
  if instance.name not in running_instances:
575
    return True
576

    
577
  hyper = hypervisor.GetHypervisor()
578
  try:
579
    hyper.StopInstance(instance)
580
  except errors.HypervisorError, err:
581
    logger.Error("Failed to stop instance: %s" % err)
582
    return False
583

    
584
  # test every 10secs for 2min
585
  shutdown_ok = False
586

    
587
  time.sleep(1)
588
  for dummy in range(11):
589
    if instance.name not in GetInstanceList():
590
      break
591
    time.sleep(10)
592
  else:
593
    # the shutdown did not succeed
594
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
595

    
596
    try:
597
      hyper.StopInstance(instance, force=True)
598
    except errors.HypervisorError, err:
599
      logger.Error("Failed to stop instance: %s" % err)
600
      return False
601

    
602
    time.sleep(1)
603
    if instance.name in GetInstanceList():
604
      logger.Error("could not shutdown instance '%s' even by destroy")
605
      return False
606

    
607
  return True
608

    
609

    
610
def RebootInstance(instance, reboot_type, extra_args):
611
  """Reboot an instance.
612

613
  Args:
614
    instance    - name of instance to reboot
615
    reboot_type - how to reboot [soft,hard,full]
616

617
  """
618
  running_instances = GetInstanceList()
619

    
620
  if instance.name not in running_instances:
621
    logger.Error("Cannot reboot instance that is not running")
622
    return False
623

    
624
  hyper = hypervisor.GetHypervisor()
625
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
626
    try:
627
      hyper.RebootInstance(instance)
628
    except errors.HypervisorError, err:
629
      logger.Error("Failed to soft reboot instance: %s" % err)
630
      return False
631
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
632
    try:
633
      ShutdownInstance(instance)
634
      StartInstance(instance, extra_args)
635
    except errors.HypervisorError, err:
636
      logger.Error("Failed to hard reboot instance: %s" % err)
637
      return False
638
  else:
639
    raise errors.ParameterError("reboot_type invalid")
640

    
641

    
642
  return True
643

    
644

    
645
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object
   size: the size of the physical underlying device
   owner: a string with the name of the instance
   on_primary: a boolean indicating if it is the primary node or not
   info: string that will be sent to the physical device creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  # first assemble all the children, since a compound device can only
  # be created on top of already-active components
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      clist.append(crdev)
  # best-effort cleanup: if a device with this id already exists, remove
  # it; attach failures here are deliberately ignored (err is unused)
  try:
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    # open read/write only where the disk says it is allowed
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  # the unique_id may only be known after the device actually exists
  physical_id = device.unique_id
  return physical_id
699

    
700

    
701
def RemoveBlockDevice(disk):
702
  """Remove a block device.
703

704
  This is intended to be called recursively.
705

706
  """
707
  try:
708
    # since we are removing the device, allow a partial match
709
    # this allows removal of broken mirrors
710
    rdev = _RecursiveFindBD(disk, allow_partial=True)
711
  except errors.BlockDeviceError, err:
712
    # probably can't attach
713
    logger.Info("Can't attach to device %s in remove" % disk)
714
    rdev = None
715
  if rdev is not None:
716
    r_path = rdev.dev_path
717
    result = rdev.Remove()
718
    if result:
719
      DevCacheManager.RemoveCache(r_path)
720
  else:
721
    result = True
722
  if disk.children:
723
    for child in disk.children:
724
      result = result and RemoveBlockDevice(child)
725
  return result
726

    
727

    
728
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    owner: the name of the instance owning the device
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)
    on nodes where assembly happens; True on secondary nodes where
    the disk does not need assembling

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    # mcn ends up as the maximum number of children that may fail to
    # assemble (i.e. be None) while the device can still be built in a
    # degraded state; ChildrenNeeded() == -1 means all are required
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        # re-raise only once the failure budget is exhausted; otherwise
        # record a None placeholder for the broken child
        if children.count(None) >= mcn:
          raise
        cdev = None
        logger.Debug("Error in child activation: %s" % str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    # open read/write only where the disk says it is allowed
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    result = True
  return result
774

    
775

    
776
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  # translate an assembled device object into its /dev path; the
  # secondary-node True value passes through unchanged
  if isinstance(assembled, bdev.BlockDev):
    return assembled.dev_path
  return assembled
790

    
791

    
792
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is None:
    result = True
  else:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      DevCacheManager.RemoveCache(r_path)

  # recurse into the children; after a failure the remaining shutdowns
  # are skipped (short-circuit), matching the accumulated-and semantics
  for child in (disk.children or []):
    result = result and ShutdownBlockDevice(child)
  return result
815

    
816

    
817
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  """
  # the parent may be degraded, hence the partial match
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if None in new_bdevs:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
832

    
833

    
834
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  # collect the device paths of the children to be removed; prefer the
  # static path, falling back to a dynamic lookup
  devs = []
  for disk in new_cdevs:
    rpath = disk.StaticDevPath()
    if rpath is not None:
      devs.append(rpath)
      continue
    bd = _RecursiveFindBD(disk)
    if bd is None:
      logger.Error("Can't find dynamic device %s while removing children" %
                   disk)
      return False
    devs.append(bd.dev_path)
  parent_bdev.RemoveChildren(devs)
  return True
857

    
858

    
859
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  """
  def _StatusOne(dsk):
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
    return rbd.CombinedSyncStatus()

  return [_StatusOne(dsk) for dsk in disks]
877

    
878

    
879
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE: allow_partial is kept for interface compatibility; it is not
  # consulted in this lookup itself
  children = []
  if disk.children:
    children = [_RecursiveFindBD(chdisk) for chdisk in disk.children]

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
901

    
902

    
903
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return None
  # flatten the identifying info and the sync status into one tuple
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
919

    
920

    
921
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the full new contents of the file
    mode: permission bits to set on the file
    uid, gid: ownership to set on the file
    atime, mtime: timestamps to set on the file

  Returns:
    True on success, False if the name is not absolute or not in the
    allowed upload targets.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of files the master is allowed to overwrite
  allowed_files = [
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    ]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temp file in the same directory so the final rename is
  # an atomic replace on the same filesystem
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    # set ownership/permissions before the data lands, fsync before the
    # rename so the new contents are durable when they become visible
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename this is presumably a no-op on the
    # now-missing temp name -- relies on RemoveFile tolerating that
    utils.RemoveFile(new_name)
  return True
959

    
960

    
961
def _ErrnoOrStr(err):
962
  """Format an EnvironmentError exception.
963

964
  If the `err` argument has an errno attribute, it will be looked up
965
  and converted into a textual EXXXX description. Otherwise the string
966
  representation of the error will be returned.
967

968
  """
969
  if hasattr(err, 'errno'):
970
    detail = errno.errorcode[err.errno]
971
  else:
972
    detail = str(err)
973
  return detail
974

    
975

    
976
def _OSSearch(name, search_path=None):
  """Search for OSes with the given name in the search_path.

  Args:
    name: The name of the OS to look for
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)

  Returns:
    The base_dir the OS resides in, or None if it was not found.

  """
  if search_path is None:
    search_path = constants.OS_SEARCH_PATH

  # return the first directory which contains a subdirectory named
  # after the requested OS
  for base_dir in search_path:
    candidate = os.path.sep.join([base_dir, name])
    if os.path.isdir(candidate):
      return base_dir

  return None
996

    
997

    
998
def _OSOndiskVersion(name, os_dir):
999
  """Compute and return the API version of a given OS.
1000

1001
  This function will try to read the API version of the os given by
1002
  the 'name' parameter and residing in the 'os_dir' directory.
1003

1004
  Return value will be either an integer denoting the version or None in the
1005
  case when this is not a valid OS name.
1006

1007
  """
1008
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1009

    
1010
  try:
1011
    st = os.stat(api_file)
1012
  except EnvironmentError, err:
1013
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1014
                           " found (%s)" % _ErrnoOrStr(err))
1015

    
1016
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1017
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1018
                           " a regular file")
1019

    
1020
  try:
1021
    f = open(api_file)
1022
    try:
1023
      api_version = f.read(256)
1024
    finally:
1025
      f.close()
1026
  except EnvironmentError, err:
1027
    raise errors.InvalidOS(name, os_dir, "error while reading the"
1028
                           " API version (%s)" % _ErrnoOrStr(err))
1029

    
1030
  api_version = api_version.strip()
1031
  try:
1032
    api_version = int(api_version)
1033
  except (TypeError, ValueError), err:
1034
    raise errors.InvalidOS(name, os_dir,
1035
                           "API version is not integer (%s)" % str(err))
1036

    
1037
  return api_version
1038

    
1039

    
1040
def DiagnoseOS(top_dirs=None):
1041
  """Compute the validity for all OSes.
1042

1043
  Returns an OS object for each name in all the given top directories
1044
  (if not given defaults to constants.OS_SEARCH_PATH)
1045

1046
  Returns:
1047
    list of OS objects
1048

1049
  """
1050
  if top_dirs is None:
1051
    top_dirs = constants.OS_SEARCH_PATH
1052

    
1053
  result = []
1054
  for dir_name in top_dirs:
1055
    if os.path.isdir(dir_name):
1056
      try:
1057
        f_names = utils.ListVisibleFiles(dir_name)
1058
      except EnvironmentError, err:
1059
        logger.Error("Can't list the OS directory %s: %s" %
1060
                     (dir_name, str(err)))
1061
        break
1062
      for name in f_names:
1063
        try:
1064
          os_inst = OSFromDisk(name, base_dir=dir_name)
1065
          result.append(os_inst)
1066
        except errors.InvalidOS, err:
1067
          result.append(objects.OS.FromInvalidOS(err))
1068

    
1069
  return result
1070

    
1071

    
1072
def OSFromDisk(name, base_dir=None):
1073
  """Create an OS instance from disk.
1074

1075
  This function will return an OS instance if the given name is a
1076
  valid OS name. Otherwise, it will raise an appropriate
1077
  `errors.InvalidOS` exception, detailing why this is not a valid
1078
  OS.
1079

1080
  Args:
1081
    os_dir: Directory containing the OS scripts. Defaults to a search
1082
            in all the OS_SEARCH_PATH directories.
1083

1084
  """
1085

    
1086
  if base_dir is None:
1087
    base_dir = _OSSearch(name)
1088

    
1089
  if base_dir is None:
1090
    raise errors.InvalidOS(name, None, "OS dir not found in search path")
1091

    
1092
  os_dir = os.path.sep.join([base_dir, name])
1093
  api_version = _OSOndiskVersion(name, os_dir)
1094

    
1095
  if api_version != constants.OS_API_VERSION:
1096
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1097
                           " (found %s want %s)"
1098
                           % (api_version, constants.OS_API_VERSION))
1099

    
1100
  # OS Scripts dictionary, we will populate it with the actual script names
1101
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1102

    
1103
  for script in os_scripts:
1104
    os_scripts[script] = os.path.sep.join([os_dir, script])
1105

    
1106
    try:
1107
      st = os.stat(os_scripts[script])
1108
    except EnvironmentError, err:
1109
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1110
                             (script, _ErrnoOrStr(err)))
1111

    
1112
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1113
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1114
                             script)
1115

    
1116
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1117
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1118
                             script)
1119

    
1120

    
1121
  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1122
                    create_script=os_scripts['create'],
1123
                    export_script=os_scripts['export'],
1124
                    import_script=os_scripts['import'],
1125
                    rename_script=os_scripts['rename'],
1126
                    api_version=api_version)
1127

    
1128

    
1129
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted.

  """
  if disk.children:
    # recurse into the children: with a single child there is no
    # choice, otherwise pick a child whose size matches the disk's
    if len(disk.children) == 1:
      return SnapshotBlockDevice(disk.children[0])
    for child in disk.children:
      if child.size == disk.size:
        return SnapshotBlockDevice(child)
    # NOTE: when no child matches, we fall through and return None
  elif disk.dev_type == constants.LD_LV:
    leaf_dev = _RecursiveFindBD(disk)
    if leaf_dev is None:
      return None
    # let's stay on the safe side and ask for the full size, for now
    return leaf_dev.Snapshot(disk.size)
  else:
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
1163

    
1164

    
1165
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  The snapshot's contents are produced by the OS export script, piped
  through gzip, and streamed over ssh into a file on the destination
  node.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  Raises:
    errors.BlockDeviceError: if the snapshot device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-export log file, named after the OS, instance and timestamp
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  # the image goes into an '<instance>.new' staging directory on the
  # destination node; NOTE(review): presumably FinalizeExport on that
  # node later renames it into place -- confirm against the caller
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os snapshot export command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1222

    
1223

    
1224
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # start the counters at -1 so they are defined even when the lists
  # below are empty (the loop variables previously leaked out of the
  # loops and raised NameError for an instance with no nics/disks);
  # note that the value written is the *last index*, not the length,
  # which is the format the import side already relies on
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
               '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count', '%d' % nic_count)

  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count', '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export for this instance with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1280

    
1281

    
1282
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    one of the mandatory sections is missing.

  """
  config = objects.SerializableConfigParser()
  config.read(os.path.join(dest, constants.EXPORT_CONF_FILE))

  # both the export and the instance sections are mandatory
  complete = (config.has_section(constants.INISECT_EXP) and
              config.has_section(constants.INISECT_INS))
  if not complete:
    return None

  return config
1302

    
1303

    
1304
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  The image is streamed from src_node over ssh, decompressed with
  gunzip and fed to the OS import script, which writes it to the
  instance's disks.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  Raises:
    errors.BlockDeviceError: if one of the instance's devices is not
      set up on this node

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # translate the instance-visible disk names into disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # both devices must be assembled locally before we can write to them
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-import log file, named after the OS, instance and timestamp
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)

  # remote cat | gunzip | import script; each part is built with
  # argument quoting above, so joining them with pipes is safe
  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os import command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1368

    
1369

    
1370
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns an empty list when the export directory does not exist.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
1378

    
1379

    
1380
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(os.path.join(constants.EXPORT_DIR, export))

  return True
1397

    
1398

    
1399
def RenameBlockDevices(devlist):
1400
  """Rename a list of block devices.
1401

1402
  The devlist argument is a list of tuples (disk, new_logical,
1403
  new_physical). The return value will be a combined boolean result
1404
  (True only if all renames succeeded).
1405

1406
  """
1407
  result = True
1408
  for disk, unique_id in devlist:
1409
    dev = _RecursiveFindBD(disk)
1410
    if dev is None:
1411
      result = False
1412
      continue
1413
    try:
1414
      old_rpath = dev.dev_path
1415
      dev.Rename(unique_id)
1416
      new_rpath = dev.dev_path
1417
      if old_rpath != new_rpath:
1418
        DevCacheManager.RemoveCache(old_rpath)
1419
        # FIXME: we should add the new cache information here, like:
1420
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1421
        # but we don't have the owner here - maybe parse from existing
1422
        # cache? for now, we only lose lvm data when we rename, which
1423
        # is less critical than DRBD or MD
1424
    except errors.BlockDeviceError, err:
1425
      logger.Error("Can't rename device '%s' to '%s': %s" %
1426
                   (dev, unique_id, err))
1427
      result = False
1428
  return result
1429

    
1430

    
1431
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only scripts whose file names match this regexp are executed;
  # anything else found in the hooks directory is reported as skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      A (success, output) tuple, where success means the script exited
      with code zero and output holds at most the first 4096 bytes of
      its combined stdout/stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        # NOTE(review): only the first 4kB of output are kept and the
        # pipe is closed right away, so a very verbose hook may get
        # EPIPE on later writes -- confirm this is intended
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # wait() can be interrupted by a signal; retry on EINTR,
      # re-raise anything else
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: the hooks path prefix (used to build the directory name)
      - phase: constants.HOOKS_PHASE_PRE or constants.HOOKS_PHASE_POST
      - env: the environment passed to every hook script

    Returns:
      A list of ("subdir/script", status, output) tuples, with status
      one of constants.HKR_SKIP/HKR_FAIL/HKR_SUCCESS.

    Raises:
      errors.ProgrammerError: for an unknown phase

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    # hooks live in <base>/<hpath>-<pre|post>.d/
    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # non-executable or oddly-named entries are reported as skipped
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
1540

    
1541

    
1542
class DevCacheManager(object):
1543
  """Simple class for managing a cache of block device information.
1544

1545
  """
1546
  _DEV_PREFIX = "/dev/"
1547
  _ROOT_DIR = constants.BDEV_CACHE_DIR
1548

    
1549
  @classmethod
1550
  def _ConvertPath(cls, dev_path):
1551
    """Converts a /dev/name path to the cache file name.
1552

1553
    This replaces slashes with underscores and strips the /dev
1554
    prefix. It then returns the full path to the cache file
1555

1556
    """
1557
    if dev_path.startswith(cls._DEV_PREFIX):
1558
      dev_path = dev_path[len(cls._DEV_PREFIX):]
1559
    dev_path = dev_path.replace("/", "_")
1560
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1561
    return fpath
1562

    
1563
  @classmethod
1564
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1565
    """Updates the cache information for a given device.
1566

1567
    """
1568
    if dev_path is None:
1569
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1570
      return
1571
    fpath = cls._ConvertPath(dev_path)
1572
    if on_primary:
1573
      state = "primary"
1574
    else:
1575
      state = "secondary"
1576
    if iv_name is None:
1577
      iv_name = "not_visible"
1578
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1579
    try:
1580
      utils.WriteFile(fpath, data=fdata)
1581
    except EnvironmentError, err:
1582
      logger.Error("Can't update bdev cache for %s, error %s" %
1583
                   (dev_path, str(err)))
1584

    
1585
  @classmethod
1586
  def RemoveCache(cls, dev_path):
1587
    """Remove data for a dev_path.
1588

1589
    """
1590
    if dev_path is None:
1591
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1592
      return
1593
    fpath = cls._ConvertPath(dev_path)
1594
    try:
1595
      utils.RemoveFile(fpath)
1596
    except EnvironmentError, err:
1597
      logger.Error("Can't update bdev cache for %s, error %s" %
1598
                   (dev_path, str(err)))