Revision 28a6fbc8

b/qa/qa-sample.yaml
55 55
  instance-import: True
56 56
  instance-reinstall: True
57 57
  instance-shutdown: True
58
  instance-automatic-restart: False
59
  instance-consecutive-failures: False
58 60

  
59 61
  # Make sure the disk(s) required for Dom0 are not included in
60 62
  # the volume group used for instances. Otherwise the whole system may stop
61 63
  # working until restarted.
62 64
  instance-disk-failure: False
63 65

  
64
  # This test takes up to 6 minutes to complete
65
  instance-automatic-restart: False
66

  
67
  # This test takes at least 35 minutes to complete
68
  instance-consecutive-failures: False
69

  
70 66
# Other settings
71 67
options:
72 68
  burnin-instances: 2
b/qa/qa_daemon.py
67 67
    raise qa_error.Error("xm shutdown failed")
68 68

  
69 69

  
70
def _ResetWatcherDaemon(node):
70
def _ResetWatcherDaemon():
71 71
  """Removes the watcher daemon's state file.
72 72

  
73 73
  Args:
74 74
    node: Node to be reset
75 75
  """
76
  master = qa_config.GetMasterNode()
77

  
76 78
  cmd = ['rm', '-f', constants.WATCHER_STATEFILE]
77
  AssertEqual(StartSSH(node['primary'],
79
  AssertEqual(StartSSH(master['primary'],
80
                       utils.ShellQuoteArgs(cmd)).wait(), 0)
81

  
82

  
83
def _RunWatcherDaemon():
84
  """Runs the ganeti-watcher daemon on the master node.
85

  
86
  """
87
  master = qa_config.GetMasterNode()
88

  
89
  cmd = ['ganeti-watcher', '-d']
90
  AssertEqual(StartSSH(master['primary'],
78 91
                       utils.ShellQuoteArgs(cmd)).wait(), 0)
79 92

  
80 93

  
81 94
def PrintCronWarning():
82
  """Shows a warning about the required cron job.
95
  """Shows a warning about the cron job.
83 96

  
84 97
  """
98
  msg = ("For the following tests it's recommended to turn off the "
99
         "ganeti-watcher cronjob.")
85 100
  print
86
  print qa_utils.FormatWarning("The following tests require the cron script "
87
                               "for ganeti-watcher to be set up.")
101
  print qa_utils.FormatWarning(msg)
88 102

  
89 103

  
90 104
def TestInstanceAutomaticRestart(node, instance):
91 105
  """Test automatic restart of instance by ganeti-watcher.
92 106

  
93
  Note: takes up to 6 minutes to complete.
94 107
  """
95 108
  master = qa_config.GetMasterNode()
96 109
  inst_name = qa_utils.ResolveInstanceName(instance)
97 110

  
98
  _ResetWatcherDaemon(node)
111
  _ResetWatcherDaemon()
99 112
  _XmShutdownInstance(node, inst_name)
100 113

  
101
  # Give it a bit more than five minutes to start again
102
  restart_at = time.time() + 330
114
  _RunWatcherDaemon()
115
  time.sleep(5)
103 116

  
104
  # Wait until it's running again
105
  while time.time() <= restart_at:
106
    if _InstanceRunning(node, inst_name):
107
      break
108
    time.sleep(15)
109
  else:
110
    raise qa_error.Error("Daemon didn't restart instance in time")
117
  if not _InstanceRunning(node, inst_name):
118
    raise qa_error.Error("Daemon didn't restart instance")
111 119

  
112 120
  cmd = ['gnt-instance', 'info', inst_name]
113 121
  AssertEqual(StartSSH(master['primary'],
......
117 125
def TestInstanceConsecutiveFailures(node, instance):
118 126
  """Test five consecutive instance failures.
119 127

  
120
  Note: takes at least 35 minutes to complete.
121 128
  """
122 129
  master = qa_config.GetMasterNode()
123 130
  inst_name = qa_utils.ResolveInstanceName(instance)
124 131

  
125
  _ResetWatcherDaemon(node)
126
  _XmShutdownInstance(node, inst_name)
127

  
128
  # Do shutdowns for 30 minutes
129
  finished_at = time.time() + (35 * 60)
132
  _ResetWatcherDaemon()
130 133

  
131
  while time.time() <= finished_at:
132
    if _InstanceRunning(node, inst_name):
133
      _XmShutdownInstance(node, inst_name)
134
    time.sleep(30)
134
  for should_start in ([True] * 5) + [False]:
135
    _XmShutdownInstance(node, inst_name)
136
    _RunWatcherDaemon()
137
    time.sleep(5)
135 138

  
136
  # Check for some time whether the instance doesn't start again
137
  check_until = time.time() + 330
138
  while time.time() <= check_until:
139
    if _InstanceRunning(node, inst_name):
140
      raise qa_error.Error("Instance started when it shouldn't")
141
    time.sleep(30)
139
    if bool(_InstanceRunning(node, inst_name)) != should_start:
140
      if should_start:
141
        msg = "Instance not started when it should"
142
      else:
143
        msg = "Instance started when it shouldn't"
144
      raise qa_error.Error(msg)
142 145

  
143 146
  cmd = ['gnt-instance', 'info', inst_name]
144 147
  AssertEqual(StartSSH(master['primary'],

Also available in: Unified diff