Revision 6dfcc47b: daemons/ganeti-watcher

--- a/daemons/ganeti-watcher
+++ b/daemons/ganeti-watcher
@@ -225,45 +225,42 @@
     cli.SubmitOpCode(op, cl=client)


-def GetInstanceList(with_secondaries=None):
+def GetClusterData():
   """Get a list of instances on this cluster.

   """
-  fields = ["name", "status", "admin_state"]
+  op1_fields = ["name", "status", "admin_state", "snodes"]
+  op1 = opcodes.OpQueryInstances(output_fields=op1_fields, names=[],
+                                 use_locking=True)
+  op2_fields = ["name", "bootid", "offline"]
+  op2 = opcodes.OpQueryNodes(output_fields=op2_fields, names=[],
+                             use_locking=True)

-  if with_secondaries is not None:
-    fields.append("snodes")
+  job_id = client.SubmitJob([op1, op2])

-  result = client.QueryInstances([], fields, True)
+  all_results = cli.PollJob(job_id, cl=client, feedback_fn=logging.debug)

-  instances = []
-  for fields in result:
-    if with_secondaries is not None:
-      (name, status, autostart, snodes) = fields
-
-      if not snodes:
-        continue
+  result = all_results[0]
+  smap = {}

-      for node in with_secondaries:
-        if node in snodes:
-          break
-      else:
-        continue
-
-    else:
-      (name, status, autostart) = fields
+  instances = {}
+  for fields in result:
+    (name, status, autostart, snodes) = fields

-    instances.append(Instance(name, status, autostart))
+    # update the secondary node map
+    for node in snodes:
+      if node not in smap:
+        smap[node] = []
+      smap[node].append(name)

-  return instances
+    instances[name] = Instance(name, status, autostart)

+  nodes =  dict([(name, (bootid, offline))
+                 for name, bootid, offline in all_results[1]])

-def GetNodeBootIDs():
-  """Get a dict mapping nodes to boot IDs.
+  client.ArchiveJob(job_id)

-  """
-  result = client.QueryNodes([], ["name", "bootid", "offline"], True)
-  return dict([(name, (bootid, offline)) for name, bootid, offline in result])
+  return instances, nodes, smap


 class Watcher(object):
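
The heart of this revision is the new GetClusterData(): the two separate luxi round trips (QueryInstances followed by QueryNodes) are replaced by a single job carrying both opcodes, polled once and archived when done. While folding the instance rows into a name-keyed dict, the function also builds smap, a reverse map from each secondary node to the instances that keep disks there. Below is a minimal standalone sketch of that folding step, with invented query rows and a stubbed Instance class (the real rows come from OpQueryInstances, and the real Instance is the watcher's own helper):

class Instance(object):
  """Stub standing in for the watcher's Instance helper."""
  def __init__(self, name, status, autostart):
    self.name = name
    self.status = status
    self.autostart = autostart

# One row per instance: (name, status, admin_state, snodes), matching
# op1_fields above; the values here are invented.
result = [
  ("inst1", "running", True, ["node2"]),
  ("inst2", "ERROR_down", True, ["node2", "node3"]),
]

instances = {}
smap = {}
for (name, status, autostart, snodes) in result:
  # reverse map: secondary node -> names of instances hosted there
  for node in snodes:
    smap.setdefault(node, []).append(name)
  instances[name] = Instance(name, status, autostart)

print(smap)  # {'node2': ['inst1', 'inst2'], 'node3': ['inst2']}
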
@@ -279,8 +276,7 @@
     master = client.QueryConfigValues(["master_node"])[0]
     if master != utils.HostInfo().name:
       raise NotMasterError("This is not the master node")
-    self.instances = GetInstanceList()
-    self.bootids = GetNodeBootIDs()
+    self.instances, self.bootids, self.smap = GetClusterData()
     self.started_instances = set()
     self.opts = opts

@@ -321,21 +317,25 @@
     if check_nodes:
       # Activate disks for all instances with any of the checked nodes as a
       # secondary node.
-      for instance in GetInstanceList(with_secondaries=check_nodes):
-        if not instance.autostart:
-          logging.info(("Skipping disk activation for non-autostart"
-                        " instance %s"), instance.name)
-          continue
-        if instance.name in self.started_instances:
-          # we already tried to start the instance, which should have
-          # activated its drives (if they can be at all)
+      for node in check_nodes:
+        if node not in self.smap:
           continue
-        try:
-          logging.info("Activating disks for instance %s", instance.name)
-          instance.ActivateDisks()
-        except Exception:
-          logging.exception("Error while activating disks for instance %s",
-                            instance.name)
+        for instance_name in self.smap[node]:
+          instance = self.instances[instance_name]
+          if not instance.autostart:
+            logging.info(("Skipping disk activation for non-autostart"
+                          " instance %s"), instance.name)
+            continue
+          if instance.name in self.started_instances:
+            # we already tried to start the instance, which should have
+            # activated its drives (if they can be at all)
+            continue
+          try:
+            logging.info("Activating disks for instance %s", instance.name)
+            instance.ActivateDisks()
+          except Exception:
+            logging.exception("Error while activating disks for instance %s",
+                              instance.name)

       # Keep changed boot IDs
       for name in check_nodes:
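
With smap available on the Watcher, activating disks for instances behind a rebooted node no longer needs a second cluster-wide query with with_secondaries: it becomes a plain dictionary lookup. A toy, self-contained illustration of the new lookup path (the data is invented, and instance.ActivateDisks() is replaced by a print):

smap = {"node2": ["inst1", "inst2"], "node3": ["inst2"]}
check_nodes = ["node1", "node3"]  # nodes whose boot IDs changed

for node in check_nodes:
  if node not in smap:
    continue  # nothing has a secondary on this node
  for instance_name in smap[node]:
    print("would activate disks for %s" % instance_name)
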
@@ -345,7 +345,7 @@
     """Make a pass over the list of instances, restarting downed ones.

     """
-    for instance in self.instances:
+    for instance in self.instances.values():
       if instance.state in BAD_STATES:
         n = notepad.NumberOfRestartAttempts(instance)

@@ -383,7 +383,9 @@

     """
     op = opcodes.OpVerifyDisks()
-    result = cli.SubmitOpCode(op, cl=client)
+    job_id = client.SubmitJob([op])
+    result = cli.PollJob(job_id, cl=client, feedback_fn=logging.debug)[0]
+    client.ArchiveJob(job_id)
     if not isinstance(result, (tuple, list)):
       logging.error("Can't get a valid result from verify-disks")
       return
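
Both GetClusterData() and the verify-disks path now follow the same submit/poll/archive sequence against the master daemon. A sketch of that shared pattern as a helper, using only the calls visible in this revision; the helper name _RunJob is invented here, and client stands for the luxi client object the watcher already holds (passed in explicitly to keep the sketch self-contained):

import logging

from ganeti import cli

def _RunJob(client, ops):
  """Submit ops as one job, wait for completion, then archive it.

  Returns one result entry per opcode, mirroring the pattern in the
  diff above; error handling is left to the callers, as there.

  """
  job_id = client.SubmitJob(ops)
  results = cli.PollJob(job_id, cl=client, feedback_fn=logging.debug)
  client.ArchiveJob(job_id)
  return results
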
