Revision 73ff3118 tools/burnin
b/tools/burnin | ||
---|---|---|
41 | 41 |
|
42 | 42 |
# One-line synopsis of the command line accepted by the tool.
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."

# Initial value of the retry counter used for retryable operations.
MAX_RETRIES = 3
|
44 | 45 |
|
45 | 46 |
class InstanceDown(Exception):
  """Raised when an instance that was checked turned out not to be up."""
47 | 48 |
|
48 | 49 |
|
50 |
class BurninFailure(Exception):
  """Raised when a failure is detected while the burnin is running."""
|
52 |
|
|
53 |
|
|
49 | 54 |
def Usage(): |
50 | 55 |
"""Shows program usage information and exits the program.""" |
51 | 56 |
|
... | ... | |
106 | 111 |
self.to_rem = [] |
107 | 112 |
self.queued_ops = [] |
108 | 113 |
self.opts = None |
114 |
self.queue_retry = False |
|
115 |
self.disk_count = self.disk_growth = self.disk_size = None |
|
116 |
self.hvp = self.bep = None |
|
109 | 117 |
self.ParseOptions() |
110 | 118 |
self.cl = cli.GetClient() |
111 | 119 |
self.GetState() |
... | ... | |
125 | 133 |
if self.opts.verbose: |
126 | 134 |
Log(msg, indent=3) |
127 | 135 |
|
128 |
def ExecOp(self, *ops): |
|
136 |
def MaybeRetry(self, retry_count, msg, fn, *args): |
|
137 |
"""Possibly retry a given function execution. |
|
138 |
|
|
139 |
@type retry_count: int |
|
140 |
@param retry_count: retry counter: |
|
141 |
- 0: non-retryable action |
|
142 |
- 1: last retry for a retryable action |
|
143 |
- MAX_RETRIES: original try for a retryable action |
|
144 |
@type msg: str |
|
145 |
@param msg: the kind of the operation |
|
146 |
@type fn: callable |
|
147 |
@param fn: the function to be called |
|
148 |
|
|
149 |
""" |
|
150 |
try: |
|
151 |
val = fn(*args) |
|
152 |
if retry_count > 0 and retry_count < MAX_RETRIES: |
|
153 |
Log("Idempotent %s succeeded after %d retries" % |
|
154 |
(msg, MAX_RETRIES - retry_count)) |
|
155 |
return val |
|
156 |
except Exception, err: |
|
157 |
if retry_count == 0: |
|
158 |
Log("Non-idempotent %s failed, aborting" % (msg, )) |
|
159 |
raise |
|
160 |
elif retry_count == 1: |
|
161 |
Log("Idempotent %s repeated failure, aborting" % (msg, )) |
|
162 |
raise |
|
163 |
else: |
|
164 |
Log("Idempotent %s failed, retry #%d/%d: %s" % |
|
165 |
(msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err)) |
|
166 |
self.MaybeRetry(retry_count - 1, msg, fn, *args) |
|
167 |
|
|
168 |
def _ExecOp(self, *ops): |
|
129 | 169 |
"""Execute one or more opcodes and manage the exec buffer. |
130 | 170 |
|
131 | 171 |
@result: if only opcode has been passed, we return its result; |
... | ... | |
139 | 179 |
else: |
140 | 180 |
return results |
141 | 181 |
|
182 |
def ExecOp(self, retry, *ops):
  """Run one or more opcodes through L{MaybeRetry}.

  @param retry: if true, the opcodes are submitted as a retryable
      action (retry counter starts at MAX_RETRIES); otherwise they are
      submitted as a non-retryable action (retry counter 0)
  @param ops: the opcodes, forwarded unchanged to C{self._ExecOp}
  @return: whatever C{self.MaybeRetry} returns for the call

  """
  if not retry:
    return self.MaybeRetry(0, "opcode", self._ExecOp, *ops)
  return self.MaybeRetry(MAX_RETRIES, "opcode", self._ExecOp, *ops)
|
194 |
|
|
142 | 195 |
def ExecOrQueue(self, name, *ops):
  """Either run the opcodes right away or queue them for later.

  In parallel mode the opcodes are appended, together with C{name}, to
  C{self.queued_ops} and nothing is returned; otherwise they are
  executed through C{self.ExecOp} using the retry setting of the
  current batch (C{self.queue_retry}).

  """
  if not self.opts.parallel:
    return self.ExecOp(self.queue_retry, *ops)
  entry = (ops, name)
  self.queued_ops.append(entry)
|
201 |
|
|
202 |
def StartBatch(self, retry):
  """Begin a new batch of jobs.

  Empties the queue of pending opcodes and records the retry setting
  for the batch.

  @param retry: whether this is a retryable batch

  """
  self.queued_ops, self.queue_retry = [], retry
|
148 | 210 |
|
149 | 211 |
def CommitQueue(self):
  """Execute all submitted opcodes in case of parallel burnin.

  Does nothing (and returns None) when not running in parallel mode.
  Otherwise the queued opcodes are run as one jobset through
  C{self.MaybeRetry}, retryable or not according to
  C{self.queue_retry}; the queue is emptied afterwards whether or not
  the jobset succeeded.

  @return: the jobset results as returned by C{self.MaybeRetry}

  """
  if not self.opts.parallel:
    return

  retries = 0
  if self.queue_retry:
    retries = MAX_RETRIES

  try:
    return self.MaybeRetry(retries, "jobset", self.ExecJobSet,
                           self.queued_ops)
  finally:
    # always start the next batch from an empty queue
    self.queued_ops = []
... | ... | |
171 | 239 |
results = [] |
172 | 240 |
for jid, (_, iname) in zip(job_ids, jobs): |
173 | 241 |
Log("waiting for job %s for %s" % (jid, iname), indent=2) |
174 |
results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback)) |
|
175 |
|
|
242 |
try: |
|
243 |
results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback)) |
|
244 |
except Exception, err: |
|
245 |
Log("Job for %s failed: %s" % (iname, err)) |
|
246 |
if len(results) != len(jobs): |
|
247 |
raise BurninFailure() |
|
176 | 248 |
return results |
177 | 249 |
|
178 | 250 |
def ParseOptions(self): |
... | ... | |
325 | 397 |
try: |
326 | 398 |
op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"], |
327 | 399 |
names=names, use_locking=True) |
328 |
result = self.ExecOp(op) |
|
400 |
result = self.ExecOp(True, op)
|
|
329 | 401 |
except errors.GenericError, err: |
330 | 402 |
err_code, msg = cli.FormatError(err) |
331 | 403 |
Err(msg, exit_code=err_code) |
332 | 404 |
self.nodes = [data[0] for data in result if not (data[1] or data[2])] |
333 | 405 |
|
334 |
result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
|
|
335 |
names=[]))
|
|
406 |
op_diagos = opcodes.OpDiagnoseOS(output_fields=["name", "valid"], names=[])
|
|
407 |
result = self.ExecOp(True, op_diagos)
|
|
336 | 408 |
|
337 | 409 |
if not result: |
338 | 410 |
Err("Can't get the OS list") |
... | ... | |
347 | 419 |
"""Create the given instances. |
348 | 420 |
|
349 | 421 |
""" |
422 |
self.StartBatch(False) |
|
350 | 423 |
self.to_rem = [] |
351 | 424 |
mytor = izip(cycle(self.nodes), |
352 | 425 |
islice(cycle(self.nodes), 1, None), |
... | ... | |
396 | 469 |
def BurnGrowDisks(self): |
397 | 470 |
"""Grow both the os and the swap disks by the requested amount, if any.""" |
398 | 471 |
Log("Growing disks") |
472 |
self.StartBatch(False) |
|
399 | 473 |
for instance in self.instances: |
400 | 474 |
Log("instance %s" % instance, indent=1) |
401 | 475 |
for idx, growth in enumerate(self.disk_growth): |
... | ... | |
409 | 483 |
def BurnReplaceDisks1D8(self): |
410 | 484 |
"""Replace disks on primary and secondary for drbd8.""" |
411 | 485 |
Log("Replacing disks on the same nodes") |
486 |
self.StartBatch(True) |
|
412 | 487 |
for instance in self.instances: |
413 | 488 |
Log("instance %s" % instance, indent=1) |
414 | 489 |
ops = [] |
... | ... | |
424 | 499 |
def BurnReplaceDisks2(self): |
425 | 500 |
"""Replace secondary node.""" |
426 | 501 |
Log("Changing the secondary node") |
502 |
self.StartBatch(True) |
|
427 | 503 |
mode = constants.REPLACE_DISK_CHG |
428 | 504 |
|
429 | 505 |
mytor = izip(islice(cycle(self.nodes), 2, None), |
... | ... | |
447 | 523 |
def BurnFailover(self): |
448 | 524 |
"""Failover the instances.""" |
449 | 525 |
Log("Failing over instances") |
526 |
self.StartBatch(False) |
|
450 | 527 |
for instance in self.instances: |
451 | 528 |
Log("instance %s" % instance, indent=1) |
452 | 529 |
op = opcodes.OpFailoverInstance(instance_name=instance, |
... | ... | |
460 | 537 |
def BurnMigrate(self): |
461 | 538 |
"""Migrate the instances.""" |
462 | 539 |
Log("Migrating instances") |
540 |
self.StartBatch(False) |
|
463 | 541 |
for instance in self.instances: |
464 | 542 |
Log("instance %s" % instance, indent=1) |
465 | 543 |
op1 = opcodes.OpMigrateInstance(instance_name=instance, live=True, |
... | ... | |
476 | 554 |
|
477 | 555 |
""" |
478 | 556 |
Log("Exporting and re-importing instances") |
557 |
self.StartBatch(False) |
|
479 | 558 |
mytor = izip(cycle(self.nodes), |
480 | 559 |
islice(cycle(self.nodes), 1, None), |
481 | 560 |
islice(cycle(self.nodes), 2, None), |
... | ... | |
486 | 565 |
# read the full name of the instance |
487 | 566 |
nam_op = opcodes.OpQueryInstances(output_fields=["name"], |
488 | 567 |
names=[instance], use_locking=True) |
489 |
full_name = self.ExecOp(nam_op)[0][0] |
|
568 |
full_name = self.ExecOp(False, nam_op)[0][0]
|
|
490 | 569 |
|
491 | 570 |
if self.opts.iallocator: |
492 | 571 |
pnode = snode = None |
... | ... | |
555 | 634 |
def BurnStopStart(self): |
556 | 635 |
"""Stop/start the instances.""" |
557 | 636 |
Log("Stopping and starting instances") |
637 |
self.StartBatch(True) |
|
558 | 638 |
for instance in self.instances: |
559 | 639 |
Log("instance %s" % instance, indent=1) |
560 | 640 |
op1 = self.StopInstanceOp(instance) |
... | ... | |
568 | 648 |
|
569 | 649 |
def BurnRemove(self): |
570 | 650 |
"""Remove the instances.""" |
651 |
self.StartBatch(False) |
|
571 | 652 |
Log("Removing instances") |
572 | 653 |
for instance in self.to_rem: |
573 | 654 |
Log("instance %s" % instance, indent=1) |
... | ... | |
594 | 675 |
op_rename2 = self.RenameInstanceOp(rename, instance) |
595 | 676 |
op_start1 = self.StartInstanceOp(rename) |
596 | 677 |
op_start2 = self.StartInstanceOp(instance) |
597 |
self.ExecOp(op_stop1, op_rename1, op_start1) |
|
678 |
self.ExecOp(False, op_stop1, op_rename1, op_start1)
|
|
598 | 679 |
self._CheckInstanceAlive(rename) |
599 |
self.ExecOp(op_stop2, op_rename2, op_start2) |
|
680 |
self.ExecOp(False, op_stop2, op_rename2, op_start2)
|
|
600 | 681 |
self._CheckInstanceAlive(instance) |
601 | 682 |
|
602 | 683 |
def BurnReinstall(self): |
603 | 684 |
"""Reinstall the instances.""" |
604 | 685 |
Log("Reinstalling instances") |
686 |
self.StartBatch(True) |
|
605 | 687 |
for instance in self.instances: |
606 | 688 |
Log("instance %s" % instance, indent=1) |
607 | 689 |
op1 = self.StopInstanceOp(instance) |
... | ... | |
621 | 703 |
def BurnReboot(self): |
622 | 704 |
"""Reboot the instances.""" |
623 | 705 |
Log("Rebooting instances") |
706 |
self.StartBatch(True) |
|
624 | 707 |
for instance in self.instances: |
625 | 708 |
Log("instance %s" % instance, indent=1) |
626 | 709 |
ops = [] |
... | ... | |
640 | 723 |
def BurnActivateDisks(self): |
641 | 724 |
"""Activate and deactivate disks of the instances.""" |
642 | 725 |
Log("Activating/deactivating disks") |
726 |
self.StartBatch(True) |
|
643 | 727 |
for instance in self.instances: |
644 | 728 |
Log("instance %s" % instance, indent=1) |
645 | 729 |
op_start = self.StartInstanceOp(instance) |
... | ... | |
657 | 741 |
def BurnAddRemoveDisks(self): |
658 | 742 |
"""Add and remove an extra disk for the instances.""" |
659 | 743 |
Log("Adding and removing disks") |
744 |
self.StartBatch(False) |
|
660 | 745 |
for instance in self.instances: |
661 | 746 |
Log("instance %s" % instance, indent=1) |
662 | 747 |
op_add = opcodes.OpSetInstanceParams(\ |
... | ... | |
676 | 761 |
def BurnAddRemoveNICs(self): |
677 | 762 |
"""Add and remove an extra NIC for the instances.""" |
678 | 763 |
Log("Adding and removing NICs") |
764 |
self.StartBatch(False) |
|
679 | 765 |
for instance in self.instances: |
680 | 766 |
Log("instance %s" % instance, indent=1) |
681 | 767 |
op_add = opcodes.OpSetInstanceParams(\ |
Also available in: Unified diff