Revision bc69c426
b/lib/watcher/__init__.py | ||
---|---|---|
173 | 173 |
self.secondaries = secondaries |
174 | 174 |
|
175 | 175 |
|
176 |
def _CleanupInstance(cl, notepad, inst): |
|
176 |
def _CleanupInstance(cl, notepad, inst, locks):
|
|
177 | 177 |
n = notepad.NumberOfCleanupAttempts(inst.name) |
178 | 178 |
|
179 |
if inst.name in locks: |
|
180 |
logging.info("Not cleaning up instance '%s', instance is locked", |
|
181 |
inst.name) |
|
182 |
return |
|
183 |
|
|
179 | 184 |
if n > MAXTRIES: |
180 | 185 |
logging.warning("Not cleaning up instance '%s', retries exhausted", |
181 | 186 |
inst.name) |
... | ... | |
194 | 199 |
notepad.RecordCleanupAttempt(inst.name) |
195 | 200 |
|
196 | 201 |
|
197 |
def _CheckInstances(cl, notepad, instances): |
|
202 |
def _CheckInstances(cl, notepad, instances, locks):
|
|
198 | 203 |
"""Make a pass over the list of instances, restarting downed ones. |
199 | 204 |
|
200 | 205 |
""" |
... | ... | |
204 | 209 |
|
205 | 210 |
for inst in instances.values(): |
206 | 211 |
if inst.status == constants.INSTST_USERDOWN: |
207 |
_CleanupInstance(cl, notepad, inst) |
|
212 |
_CleanupInstance(cl, notepad, inst, locks)
|
|
208 | 213 |
elif inst.status in BAD_STATES: |
209 | 214 |
n = notepad.NumberOfRestartAttempts(inst.name) |
210 | 215 |
|
... | ... | |
648 | 653 |
"""Retrieves instances and nodes per node group. |
649 | 654 |
|
650 | 655 |
""" |
656 |
locks = qcl.Query(constants.QR_LOCK, ["name", "mode"], None) |
|
657 |
|
|
658 |
prefix = "instance/" |
|
659 |
prefix_len = len(prefix) |
|
660 |
|
|
661 |
locked_instances = set() |
|
662 |
|
|
663 |
for [[_, name], [_, lock]] in locks.data: |
|
664 |
if name.startswith(prefix) and lock: |
|
665 |
locked_instances.add(name[prefix_len:]) |
|
666 |
|
|
651 | 667 |
queries = [ |
652 | 668 |
(constants.QR_INSTANCE, |
653 | 669 |
["name", "status", "disks_active", "snodes", |
... | ... | |
693 | 709 |
for (name, bootid, offline) in raw_nodes] |
694 | 710 |
|
695 | 711 |
return (dict((node.name, node) for node in nodes), |
696 |
dict((inst.name, inst) for inst in instances)) |
|
712 |
dict((inst.name, inst) for inst in instances), |
|
713 |
locked_instances) |
|
697 | 714 |
|
698 | 715 |
|
699 | 716 |
def _LoadKnownGroups(): |
... | ... | |
751 | 768 |
|
752 | 769 |
_CheckMaster(client) |
753 | 770 |
|
754 |
(nodes, instances) = _GetGroupData(query_client, group_uuid) |
|
771 |
(nodes, instances, locks) = _GetGroupData(query_client, group_uuid)
|
|
755 | 772 |
|
756 | 773 |
# Update per-group instance status file |
757 | 774 |
_UpdateInstanceStatus(inst_status_path, instances.values()) |
... | ... | |
760 | 777 |
pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE, |
761 | 778 |
known_groups) |
762 | 779 |
|
763 |
started = _CheckInstances(client, notepad, instances) |
|
780 |
started = _CheckInstances(client, notepad, instances, locks)
|
|
764 | 781 |
_CheckDisks(client, notepad, nodes, instances, started) |
765 | 782 |
_VerifyDisks(client, group_uuid, nodes, instances) |
766 | 783 |
except Exception, err: |
Also available in: Unified diff