mirror of
https://github.com/element-hq/synapse.git
synced 2025-03-14 09:45:51 +00:00
Add log message when worker lock timeouts get large (#18124)
This is to help track down a possible, but very rare, worker deadlock that was seen on matrix.org. In theory, you could work back from an instance of these new logs to the approximate time when the lock was obtained and focus the diagnostic efforts there. ### Pull Request Checklist <!-- Please read https://element-hq.github.io/synapse/latest/development/contributing_guide.html before submitting your pull request --> * [x] Pull request is based on the develop branch * [x] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [x] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters))
This commit is contained in:
parent
816054b012
commit
74aa47828d
2 changed files with 10 additions and 0 deletions
1
changelog.d/18124.misc
Normal file
1
changelog.d/18124.misc
Normal file
|
@ -0,0 +1 @@
|
|||
Add log message when worker lock timeouts get large.
|
|
@ -19,6 +19,7 @@
|
|||
#
|
||||
#
|
||||
|
||||
import logging
|
||||
import random
|
||||
from types import TracebackType
|
||||
from typing import (
|
||||
|
@ -269,6 +270,10 @@ class WaitingLock:
|
|||
def _get_next_retry_interval(self) -> float:
|
||||
next = self._retry_interval
|
||||
self._retry_interval = max(5, next * 2)
|
||||
if self._retry_interval > 5 * 2 ^ 7: # ~10 minutes
|
||||
logging.warning(
|
||||
f"Lock timeout is getting excessive: {self._retry_interval}s. There may be a deadlock."
|
||||
)
|
||||
return next * random.uniform(0.9, 1.1)
|
||||
|
||||
|
||||
|
@ -344,4 +349,8 @@ class WaitingMultiLock:
|
|||
def _get_next_retry_interval(self) -> float:
|
||||
next = self._retry_interval
|
||||
self._retry_interval = max(5, next * 2)
|
||||
if self._retry_interval > 5 * 2 ^ 7: # ~10 minutes
|
||||
logging.warning(
|
||||
f"Lock timeout is getting excessive: {self._retry_interval}s. There may be a deadlock."
|
||||
)
|
||||
return next * random.uniform(0.9, 1.1)
|
||||
|
|
Loading…
Add table
Reference in a new issue