Fix performance of check_state_groups_and_bump_deletion (#18141)

Regressed as part of https://github.com/element-hq/synapse/pull/18107

This does two things:
1. Only check whether the state groups have been deleted when calculating
the event context, as that is the point at which we insert references to
them. This avoids a large number of redundant checks on read-only paths
(see the sketch below).
2. Don't lock the `state_groups` rows when doing the check: the lock adds
overhead, and it doesn't prevent any races here anyway.
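
As a rough sketch of point 1 (hypothetical names, not Synapse's actual
call sites; `entry` and `deletion_store` stand in for the real objects):
the existence check now runs only on the write path that is about to
persist references to a state group, so read-only paths issue no extra
queries at all.

    # Hypothetical sketch; only the write path pays for the check.
    async def get_state_for_read(entry):
        # Read path: serve the entry as-is, with no existence check or lock.
        return entry

    async def compute_event_context(entry, deletion_store):
        # Write path: we are about to insert rows referencing the state
        # group, so confirm it still exists and delay any pending deletion.
        missing = await deletion_store.check_state_groups_and_bump_deletion(
            {entry.state_group}
        )
        if missing:
            raise Exception(f"Missing state group: {entry.state_group}")
        return entry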
Erik Johnston, 2025-02-07 11:18:32 +01:00 (committed by GitHub)
parent 553e9882bf
commit dcf7b39276
4 changed files with 46 additions and 17 deletions

changelog.d/18141.bugfix (new file)

@@ -0,0 +1 @@
+Fix regression in performance of sending events due to superfluous reads and locks. Introduced in v1.124.0rc1.

@@ -359,6 +359,28 @@ class StateHandler:
             await_full_state=False,
         )
 
+        # Ensure we still have the state groups we're relying on, and bump
+        # their usage time to avoid them being deleted from under us.
+        if entry.state_group:
+            missing_state_group = await self._state_deletion_store.check_state_groups_and_bump_deletion(
+                {entry.state_group}
+            )
+            if missing_state_group:
+                raise Exception(f"Missing state group: {entry.state_group}")
+        elif entry.prev_group:
+            # We only rely on the prev group when persisting the event if we
+            # don't have an `entry.state_group`.
+            missing_state_group = await self._state_deletion_store.check_state_groups_and_bump_deletion(
+                {entry.prev_group}
+            )
+
+            if missing_state_group:
+                # If we're missing the prev group then we can just clear the
+                # entries, and rely on `entry._state` (which must exist if
+                # `entry.state_group` is None)
+                entry.prev_group = None
+                entry.delta_ids = None
+
         state_group_before_event_prev_group = entry.prev_group
         deltas_to_state_group_before_event = entry.delta_ids
         state_ids_before_event = None
@@ -519,16 +541,6 @@ class StateHandler:
                 state_group_id
             )
 
-            if prev_group:
-                # Ensure that we still have the prev group, and ensure we don't
-                # delete it while we're persisting the event.
-                missing_state_group = await self._state_deletion_store.check_state_groups_and_bump_deletion(
-                    {prev_group}
-                )
-                if missing_state_group:
-                    prev_group = None
-                    delta_ids = None
-
             return _StateCacheEntry(
                 state=None,
                 state_group=state_group_id,

@@ -123,12 +123,28 @@ class StateDeletionDataStore:
             "check_state_groups_and_bump_deletion",
             self._check_state_groups_and_bump_deletion_txn,
             state_groups,
+            # We don't need to lock if we're just doing a quick check, as the
+            # lock doesn't prevent any races here.
+            lock=False,
         )
 
     def _check_state_groups_and_bump_deletion_txn(
-        self, txn: LoggingTransaction, state_groups: AbstractSet[int]
+        self, txn: LoggingTransaction, state_groups: AbstractSet[int], lock: bool = True
     ) -> Collection[int]:
-        existing_state_groups = self._get_existing_groups_with_lock(txn, state_groups)
+        """Checks to make sure that the state groups haven't been deleted, and
+        if they're pending deletion we delay it (allowing time for any event
+        that will use them to finish persisting).
+
+        The `lock` flag sets if we should lock the `state_group` rows we're
+        checking, which we should do when storing new groups.
+
+        Returns:
+            The state groups that are missing, if any.
+        """
+        existing_state_groups = self._get_existing_groups_with_lock(
+            txn, state_groups, lock=lock
+        )
 
         self._bump_deletion_txn(txn, existing_state_groups)
@@ -188,18 +204,18 @@ class StateDeletionDataStore:
             )
 
     def _get_existing_groups_with_lock(
-        self, txn: LoggingTransaction, state_groups: Collection[int]
+        self, txn: LoggingTransaction, state_groups: Collection[int], lock: bool = True
     ) -> AbstractSet[int]:
         """Return which of the given state groups are in the database, and locks
         those rows with `KEY SHARE` to ensure they don't get concurrently
-        deleted."""
+        deleted (if `lock` is true)."""
 
         clause, args = make_in_list_sql_clause(self.db_pool.engine, "id", state_groups)
 
         sql = f"""
             SELECT id FROM state_groups
             WHERE {clause}
         """
-        if isinstance(self.db_pool.engine, PostgresEngine):
+        if lock and isinstance(self.db_pool.engine, PostgresEngine):
             # On postgres we add a row level lock to the rows to ensure that we
             # conflict with any concurrent DELETEs. `FOR KEY SHARE` lock will
             # not conflict with other read
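
For context on the lock change (a standalone demonstration, not Synapse
code; it assumes psycopg2 is installed and a scratch Postgres database
"locktest" contains a state_groups(id) table with a row id=1): FOR KEY
SHARE is PostgreSQL's weakest row-level lock. It does not block other
readers, but it does make a concurrent DELETE of the locked rows wait,
which is why it is worth taking when inserting references to a group,
and pure overhead for a read-only existence check.

    import psycopg2
    from psycopg2 import errors

    conn_a = psycopg2.connect(dbname="locktest")
    conn_b = psycopg2.connect(dbname="locktest")

    cur_a = conn_a.cursor()
    cur_a.execute("SELECT id FROM state_groups WHERE id = 1 FOR KEY SHARE")
    # conn_a's open transaction now holds a KEY SHARE lock on the row.

    cur_b = conn_b.cursor()
    cur_b.execute("SET lock_timeout = '100ms'")  # fail fast instead of waiting

    # Plain reads are unaffected by the KEY SHARE lock:
    cur_b.execute("SELECT id FROM state_groups WHERE id = 1")
    print(cur_b.fetchone())  # (1,)

    # ...but deleting the locked row conflicts and times out:
    try:
        cur_b.execute("DELETE FROM state_groups WHERE id = 1")
    except errors.LockNotAvailable:
        print("DELETE blocked by FOR KEY SHARE")
        conn_b.rollback()

    conn_a.commit()  # release the lock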

@@ -742,7 +742,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(36, channel.resource_usage.db_txn_count)
+        self.assertEqual(35, channel.resource_usage.db_txn_count)
 
     def test_post_room_initial_state(self) -> None:
         # POST with initial_state config key, expect new room id
@@ -755,7 +755,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(38, channel.resource_usage.db_txn_count)
+        self.assertEqual(37, channel.resource_usage.db_txn_count)
 
     def test_post_room_visibility_key(self) -> None:
         # POST with visibility config key, expect new room id