Add caching support to media endpoints (#18235)

We do a few things in this PR to better support caching:

1. Change `Cache-Control` header to allow intermediary proxies to cache
media *only* if they revalidate on every request. This means that the
intermediary cache will still send the request to Synapse but with a
`If-None-Match` header, at which point Synapse can check auth and
respond with a 304 and empty content.
2. Add `ETag` response header to all media responses. We hardcode this
to `1` since all media is immutable (beyond being deleted).
3. Check for `If-None-Match` header (after checking for auth), and if it
matches then respond with a 304 and empty body.

---------

Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
This commit is contained in:
Erik Johnston 2025-03-13 16:28:19 +00:00 committed by GitHub
parent a278c0d852
commit 59a15da433
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 253 additions and 7 deletions

1
changelog.d/18235.misc Normal file
View file

@ -0,0 +1 @@
Add caching support to media endpoints.

View file

@ -118,6 +118,9 @@ DEFAULT_MAX_TIMEOUT_MS = 20_000
# Maximum allowed timeout_ms for download and thumbnail requests # Maximum allowed timeout_ms for download and thumbnail requests
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS = 60_000 MAXIMUM_ALLOWED_MAX_TIMEOUT_MS = 60_000
# The ETag header value to use for immutable media. This can be anything.
_IMMUTABLE_ETAG = "1"
def respond_404(request: SynapseRequest) -> None: def respond_404(request: SynapseRequest) -> None:
assert request.path is not None assert request.path is not None
@ -224,12 +227,7 @@ def add_file_headers(
request.setHeader(b"Content-Disposition", disposition.encode("ascii")) request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
# cache for at least a day. _add_cache_headers(request)
# XXX: we might want to turn this off for data we don't want to
# recommend caching as it's sensitive or private - or at least
# select private. don't bother setting Expires as all our
# clients are smart enough to be happy with Cache-Control
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
if file_size is not None: if file_size is not None:
request.setHeader(b"Content-Length", b"%d" % (file_size,)) request.setHeader(b"Content-Length", b"%d" % (file_size,))
@ -240,6 +238,26 @@ def add_file_headers(
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex") request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
def _add_cache_headers(request: Request) -> None:
"""Adds the appropriate cache headers to the response"""
# Cache on the client for at least a day.
#
# We set this to "public,s-maxage=0,proxy-revalidate" to allow CDNs to cache
# the media, so long as they "revalidate" the media on every request. By
# revalidate, we mean send the request to Synapse with a `If-None-Match`
# header, to which Synapse can either respond with a 304 if the user is
# authenticated/authorized, or a 401/403 if they're not.
request.setHeader(
b"Cache-Control", b"public,max-age=86400,s-maxage=0,proxy-revalidate"
)
# Set an ETag header to allow requesters to use it in requests to check if
# the cache is still valid. Since media is immutable (though may be
# deleted), we just set this to a constant.
request.setHeader(b"ETag", _IMMUTABLE_ETAG)
# separators as defined in RFC2616. SP and HT are handled separately. # separators as defined in RFC2616. SP and HT are handled separately.
# see _can_encode_filename_as_token. # see _can_encode_filename_as_token.
_FILENAME_SEPARATOR_CHARS = { _FILENAME_SEPARATOR_CHARS = {
@ -336,13 +354,15 @@ async def respond_with_multipart_responder(
from synapse.media.media_storage import MultipartFileConsumer from synapse.media.media_storage import MultipartFileConsumer
_add_cache_headers(request)
# note that currently the json_object is just {}, this will change when linked media # note that currently the json_object is just {}, this will change when linked media
# is implemented # is implemented
multipart_consumer = MultipartFileConsumer( multipart_consumer = MultipartFileConsumer(
clock, clock,
request, request,
media_type, media_type,
{}, {}, # Note: if we change this we need to change the returned ETag.
disposition, disposition,
media_length, media_length,
) )
@ -419,6 +439,46 @@ async def respond_with_responder(
finish_request(request) finish_request(request)
def respond_with_304(request: SynapseRequest) -> None:
request.setResponseCode(304)
# could alternatively use request.notifyFinish() and flip a flag when
# the Deferred fires, but since the flag is RIGHT THERE it seems like
# a waste.
if request._disconnected:
logger.warning(
"Not sending response to request %s, already disconnected.", request
)
return None
_add_cache_headers(request)
request.finish()
def check_for_cached_entry_and_respond(request: SynapseRequest) -> bool:
"""Check if the request has a conditional header that allows us to return a
304 Not Modified response, and if it does, return a 304 response.
This handles clients and intermediary proxies caching media.
This method assumes that the user has already been
authorised to request the media.
Returns True if we have responded."""
# We've checked the user has access to the media, so we now check if it
# is a "conditional request" and we can just return a `304 Not Modified`
# response. Since media is immutable (though may be deleted), we just
# check this is the expected constant.
etag = request.getHeader("If-None-Match")
if etag == _IMMUTABLE_ETAG:
# Return a `304 Not modified`.
respond_with_304(request)
return True
return False
class Responder(ABC): class Responder(ABC):
"""Represents a response that can be streamed to the requester. """Represents a response that can be streamed to the requester.

View file

@ -52,6 +52,7 @@ from synapse.media._base import (
FileInfo, FileInfo,
Responder, Responder,
ThumbnailInfo, ThumbnailInfo,
check_for_cached_entry_and_respond,
get_filename_from_headers, get_filename_from_headers,
respond_404, respond_404,
respond_with_multipart_responder, respond_with_multipart_responder,
@ -459,6 +460,11 @@ class MediaRepository:
self.mark_recently_accessed(None, media_id) self.mark_recently_accessed(None, media_id)
# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return
media_type = media_info.media_type media_type = media_info.media_type
if not media_type: if not media_type:
media_type = "application/octet-stream" media_type = "application/octet-stream"
@ -538,6 +544,17 @@ class MediaRepository:
allow_authenticated, allow_authenticated,
) )
# Check if the media is cached on the client, if so return 304. We need
# to do this after we have fetched remote media, as we need it to do the
# auth.
if check_for_cached_entry_and_respond(request):
# We always need to use the responder.
if responder:
with responder:
pass
return
# We deliberately stream the file outside the lock # We deliberately stream the file outside the lock
if responder and media_info: if responder and media_info:
upload_name = name if name else media_info.upload_name upload_name = name if name else media_info.upload_name

View file

@ -34,6 +34,7 @@ from synapse.logging.opentracing import trace
from synapse.media._base import ( from synapse.media._base import (
FileInfo, FileInfo,
ThumbnailInfo, ThumbnailInfo,
check_for_cached_entry_and_respond,
respond_404, respond_404,
respond_with_file, respond_with_file,
respond_with_multipart_responder, respond_with_multipart_responder,
@ -294,6 +295,11 @@ class ThumbnailProvider:
if media_info.authenticated: if media_info.authenticated:
raise NotFoundError() raise NotFoundError()
# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id) thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
await self._select_and_respond_with_thumbnail( await self._select_and_respond_with_thumbnail(
request, request,
@ -334,6 +340,11 @@ class ThumbnailProvider:
if media_info.authenticated: if media_info.authenticated:
raise NotFoundError() raise NotFoundError()
# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id) thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
for info in thumbnail_infos: for info in thumbnail_infos:
t_w = info.width == desired_width t_w = info.width == desired_width
@ -431,6 +442,10 @@ class ThumbnailProvider:
respond_404(request) respond_404(request)
return return
# Check if the media is cached on the client, if so return 304.
if check_for_cached_entry_and_respond(request):
return
thumbnail_infos = await self.store.get_remote_media_thumbnails( thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id server_name, media_id
) )
@ -510,6 +525,10 @@ class ThumbnailProvider:
if media_info.authenticated: if media_info.authenticated:
raise NotFoundError() raise NotFoundError()
# Check if the media is cached on the client, if so return 304.
if check_for_cached_entry_and_respond(request):
return
thumbnail_infos = await self.store.get_remote_media_thumbnails( thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id server_name, media_id
) )

View file

@ -147,6 +147,45 @@ class FederationMediaDownloadsTest(unittest.FederatingHomeserverTestCase):
found_file = any(SMALL_PNG in field for field in stripped_bytes) found_file = any(SMALL_PNG in field for field in stripped_bytes)
self.assertTrue(found_file) self.assertTrue(found_file)
def test_federation_etag(self) -> None:
"""Test that federation ETags work"""
content = io.BytesIO(b"file_to_stream")
content_uri = self.get_success(
self.media_repo.create_content(
"text/plain",
"test_upload",
content,
46,
UserID.from_string("@user_id:whatever.org"),
)
)
channel = self.make_signed_federation_request(
"GET",
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
)
self.pump()
self.assertEqual(200, channel.code)
# We expect exactly one ETag header.
etags = channel.headers.getRawHeaders("ETag")
self.assertIsNotNone(etags)
assert etags is not None # For mypy
self.assertEqual(len(etags), 1)
etag = etags[0]
# Refetching with the etag should result in 304 and empty body.
channel = self.make_signed_federation_request(
"GET",
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
custom_headers=[("If-None-Match", etag)],
)
self.pump()
self.assertEqual(channel.code, 304)
self.assertEqual(channel.is_finished(), True)
self.assertNotIn("body", channel.result)
class FederationThumbnailTest(unittest.FederatingHomeserverTestCase): class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:

View file

@ -2676,3 +2676,113 @@ class AuthenticatedMediaTestCase(unittest.HomeserverTestCase):
access_token=self.tok, access_token=self.tok,
) )
self.assertEqual(channel10.code, 200) self.assertEqual(channel10.code, 200)
def test_authenticated_media_etag(self) -> None:
"""Test that ETag works correctly with authenticated media over client
APIs"""
# upload some local media with authentication on
channel = self.make_request(
"POST",
"_matrix/media/v3/upload?filename=test_png_upload",
SMALL_PNG,
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
assert res is not None
uri = res.split("mxc://")[1]
# Check standard media endpoint
self._check_caching(f"/download/{uri}")
# check thumbnails as well
params = "?width=32&height=32&method=crop"
self._check_caching(f"/thumbnail/{uri}{params}")
# Inject a piece of remote media.
file_id = "abcdefg12345"
file_info = FileInfo(server_name="lonelyIsland", file_id=file_id)
media_storage = self.hs.get_media_repository().media_storage
ctx = media_storage.store_into_file(file_info)
(f, fname) = self.get_success(ctx.__aenter__())
f.write(SMALL_PNG)
self.get_success(ctx.__aexit__(None, None, None))
# we write the authenticated status when storing media, so this should pick up
# config and authenticate the media
self.get_success(
self.store.store_cached_remote_media(
origin="lonelyIsland",
media_id="52",
media_type="image/png",
media_length=1,
time_now_ms=self.clock.time_msec(),
upload_name="remote_test.png",
filesystem_id=file_id,
)
)
# ensure we have thumbnails for the non-dynamic code path
if self.extra_config == {"dynamic_thumbnails": False}:
self.get_success(
self.repo._generate_thumbnails(
"lonelyIsland", "52", file_id, "image/png"
)
)
self._check_caching("/download/lonelyIsland/52")
params = "?width=32&height=32&method=crop"
self._check_caching(f"/thumbnail/lonelyIsland/52{params}")
def _check_caching(self, path: str) -> None:
"""
Checks that:
1. fetching the path returns an ETag header
2. refetching with the ETag returns a 304 without a body
3. refetching with the ETag but through unauthenticated endpoint
returns 404
"""
# Request media over authenticated endpoint, should be found
channel1 = self.make_request(
"GET",
f"/_matrix/client/v1/media{path}",
access_token=self.tok,
shorthand=False,
)
self.assertEqual(channel1.code, 200)
# Should have a single ETag field
etags = channel1.headers.getRawHeaders("ETag")
self.assertIsNotNone(etags)
assert etags is not None # For mypy
self.assertEqual(len(etags), 1)
etag = etags[0]
# Refetching with the etag should result in 304 and empty body.
channel2 = self.make_request(
"GET",
f"/_matrix/client/v1/media{path}",
access_token=self.tok,
shorthand=False,
custom_headers=[("If-None-Match", etag)],
)
self.assertEqual(channel2.code, 304)
self.assertEqual(channel2.is_finished(), True)
self.assertNotIn("body", channel2.result)
# Refetching with the etag but no access token should result in 404.
channel3 = self.make_request(
"GET",
f"/_matrix/media/r0{path}",
shorthand=False,
custom_headers=[("If-None-Match", etag)],
)
self.assertEqual(channel3.code, 404)