mirror of
https://github.com/element-hq/synapse.git
synced 2025-03-14 09:45:51 +00:00
Add caching support to media endpoints (#18235)
We do a few things in this PR to better support caching: 1. Change `Cache-Control` header to allow intermediary proxies to cache media *only* if they revalidate on every request. This means that the intermediary cache will still send the request to Synapse but with a `If-None-Match` header, at which point Synapse can check auth and respond with a 304 and empty content. 2. Add `ETag` response header to all media responses. We hardcode this to `1` since all media is immutable (beyond being deleted). 3. Check for `If-None-Match` header (after checking for auth), and if it matches then respond with a 304 and empty body. --------- Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
This commit is contained in:
parent
a278c0d852
commit
59a15da433
6 changed files with 253 additions and 7 deletions
1
changelog.d/18235.misc
Normal file
1
changelog.d/18235.misc
Normal file
|
@ -0,0 +1 @@
|
|||
Add caching support to media endpoints.
|
|
@ -118,6 +118,9 @@ DEFAULT_MAX_TIMEOUT_MS = 20_000
|
|||
# Maximum allowed timeout_ms for download and thumbnail requests
|
||||
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS = 60_000
|
||||
|
||||
# The ETag header value to use for immutable media. This can be anything.
|
||||
_IMMUTABLE_ETAG = "1"
|
||||
|
||||
|
||||
def respond_404(request: SynapseRequest) -> None:
|
||||
assert request.path is not None
|
||||
|
@ -224,12 +227,7 @@ def add_file_headers(
|
|||
|
||||
request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
|
||||
|
||||
# cache for at least a day.
|
||||
# XXX: we might want to turn this off for data we don't want to
|
||||
# recommend caching as it's sensitive or private - or at least
|
||||
# select private. don't bother setting Expires as all our
|
||||
# clients are smart enough to be happy with Cache-Control
|
||||
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
|
||||
_add_cache_headers(request)
|
||||
|
||||
if file_size is not None:
|
||||
request.setHeader(b"Content-Length", b"%d" % (file_size,))
|
||||
|
@ -240,6 +238,26 @@ def add_file_headers(
|
|||
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
|
||||
|
||||
|
||||
def _add_cache_headers(request: Request) -> None:
|
||||
"""Adds the appropriate cache headers to the response"""
|
||||
|
||||
# Cache on the client for at least a day.
|
||||
#
|
||||
# We set this to "public,s-maxage=0,proxy-revalidate" to allow CDNs to cache
|
||||
# the media, so long as they "revalidate" the media on every request. By
|
||||
# revalidate, we mean send the request to Synapse with a `If-None-Match`
|
||||
# header, to which Synapse can either respond with a 304 if the user is
|
||||
# authenticated/authorized, or a 401/403 if they're not.
|
||||
request.setHeader(
|
||||
b"Cache-Control", b"public,max-age=86400,s-maxage=0,proxy-revalidate"
|
||||
)
|
||||
|
||||
# Set an ETag header to allow requesters to use it in requests to check if
|
||||
# the cache is still valid. Since media is immutable (though may be
|
||||
# deleted), we just set this to a constant.
|
||||
request.setHeader(b"ETag", _IMMUTABLE_ETAG)
|
||||
|
||||
|
||||
# separators as defined in RFC2616. SP and HT are handled separately.
|
||||
# see _can_encode_filename_as_token.
|
||||
_FILENAME_SEPARATOR_CHARS = {
|
||||
|
@ -336,13 +354,15 @@ async def respond_with_multipart_responder(
|
|||
|
||||
from synapse.media.media_storage import MultipartFileConsumer
|
||||
|
||||
_add_cache_headers(request)
|
||||
|
||||
# note that currently the json_object is just {}, this will change when linked media
|
||||
# is implemented
|
||||
multipart_consumer = MultipartFileConsumer(
|
||||
clock,
|
||||
request,
|
||||
media_type,
|
||||
{},
|
||||
{}, # Note: if we change this we need to change the returned ETag.
|
||||
disposition,
|
||||
media_length,
|
||||
)
|
||||
|
@ -419,6 +439,46 @@ async def respond_with_responder(
|
|||
finish_request(request)
|
||||
|
||||
|
||||
def respond_with_304(request: SynapseRequest) -> None:
|
||||
request.setResponseCode(304)
|
||||
|
||||
# could alternatively use request.notifyFinish() and flip a flag when
|
||||
# the Deferred fires, but since the flag is RIGHT THERE it seems like
|
||||
# a waste.
|
||||
if request._disconnected:
|
||||
logger.warning(
|
||||
"Not sending response to request %s, already disconnected.", request
|
||||
)
|
||||
return None
|
||||
|
||||
_add_cache_headers(request)
|
||||
|
||||
request.finish()
|
||||
|
||||
|
||||
def check_for_cached_entry_and_respond(request: SynapseRequest) -> bool:
|
||||
"""Check if the request has a conditional header that allows us to return a
|
||||
304 Not Modified response, and if it does, return a 304 response.
|
||||
|
||||
This handles clients and intermediary proxies caching media.
|
||||
This method assumes that the user has already been
|
||||
authorised to request the media.
|
||||
|
||||
Returns True if we have responded."""
|
||||
|
||||
# We've checked the user has access to the media, so we now check if it
|
||||
# is a "conditional request" and we can just return a `304 Not Modified`
|
||||
# response. Since media is immutable (though may be deleted), we just
|
||||
# check this is the expected constant.
|
||||
etag = request.getHeader("If-None-Match")
|
||||
if etag == _IMMUTABLE_ETAG:
|
||||
# Return a `304 Not modified`.
|
||||
respond_with_304(request)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class Responder(ABC):
|
||||
"""Represents a response that can be streamed to the requester.
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ from synapse.media._base import (
|
|||
FileInfo,
|
||||
Responder,
|
||||
ThumbnailInfo,
|
||||
check_for_cached_entry_and_respond,
|
||||
get_filename_from_headers,
|
||||
respond_404,
|
||||
respond_with_multipart_responder,
|
||||
|
@ -459,6 +460,11 @@ class MediaRepository:
|
|||
|
||||
self.mark_recently_accessed(None, media_id)
|
||||
|
||||
# Once we've checked auth we can return early if the media is cached on
|
||||
# the client
|
||||
if check_for_cached_entry_and_respond(request):
|
||||
return
|
||||
|
||||
media_type = media_info.media_type
|
||||
if not media_type:
|
||||
media_type = "application/octet-stream"
|
||||
|
@ -538,6 +544,17 @@ class MediaRepository:
|
|||
allow_authenticated,
|
||||
)
|
||||
|
||||
# Check if the media is cached on the client, if so return 304. We need
|
||||
# to do this after we have fetched remote media, as we need it to do the
|
||||
# auth.
|
||||
if check_for_cached_entry_and_respond(request):
|
||||
# We always need to use the responder.
|
||||
if responder:
|
||||
with responder:
|
||||
pass
|
||||
|
||||
return
|
||||
|
||||
# We deliberately stream the file outside the lock
|
||||
if responder and media_info:
|
||||
upload_name = name if name else media_info.upload_name
|
||||
|
|
|
@ -34,6 +34,7 @@ from synapse.logging.opentracing import trace
|
|||
from synapse.media._base import (
|
||||
FileInfo,
|
||||
ThumbnailInfo,
|
||||
check_for_cached_entry_and_respond,
|
||||
respond_404,
|
||||
respond_with_file,
|
||||
respond_with_multipart_responder,
|
||||
|
@ -294,6 +295,11 @@ class ThumbnailProvider:
|
|||
if media_info.authenticated:
|
||||
raise NotFoundError()
|
||||
|
||||
# Once we've checked auth we can return early if the media is cached on
|
||||
# the client
|
||||
if check_for_cached_entry_and_respond(request):
|
||||
return
|
||||
|
||||
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
|
||||
await self._select_and_respond_with_thumbnail(
|
||||
request,
|
||||
|
@ -334,6 +340,11 @@ class ThumbnailProvider:
|
|||
if media_info.authenticated:
|
||||
raise NotFoundError()
|
||||
|
||||
# Once we've checked auth we can return early if the media is cached on
|
||||
# the client
|
||||
if check_for_cached_entry_and_respond(request):
|
||||
return
|
||||
|
||||
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
|
||||
for info in thumbnail_infos:
|
||||
t_w = info.width == desired_width
|
||||
|
@ -431,6 +442,10 @@ class ThumbnailProvider:
|
|||
respond_404(request)
|
||||
return
|
||||
|
||||
# Check if the media is cached on the client, if so return 304.
|
||||
if check_for_cached_entry_and_respond(request):
|
||||
return
|
||||
|
||||
thumbnail_infos = await self.store.get_remote_media_thumbnails(
|
||||
server_name, media_id
|
||||
)
|
||||
|
@ -510,6 +525,10 @@ class ThumbnailProvider:
|
|||
if media_info.authenticated:
|
||||
raise NotFoundError()
|
||||
|
||||
# Check if the media is cached on the client, if so return 304.
|
||||
if check_for_cached_entry_and_respond(request):
|
||||
return
|
||||
|
||||
thumbnail_infos = await self.store.get_remote_media_thumbnails(
|
||||
server_name, media_id
|
||||
)
|
||||
|
|
|
@ -147,6 +147,45 @@ class FederationMediaDownloadsTest(unittest.FederatingHomeserverTestCase):
|
|||
found_file = any(SMALL_PNG in field for field in stripped_bytes)
|
||||
self.assertTrue(found_file)
|
||||
|
||||
def test_federation_etag(self) -> None:
|
||||
"""Test that federation ETags work"""
|
||||
|
||||
content = io.BytesIO(b"file_to_stream")
|
||||
content_uri = self.get_success(
|
||||
self.media_repo.create_content(
|
||||
"text/plain",
|
||||
"test_upload",
|
||||
content,
|
||||
46,
|
||||
UserID.from_string("@user_id:whatever.org"),
|
||||
)
|
||||
)
|
||||
|
||||
channel = self.make_signed_federation_request(
|
||||
"GET",
|
||||
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
|
||||
)
|
||||
self.pump()
|
||||
self.assertEqual(200, channel.code)
|
||||
|
||||
# We expect exactly one ETag header.
|
||||
etags = channel.headers.getRawHeaders("ETag")
|
||||
self.assertIsNotNone(etags)
|
||||
assert etags is not None # For mypy
|
||||
self.assertEqual(len(etags), 1)
|
||||
etag = etags[0]
|
||||
|
||||
# Refetching with the etag should result in 304 and empty body.
|
||||
channel = self.make_signed_federation_request(
|
||||
"GET",
|
||||
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
|
||||
custom_headers=[("If-None-Match", etag)],
|
||||
)
|
||||
self.pump()
|
||||
self.assertEqual(channel.code, 304)
|
||||
self.assertEqual(channel.is_finished(), True)
|
||||
self.assertNotIn("body", channel.result)
|
||||
|
||||
|
||||
class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
|
||||
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
|
||||
|
|
|
@ -2676,3 +2676,113 @@ class AuthenticatedMediaTestCase(unittest.HomeserverTestCase):
|
|||
access_token=self.tok,
|
||||
)
|
||||
self.assertEqual(channel10.code, 200)
|
||||
|
||||
def test_authenticated_media_etag(self) -> None:
|
||||
"""Test that ETag works correctly with authenticated media over client
|
||||
APIs"""
|
||||
|
||||
# upload some local media with authentication on
|
||||
channel = self.make_request(
|
||||
"POST",
|
||||
"_matrix/media/v3/upload?filename=test_png_upload",
|
||||
SMALL_PNG,
|
||||
self.tok,
|
||||
shorthand=False,
|
||||
content_type=b"image/png",
|
||||
custom_headers=[("Content-Length", str(67))],
|
||||
)
|
||||
self.assertEqual(channel.code, 200)
|
||||
res = channel.json_body.get("content_uri")
|
||||
assert res is not None
|
||||
uri = res.split("mxc://")[1]
|
||||
|
||||
# Check standard media endpoint
|
||||
self._check_caching(f"/download/{uri}")
|
||||
|
||||
# check thumbnails as well
|
||||
params = "?width=32&height=32&method=crop"
|
||||
self._check_caching(f"/thumbnail/{uri}{params}")
|
||||
|
||||
# Inject a piece of remote media.
|
||||
file_id = "abcdefg12345"
|
||||
file_info = FileInfo(server_name="lonelyIsland", file_id=file_id)
|
||||
|
||||
media_storage = self.hs.get_media_repository().media_storage
|
||||
|
||||
ctx = media_storage.store_into_file(file_info)
|
||||
(f, fname) = self.get_success(ctx.__aenter__())
|
||||
f.write(SMALL_PNG)
|
||||
self.get_success(ctx.__aexit__(None, None, None))
|
||||
|
||||
# we write the authenticated status when storing media, so this should pick up
|
||||
# config and authenticate the media
|
||||
self.get_success(
|
||||
self.store.store_cached_remote_media(
|
||||
origin="lonelyIsland",
|
||||
media_id="52",
|
||||
media_type="image/png",
|
||||
media_length=1,
|
||||
time_now_ms=self.clock.time_msec(),
|
||||
upload_name="remote_test.png",
|
||||
filesystem_id=file_id,
|
||||
)
|
||||
)
|
||||
|
||||
# ensure we have thumbnails for the non-dynamic code path
|
||||
if self.extra_config == {"dynamic_thumbnails": False}:
|
||||
self.get_success(
|
||||
self.repo._generate_thumbnails(
|
||||
"lonelyIsland", "52", file_id, "image/png"
|
||||
)
|
||||
)
|
||||
|
||||
self._check_caching("/download/lonelyIsland/52")
|
||||
|
||||
params = "?width=32&height=32&method=crop"
|
||||
self._check_caching(f"/thumbnail/lonelyIsland/52{params}")
|
||||
|
||||
def _check_caching(self, path: str) -> None:
|
||||
"""
|
||||
Checks that:
|
||||
1. fetching the path returns an ETag header
|
||||
2. refetching with the ETag returns a 304 without a body
|
||||
3. refetching with the ETag but through unauthenticated endpoint
|
||||
returns 404
|
||||
"""
|
||||
|
||||
# Request media over authenticated endpoint, should be found
|
||||
channel1 = self.make_request(
|
||||
"GET",
|
||||
f"/_matrix/client/v1/media{path}",
|
||||
access_token=self.tok,
|
||||
shorthand=False,
|
||||
)
|
||||
self.assertEqual(channel1.code, 200)
|
||||
|
||||
# Should have a single ETag field
|
||||
etags = channel1.headers.getRawHeaders("ETag")
|
||||
self.assertIsNotNone(etags)
|
||||
assert etags is not None # For mypy
|
||||
self.assertEqual(len(etags), 1)
|
||||
etag = etags[0]
|
||||
|
||||
# Refetching with the etag should result in 304 and empty body.
|
||||
channel2 = self.make_request(
|
||||
"GET",
|
||||
f"/_matrix/client/v1/media{path}",
|
||||
access_token=self.tok,
|
||||
shorthand=False,
|
||||
custom_headers=[("If-None-Match", etag)],
|
||||
)
|
||||
self.assertEqual(channel2.code, 304)
|
||||
self.assertEqual(channel2.is_finished(), True)
|
||||
self.assertNotIn("body", channel2.result)
|
||||
|
||||
# Refetching with the etag but no access token should result in 404.
|
||||
channel3 = self.make_request(
|
||||
"GET",
|
||||
f"/_matrix/media/r0{path}",
|
||||
shorthand=False,
|
||||
custom_headers=[("If-None-Match", etag)],
|
||||
)
|
||||
self.assertEqual(channel3.code, 404)
|
||||
|
|
Loading…
Add table
Reference in a new issue