mirror of
https://github.com/element-hq/synapse.git
synced 2025-03-14 09:45:51 +00:00
Add caching support to media endpoints (#18235)
We do a few things in this PR to better support caching: 1. Change the `Cache-Control` header to allow intermediary proxies to cache media *only* if they revalidate on every request. This means that the intermediary cache will still send the request to Synapse, but with an `If-None-Match` header, at which point Synapse can check auth and respond with a 304 and empty content. 2. Add an `ETag` response header to all media responses. We hardcode this to `1` since all media is immutable (beyond being deleted). 3. Check for an `If-None-Match` header (after checking for auth), and if it matches then respond with a 304 and an empty body. --------- Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
This commit is contained in:
parent
a278c0d852
commit
59a15da433
6 changed files with 253 additions and 7 deletions
1
changelog.d/18235.misc
Normal file
1
changelog.d/18235.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Add caching support to media endpoints.
|
|
@ -118,6 +118,9 @@ DEFAULT_MAX_TIMEOUT_MS = 20_000
|
||||||
# Maximum allowed timeout_ms for download and thumbnail requests
|
# Maximum allowed timeout_ms for download and thumbnail requests
|
||||||
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS = 60_000
|
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS = 60_000
|
||||||
|
|
||||||
|
# The ETag header value to use for immutable media. This can be anything.
|
||||||
|
_IMMUTABLE_ETAG = "1"
|
||||||
|
|
||||||
|
|
||||||
def respond_404(request: SynapseRequest) -> None:
|
def respond_404(request: SynapseRequest) -> None:
|
||||||
assert request.path is not None
|
assert request.path is not None
|
||||||
|
@ -224,12 +227,7 @@ def add_file_headers(
|
||||||
|
|
||||||
request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
|
request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
|
||||||
|
|
||||||
# cache for at least a day.
|
_add_cache_headers(request)
|
||||||
# XXX: we might want to turn this off for data we don't want to
|
|
||||||
# recommend caching as it's sensitive or private - or at least
|
|
||||||
# select private. don't bother setting Expires as all our
|
|
||||||
# clients are smart enough to be happy with Cache-Control
|
|
||||||
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
|
|
||||||
|
|
||||||
if file_size is not None:
|
if file_size is not None:
|
||||||
request.setHeader(b"Content-Length", b"%d" % (file_size,))
|
request.setHeader(b"Content-Length", b"%d" % (file_size,))
|
||||||
|
@ -240,6 +238,26 @@ def add_file_headers(
|
||||||
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
|
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
|
||||||
|
|
||||||
|
|
||||||
|
def _add_cache_headers(request: Request) -> None:
    """Set the `Cache-Control` and `ETag` response headers used for media.

    Args:
        request: The request whose response headers should be populated.
    """

    # Clients may keep the media for a day (`max-age=86400`).
    #
    # Shared caches such as CDNs are also allowed to store it (`public`), but
    # `s-maxage=0,proxy-revalidate` forces them to "revalidate" on every
    # request, i.e. forward the request to Synapse with an `If-None-Match`
    # header. Synapse can then answer with a 304 if the user is
    # authenticated/authorized, or a 401/403 if they're not.
    cache_control = b"public,max-age=86400,s-maxage=0,proxy-revalidate"
    request.setHeader(b"Cache-Control", cache_control)

    # Media is immutable (though it may be deleted), so a constant ETag is
    # enough for requesters to check whether their cached copy is still valid.
    request.setHeader(b"ETag", _IMMUTABLE_ETAG)
|
||||||
|
|
||||||
|
|
||||||
# separators as defined in RFC2616. SP and HT are handled separately.
|
# separators as defined in RFC2616. SP and HT are handled separately.
|
||||||
# see _can_encode_filename_as_token.
|
# see _can_encode_filename_as_token.
|
||||||
_FILENAME_SEPARATOR_CHARS = {
|
_FILENAME_SEPARATOR_CHARS = {
|
||||||
|
@ -336,13 +354,15 @@ async def respond_with_multipart_responder(
|
||||||
|
|
||||||
from synapse.media.media_storage import MultipartFileConsumer
|
from synapse.media.media_storage import MultipartFileConsumer
|
||||||
|
|
||||||
|
_add_cache_headers(request)
|
||||||
|
|
||||||
# note that currently the json_object is just {}, this will change when linked media
|
# note that currently the json_object is just {}, this will change when linked media
|
||||||
# is implemented
|
# is implemented
|
||||||
multipart_consumer = MultipartFileConsumer(
|
multipart_consumer = MultipartFileConsumer(
|
||||||
clock,
|
clock,
|
||||||
request,
|
request,
|
||||||
media_type,
|
media_type,
|
||||||
{},
|
{}, # Note: if we change this we need to change the returned ETag.
|
||||||
disposition,
|
disposition,
|
||||||
media_length,
|
media_length,
|
||||||
)
|
)
|
||||||
|
@ -419,6 +439,46 @@ async def respond_with_responder(
|
||||||
finish_request(request)
|
finish_request(request)
|
||||||
|
|
||||||
|
|
||||||
|
def respond_with_304(request: SynapseRequest) -> None:
    """Finish the request with a body-less `304 Not Modified` response.

    The media cache headers are attached to the response before it is
    finished. If the client has already disconnected, a warning is logged and
    nothing is sent.

    Args:
        request: The request to respond to.
    """
    request.setResponseCode(304)

    # We could instead use request.notifyFinish() and flip a flag when the
    # Deferred fires, but the request already tracks this for us.
    if not request._disconnected:
        _add_cache_headers(request)
        request.finish()
        return

    logger.warning(
        "Not sending response to request %s, already disconnected.", request
    )
|
||||||
|
|
||||||
|
|
||||||
|
def check_for_cached_entry_and_respond(request: SynapseRequest) -> bool:
    """Respond with `304 Not Modified` if the request's `If-None-Match` header
    matches the media ETag.

    This handles clients and intermediary proxies caching media.

    This method assumes that the user has already been authorised to request
    the media.

    Args:
        request: The incoming media request.

    Returns:
        True if a 304 response has been sent (the caller must not write
        anything further to the request), False otherwise.
    """

    def _opaque_tag(entity_tag: str) -> str:
        """Reduce an entity-tag to its opaque value for weak comparison."""
        entity_tag = entity_tag.strip()
        # `If-None-Match` uses the weak comparison function, so the weakness
        # indicator is irrelevant.
        if entity_tag.startswith("W/"):
            entity_tag = entity_tag[2:]
        # Only strip a *matching* pair of quotes, so that a fragment of some
        # other tag (e.g. from a comma inside quotes) can never match.
        if len(entity_tag) >= 2 and entity_tag[0] == entity_tag[-1] == '"':
            entity_tag = entity_tag[1:-1]
        return entity_tag

    # We've checked the user has access to the media, so we now check if it
    # is a "conditional request" and we can just return a `304 Not Modified`
    # response. Since media is immutable (though may be deleted), we just
    # check for the expected constant.
    if_none_match = request.getHeader("If-None-Match")
    if not if_none_match:
        return False

    expected = _opaque_tag(_IMMUTABLE_ETAG)

    # The header may carry a comma-separated list of entity-tags, and caches
    # may send the tag quoted, unquoted or as a weak validator.
    for candidate in if_none_match.split(","):
        if _opaque_tag(candidate) == expected:
            # Return a `304 Not modified`.
            respond_with_304(request)
            return True

    return False
|
||||||
|
|
||||||
|
|
||||||
class Responder(ABC):
|
class Responder(ABC):
|
||||||
"""Represents a response that can be streamed to the requester.
|
"""Represents a response that can be streamed to the requester.
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,7 @@ from synapse.media._base import (
|
||||||
FileInfo,
|
FileInfo,
|
||||||
Responder,
|
Responder,
|
||||||
ThumbnailInfo,
|
ThumbnailInfo,
|
||||||
|
check_for_cached_entry_and_respond,
|
||||||
get_filename_from_headers,
|
get_filename_from_headers,
|
||||||
respond_404,
|
respond_404,
|
||||||
respond_with_multipart_responder,
|
respond_with_multipart_responder,
|
||||||
|
@ -459,6 +460,11 @@ class MediaRepository:
|
||||||
|
|
||||||
self.mark_recently_accessed(None, media_id)
|
self.mark_recently_accessed(None, media_id)
|
||||||
|
|
||||||
|
# Once we've checked auth we can return early if the media is cached on
|
||||||
|
# the client
|
||||||
|
if check_for_cached_entry_and_respond(request):
|
||||||
|
return
|
||||||
|
|
||||||
media_type = media_info.media_type
|
media_type = media_info.media_type
|
||||||
if not media_type:
|
if not media_type:
|
||||||
media_type = "application/octet-stream"
|
media_type = "application/octet-stream"
|
||||||
|
@ -538,6 +544,17 @@ class MediaRepository:
|
||||||
allow_authenticated,
|
allow_authenticated,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Check if the media is cached on the client, if so return 304. We need
|
||||||
|
# to do this after we have fetched remote media, as we need it to do the
|
||||||
|
# auth.
|
||||||
|
if check_for_cached_entry_and_respond(request):
|
||||||
|
# We always need to use the responder.
|
||||||
|
if responder:
|
||||||
|
with responder:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
# We deliberately stream the file outside the lock
|
# We deliberately stream the file outside the lock
|
||||||
if responder and media_info:
|
if responder and media_info:
|
||||||
upload_name = name if name else media_info.upload_name
|
upload_name = name if name else media_info.upload_name
|
||||||
|
|
|
@ -34,6 +34,7 @@ from synapse.logging.opentracing import trace
|
||||||
from synapse.media._base import (
|
from synapse.media._base import (
|
||||||
FileInfo,
|
FileInfo,
|
||||||
ThumbnailInfo,
|
ThumbnailInfo,
|
||||||
|
check_for_cached_entry_and_respond,
|
||||||
respond_404,
|
respond_404,
|
||||||
respond_with_file,
|
respond_with_file,
|
||||||
respond_with_multipart_responder,
|
respond_with_multipart_responder,
|
||||||
|
@ -294,6 +295,11 @@ class ThumbnailProvider:
|
||||||
if media_info.authenticated:
|
if media_info.authenticated:
|
||||||
raise NotFoundError()
|
raise NotFoundError()
|
||||||
|
|
||||||
|
# Once we've checked auth we can return early if the media is cached on
|
||||||
|
# the client
|
||||||
|
if check_for_cached_entry_and_respond(request):
|
||||||
|
return
|
||||||
|
|
||||||
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
|
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
|
||||||
await self._select_and_respond_with_thumbnail(
|
await self._select_and_respond_with_thumbnail(
|
||||||
request,
|
request,
|
||||||
|
@ -334,6 +340,11 @@ class ThumbnailProvider:
|
||||||
if media_info.authenticated:
|
if media_info.authenticated:
|
||||||
raise NotFoundError()
|
raise NotFoundError()
|
||||||
|
|
||||||
|
# Once we've checked auth we can return early if the media is cached on
|
||||||
|
# the client
|
||||||
|
if check_for_cached_entry_and_respond(request):
|
||||||
|
return
|
||||||
|
|
||||||
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
|
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
|
||||||
for info in thumbnail_infos:
|
for info in thumbnail_infos:
|
||||||
t_w = info.width == desired_width
|
t_w = info.width == desired_width
|
||||||
|
@ -431,6 +442,10 @@ class ThumbnailProvider:
|
||||||
respond_404(request)
|
respond_404(request)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Check if the media is cached on the client, if so return 304.
|
||||||
|
if check_for_cached_entry_and_respond(request):
|
||||||
|
return
|
||||||
|
|
||||||
thumbnail_infos = await self.store.get_remote_media_thumbnails(
|
thumbnail_infos = await self.store.get_remote_media_thumbnails(
|
||||||
server_name, media_id
|
server_name, media_id
|
||||||
)
|
)
|
||||||
|
@ -510,6 +525,10 @@ class ThumbnailProvider:
|
||||||
if media_info.authenticated:
|
if media_info.authenticated:
|
||||||
raise NotFoundError()
|
raise NotFoundError()
|
||||||
|
|
||||||
|
# Check if the media is cached on the client, if so return 304.
|
||||||
|
if check_for_cached_entry_and_respond(request):
|
||||||
|
return
|
||||||
|
|
||||||
thumbnail_infos = await self.store.get_remote_media_thumbnails(
|
thumbnail_infos = await self.store.get_remote_media_thumbnails(
|
||||||
server_name, media_id
|
server_name, media_id
|
||||||
)
|
)
|
||||||
|
|
|
@ -147,6 +147,45 @@ class FederationMediaDownloadsTest(unittest.FederatingHomeserverTestCase):
|
||||||
found_file = any(SMALL_PNG in field for field in stripped_bytes)
|
found_file = any(SMALL_PNG in field for field in stripped_bytes)
|
||||||
self.assertTrue(found_file)
|
self.assertTrue(found_file)
|
||||||
|
|
||||||
|
def test_federation_etag(self) -> None:
    """Federation media downloads expose an ETag that can be revalidated."""

    # Store a small piece of local media to download over federation.
    upload = io.BytesIO(b"file_to_stream")
    content_uri = self.get_success(
        self.media_repo.create_content(
            "text/plain",
            "test_upload",
            upload,
            46,
            UserID.from_string("@user_id:whatever.org"),
        )
    )
    download_path = f"/_matrix/federation/v1/media/download/{content_uri.media_id}"

    # A plain download succeeds...
    channel = self.make_signed_federation_request("GET", download_path)
    self.pump()
    self.assertEqual(200, channel.code)

    # ...and carries exactly one ETag header.
    etags = channel.headers.getRawHeaders("ETag")
    self.assertIsNotNone(etags)
    assert etags is not None  # For mypy
    self.assertEqual(len(etags), 1)
    etag = etags[0]

    # Asking again with that ETag must yield a 304 with no body at all.
    channel = self.make_signed_federation_request(
        "GET",
        download_path,
        custom_headers=[("If-None-Match", etag)],
    )
    self.pump()
    self.assertEqual(channel.code, 304)
    self.assertEqual(channel.is_finished(), True)
    self.assertNotIn("body", channel.result)
|
||||||
|
|
||||||
|
|
||||||
class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
|
class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
|
||||||
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
|
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
|
||||||
|
|
|
@ -2676,3 +2676,113 @@ class AuthenticatedMediaTestCase(unittest.HomeserverTestCase):
|
||||||
access_token=self.tok,
|
access_token=self.tok,
|
||||||
)
|
)
|
||||||
self.assertEqual(channel10.code, 200)
|
self.assertEqual(channel10.code, 200)
|
||||||
|
|
||||||
|
def test_authenticated_media_etag(self) -> None:
    """ETag revalidation works for authenticated media on the client APIs,
    for both local and remote media and for downloads and thumbnails."""

    thumbnail_query = "?width=32&height=32&method=crop"

    # --- Local media -----------------------------------------------------

    # upload some local media with authentication on
    upload_channel = self.make_request(
        "POST",
        "_matrix/media/v3/upload?filename=test_png_upload",
        SMALL_PNG,
        self.tok,
        shorthand=False,
        content_type=b"image/png",
        custom_headers=[("Content-Length", str(67))],
    )
    self.assertEqual(upload_channel.code, 200)
    content_uri = upload_channel.json_body.get("content_uri")
    assert content_uri is not None
    local_media = content_uri.split("mxc://")[1]

    # Check standard media endpoint, then thumbnails as well
    self._check_caching(f"/download/{local_media}")
    self._check_caching(f"/thumbnail/{local_media}{thumbnail_query}")

    # --- Remote media ----------------------------------------------------

    # Inject a piece of remote media directly into the media store.
    file_id = "abcdefg12345"
    file_info = FileInfo(server_name="lonelyIsland", file_id=file_id)

    media_storage = self.hs.get_media_repository().media_storage

    store_ctx = media_storage.store_into_file(file_info)
    (media_file, _) = self.get_success(store_ctx.__aenter__())
    media_file.write(SMALL_PNG)
    self.get_success(store_ctx.__aexit__(None, None, None))

    # we write the authenticated status when storing media, so this should pick up
    # config and authenticate the media
    self.get_success(
        self.store.store_cached_remote_media(
            origin="lonelyIsland",
            media_id="52",
            media_type="image/png",
            media_length=1,
            time_now_ms=self.clock.time_msec(),
            upload_name="remote_test.png",
            filesystem_id=file_id,
        )
    )

    # ensure we have thumbnails for the non-dynamic code path
    if self.extra_config == {"dynamic_thumbnails": False}:
        self.get_success(
            self.repo._generate_thumbnails(
                "lonelyIsland", "52", file_id, "image/png"
            )
        )

    self._check_caching("/download/lonelyIsland/52")
    self._check_caching(f"/thumbnail/lonelyIsland/52{thumbnail_query}")
|
||||||
|
|
||||||
|
def _check_caching(self, path: str) -> None:
    """Exercise the caching behaviour of a single media path.

    Checks that:
      1. fetching the path returns an ETag header
      2. refetching with the ETag returns a 304 without a body
      3. refetching with the ETag but through unauthenticated endpoint
         returns 404

    Args:
        path: The media path (starting with `/download/` or `/thumbnail/`)
            to append to the media endpoint prefixes.
    """
    authenticated_url = f"/_matrix/client/v1/media{path}"
    unauthenticated_url = f"/_matrix/media/r0{path}"

    # Request media over authenticated endpoint, should be found
    first_fetch = self.make_request(
        "GET",
        authenticated_url,
        access_token=self.tok,
        shorthand=False,
    )
    self.assertEqual(first_fetch.code, 200)

    # Should have a single ETag field
    etags = first_fetch.headers.getRawHeaders("ETag")
    self.assertIsNotNone(etags)
    assert etags is not None  # For mypy
    self.assertEqual(len(etags), 1)
    etag = etags[0]
    conditional_headers = [("If-None-Match", etag)]

    # Refetching with the etag should result in 304 and empty body.
    revalidation = self.make_request(
        "GET",
        authenticated_url,
        access_token=self.tok,
        shorthand=False,
        custom_headers=conditional_headers,
    )
    self.assertEqual(revalidation.code, 304)
    self.assertEqual(revalidation.is_finished(), True)
    self.assertNotIn("body", revalidation.result)

    # Refetching with the etag but no access token should result in 404.
    unauthenticated = self.make_request(
        "GET",
        unauthenticated_url,
        shorthand=False,
        custom_headers=conditional_headers,
    )
    self.assertEqual(unauthenticated.code, 404)
|
||||||
|
|
Loading…
Add table
Reference in a new issue