Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Fix empty url_cache_thumbnails/yyyy-mm-dd/ directories being left b…
Browse files Browse the repository at this point in the history
…ehind (#10924)
  • Loading branch information
squahtx authored Sep 29, 2021
1 parent 9fd057b commit 2be0fde
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 31 deletions.
1 change: 1 addition & 0 deletions changelog.d/10924.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug where empty `yyyy-mm-dd/` directories would be left behind in the media store's `url_cache_thumbnails/` directory.
74 changes: 43 additions & 31 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@

ONE_HOUR = 60 * 60 * 1000
ONE_DAY = 24 * ONE_HOUR
IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY


@attr.s(slots=True, frozen=True, auto_attribs=True)
Expand Down Expand Up @@ -496,6 +497,27 @@ async def _expire_url_cache_data(self) -> None:
logger.info("Still running DB updates; skipping expiry")
return

def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
"""Attempt to remove the given chain of parent directories
Args:
dirs: The list of directory paths to delete, with children appearing
before their parents.
"""
for dir in dirs:
try:
os.rmdir(dir)
except FileNotFoundError:
# Already deleted, continue with deleting the rest
pass
except OSError as e:
# Failed, skip deleting the rest of the parent dirs
if e.errno != errno.ENOTEMPTY:
logger.warning(
"Failed to remove media directory: %r: %s", dir, e
)
break

# First we delete expired url cache entries
media_ids = await self.store.get_expired_url_cache(now)

Expand All @@ -504,20 +526,16 @@ async def _expire_url_cache_data(self) -> None:
fname = self.filepaths.url_cache_filepath(media_id)
try:
os.remove(fname)
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
# If the path doesn't exist, meh
if e.errno != errno.ENOENT:
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue

removed_media.append(media_id)

try:
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
for dir in dirs:
os.rmdir(dir)
except Exception:
pass
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
try_remove_parent_dirs(dirs)

await self.store.delete_url_cache(removed_media)

Expand All @@ -530,44 +548,38 @@ async def _expire_url_cache_data(self) -> None:
# These may be cached for a bit on the client (i.e., they
# may have a room open with a preview url thing open).
# So we wait a couple of days before deleting, just in case.
expire_before = now - 2 * ONE_DAY
expire_before = now - IMAGE_CACHE_EXPIRY_MS
media_ids = await self.store.get_url_cache_media_before(expire_before)

removed_media = []
for media_id in media_ids:
fname = self.filepaths.url_cache_filepath(media_id)
try:
os.remove(fname)
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
# If the path doesn't exist, meh
if e.errno != errno.ENOENT:
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue

try:
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
for dir in dirs:
os.rmdir(dir)
except Exception:
pass
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
try_remove_parent_dirs(dirs)

thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
try:
shutil.rmtree(thumbnail_dir)
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
# If the path doesn't exist, meh
if e.errno != errno.ENOENT:
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue

removed_media.append(media_id)

try:
dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
for dir in dirs:
os.rmdir(dir)
except Exception:
pass
dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
# Note that one of the directories to be deleted has already been
# removed by the `rmtree` above.
try_remove_parent_dirs(dirs)

await self.store.delete_url_cache_media(removed_media)

Expand Down
31 changes: 31 additions & 0 deletions tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
from twisted.test.proto_helpers import AccumulatingProtocol

from synapse.config.oembed import OEmbedEndpointConfig
from synapse.rest.media.v1.preview_url_resource import IMAGE_CACHE_EXPIRY_MS
from synapse.util.stringutils import parse_and_validate_mxc_uri

from tests import unittest
from tests.server import FakeTransport
from tests.test_utils import SMALL_PNG
from tests.utils import MockClock

try:
import lxml
Expand Down Expand Up @@ -851,3 +853,32 @@ def test_storage_providers_exclude_thumbnails(self):
404,
"URL cache thumbnail was unexpectedly retrieved from a storage provider",
)

def test_cache_expiry(self):
"""Test that URL cache files and thumbnails are cleaned up properly on expiry."""
self.preview_url.clock = MockClock()

_host, media_id = self._download_image()

file_path = self.preview_url.filepaths.url_cache_filepath(media_id)
file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete(
media_id
)
thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory(
media_id
)
thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete(
media_id
)

self.assertTrue(os.path.isfile(file_path))
self.assertTrue(os.path.isdir(thumbnail_dir))

self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1)
self.get_success(self.preview_url._expire_url_cache_data())

for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
self.assertFalse(
os.path.exists(path),
f"{os.path.relpath(path, self.media_store_path)} was not deleted",
)

0 comments on commit 2be0fde

Please sign in to comment.