Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix empty url_cache_thumbnails/yyyy-mm-dd/ directories being left behind #10924

Merged
merged 6 commits into from
Sep 29, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10924.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug where empty `yyyy-mm-dd/` directories would be left behind in the media store's `url_cache_thumbnails/` directory.
66 changes: 37 additions & 29 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import errno
import fnmatch
import itertools
import logging
Expand Down Expand Up @@ -73,6 +72,7 @@

ONE_HOUR = 60 * 60 * 1000
ONE_DAY = 24 * ONE_HOUR
IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY


@attr.s(slots=True, frozen=True, auto_attribs=True)
Expand Down Expand Up @@ -504,20 +504,22 @@ async def _expire_url_cache_data(self) -> None:
fname = self.filepaths.url_cache_filepath(media_id)
try:
os.remove(fname)
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
# If the path doesn't exist, meh
if e.errno != errno.ENOENT:
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue

removed_media.append(media_id)

try:
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
for dir in dirs:
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
for dir in dirs:
try:
os.rmdir(dir)
except Exception:
pass
except FileNotFoundError:
pass # Already deleted, continue with deleting the rest
except Exception:
break # Failed, skip deleting the rest of the parent dirs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably log something here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds like a good idea. I got tired of the duplicated logic and factored it out into a function.


await self.store.delete_url_cache(removed_media)

Expand All @@ -530,44 +532,50 @@ async def _expire_url_cache_data(self) -> None:
# These may be cached for a bit on the client (i.e., they
# may have a room open with a preview url thing open).
# So we wait a couple of days before deleting, just in case.
expire_before = now - 2 * ONE_DAY
expire_before = now - IMAGE_CACHE_EXPIRY_MS
media_ids = await self.store.get_url_cache_media_before(expire_before)

removed_media = []
for media_id in media_ids:
fname = self.filepaths.url_cache_filepath(media_id)
try:
os.remove(fname)
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
# If the path doesn't exist, meh
if e.errno != errno.ENOENT:
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue

try:
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
for dir in dirs:
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
for dir in dirs:
try:
os.rmdir(dir)
except Exception:
pass
except FileNotFoundError:
pass # Already deleted, continue with deleting the rest
except Exception:
break # Failed, skip deleting the rest of the parent dirs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and here?


thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
try:
shutil.rmtree(thumbnail_dir)
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
# If the path doesn't exist, meh
if e.errno != errno.ENOENT:
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue
logger.warning("Failed to remove media: %r: %s", media_id, e)
continue

removed_media.append(media_id)

try:
dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
for dir in dirs:
dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
# Note that one of the directories to be deleted has already been
# removed by the `rmtree` above.
for dir in dirs:
try:
os.rmdir(dir)
except Exception:
pass
except FileNotFoundError:
pass # Already deleted, continue with deleting the rest
except Exception:
break # Failed, skip deleting the rest of the parent dirs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and here?


await self.store.delete_url_cache_media(removed_media)

Expand Down
31 changes: 31 additions & 0 deletions tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
from twisted.test.proto_helpers import AccumulatingProtocol

from synapse.config.oembed import OEmbedEndpointConfig
from synapse.rest.media.v1.preview_url_resource import IMAGE_CACHE_EXPIRY_MS
from synapse.util.stringutils import parse_and_validate_mxc_uri

from tests import unittest
from tests.server import FakeTransport
from tests.test_utils import SMALL_PNG
from tests.utils import MockClock

try:
import lxml
Expand Down Expand Up @@ -851,3 +853,32 @@ def test_storage_providers_exclude_thumbnails(self):
404,
"URL cache thumbnail was unexpectedly retrieved from a storage provider",
)

def test_cache_expiry(self):
"""Test that URL cache files and thumbnails are cleaned up properly on expiry."""
self.preview_url.clock = MockClock()

_host, media_id = self._download_image()

file_path = self.preview_url.filepaths.url_cache_filepath(media_id)
file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete(
media_id
)
thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory(
media_id
)
thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete(
media_id
)

self.assertTrue(os.path.isfile(file_path))
self.assertTrue(os.path.isdir(thumbnail_dir))

self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1)
self.get_success(self.preview_url._expire_url_cache_data())

for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
self.assertFalse(
os.path.exists(path),
f"{os.path.relpath(path, self.media_store_path)} was not deleted",
)