From f716f6b0d8c745e20ce172d40872986504321203 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Thu, 4 Jul 2024 07:01:36 +0300 Subject: [PATCH] fix: handle race condition on disk-based cache retention Disk-based cache removal listener crashed when the file to delete didn't exist, causing the metrics test to fail as the value didn't increase. By properly handling the scenario of non existent file, the await for metrics can be reduced. --- .../tieredstorage/fetch/cache/DiskChunkCache.java | 15 ++++++++++----- .../fetch/cache/DiskChunkCacheMetricsTest.java | 6 +++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/DiskChunkCache.java b/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/DiskChunkCache.java index 005dca0d4..c0c7113e5 100644 --- a/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/DiskChunkCache.java +++ b/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/DiskChunkCache.java @@ -98,11 +98,16 @@ public RemovalListener removalListener() { return (key, path, cause) -> { try { if (path != null) { - final long fileSize = Files.size(path); - Files.delete(path); - metrics.chunkDeleted(fileSize); - log.trace("Deleted cached file for key {} with path {} from cache directory." - + " The reason of the deletion is {}", key, path, cause); + if (Files.exists(path)) { + final long fileSize = Files.size(path); + Files.delete(path); + metrics.chunkDeleted(fileSize); + log.trace("Deleted cached file for key {} with path {} from cache directory." + + " The reason of the deletion is {}", key, path, cause); + } else { + log.debug("Cached file does not exist, " + + "it may be caused by a race condition and the file is already deleted."); + } } else { log.warn("Path not present when trying to delete cached file for key {} from cache directory." + " The reason of the deletion is {}", key, cause); diff --git a/core/src/test/java/io/aiven/kafka/tieredstorage/fetch/cache/DiskChunkCacheMetricsTest.java b/core/src/test/java/io/aiven/kafka/tieredstorage/fetch/cache/DiskChunkCacheMetricsTest.java index 1c769a606..4e95d66de 100644 --- a/core/src/test/java/io/aiven/kafka/tieredstorage/fetch/cache/DiskChunkCacheMetricsTest.java +++ b/core/src/test/java/io/aiven/kafka/tieredstorage/fetch/cache/DiskChunkCacheMetricsTest.java @@ -106,6 +106,10 @@ void metrics() throws IOException, JMException, StorageBackendException { assertThat(MBEAN_SERVER.getAttribute(objectName, "write-bytes-rate")) .isEqualTo(((double) size1) / METRIC_TIME_WINDOW_SEC); + assertThat(MBEAN_SERVER.getAttribute(objectName, "delete-total")) + .asInstanceOf(DOUBLE) + .isZero(); + diskChunkCache.getChunk(OBJECT_KEY_PATH, SEGMENT_MANIFEST, 1); assertThat(MBEAN_SERVER.getAttribute(objectName, "write-total")) @@ -119,7 +123,7 @@ void metrics() throws IOException, JMException, StorageBackendException { .isEqualTo(((double) (size1 + size2)) / METRIC_TIME_WINDOW_SEC); await("Deletion happens") - .atMost(Duration.ofSeconds(30)) // increase to reduce chance of flakiness + .atMost(Duration.ofSeconds(5)) .pollDelay(Duration.ofMillis(100)) .pollInterval(Duration.ofMillis(100)) .until(() -> (double) MBEAN_SERVER.getAttribute(objectName, "delete-total") > 0);