Commit 8dffa9e
enforce arc_dnode_limit
Fix: openzfs#17487

As the Linux kernel seems very lazy in freeing dentry and inode caches, the dnode cache routinely overflows, evicting valuable meta/data and putting additional memory pressure on the system. This patch avoids the issue by (indirectly) calling arc_evict when dnode size > dnode_limit. Moreover, it restores zfs_prune_aliases as a fallback when the kernel shrinker does nothing, enabling ZFS to actually free dnodes.

Signed-off-by: Gionatan Danti <[email protected]>
1 parent 8de8e0d commit 8dffa9e

File tree: 2 files changed, +63 −1 lines changed
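The dnode pressure described in the commit message can be observed on a running system by comparing the dnode_size and arc_dnode_limit counters in /proc/spl/kstat/zfs/arcstats. The sketch below is not part of the patch; it is a minimal userspace check, assuming those kstat entry names are exposed as they are on typical OpenZFS builds.

/*
 * Illustrative only (not part of the patch): report ARC dnode usage
 * versus its limit.  Assumes the "dnode_size" and "arc_dnode_limit"
 * entries exist in /proc/spl/kstat/zfs/arcstats.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	FILE *f = fopen("/proc/spl/kstat/zfs/arcstats", "r");
	char line[256], name[128];
	unsigned long long val, dnode_size = 0, dnode_limit = 0;

	if (f == NULL) {
		perror("arcstats");
		return (1);
	}
	while (fgets(line, sizeof (line), f) != NULL) {
		/* Data lines look like "<name> <type> <value>". */
		if (sscanf(line, "%127s %*u %llu", name, &val) != 2)
			continue;
		if (strcmp(name, "dnode_size") == 0)
			dnode_size = val;
		else if (strcmp(name, "arc_dnode_limit") == 0)
			dnode_limit = val;
	}
	fclose(f);

	printf("dnode_size=%llu arc_dnode_limit=%llu -> %s\n",
	    dnode_size, dnode_limit,
	    (dnode_limit != 0 && dnode_size > dnode_limit) ?
	    "over limit" : "within limit");
	return (0);
}

When dnode_size stays above arc_dnode_limit for long periods, the system is in exactly the situation this commit targets.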

module/os/linux/zfs/zfs_vfsops.c

Lines changed: 59 additions & 0 deletions
@@ -1216,6 +1216,61 @@ zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)
 	return (error);
 }
 
+/*
+ * If kernel prunes nothing, fall back to this pre-3.1 manual walk.
+ * To avoid scanning the same znodes multiple times they are always rotated
+ * to the end of the z_all_znodes list. New znodes are inserted at the
+ * end of the list so we're always scanning the oldest znodes first.
+ */
+static int
+zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
+{
+	znode_t **zp_array, *zp;
+	int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
+	int objects = 0;
+	int i = 0, j = 0;
+
+	zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
+
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {
+
+		if ((i++ > nr_to_scan) || (j >= max_array))
+			break;
+
+		ASSERT(list_link_active(&zp->z_link_node));
+		list_remove(&zfsvfs->z_all_znodes, zp);
+		list_insert_tail(&zfsvfs->z_all_znodes, zp);
+
+		/* Skip active znodes and .zfs entries */
+		if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
+			continue;
+
+		if (igrab(ZTOI(zp)) == NULL)
+			continue;
+
+		zp_array[j] = zp;
+		j++;
+	}
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+	for (i = 0; i < j; i++) {
+		zp = zp_array[i];
+
+		ASSERT3P(zp, !=, NULL);
+		d_prune_aliases(ZTOI(zp));
+
+		if (atomic_read(&ZTOI(zp)->i_count) == 1)
+			objects++;
+
+		zrele(zp);
+	}
+
+	vmem_free(zp_array, max_array * sizeof (znode_t *));
+
+	return (objects);
+}
+
 /*
  * The ARC has requested that the filesystem drop entries from the dentry
  * and inode caches. This can occur when the ARC needs to free meta data
@@ -1267,6 +1322,10 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 	*objects = (*shrinker->scan_objects)(shrinker, &sc);
 #endif
 
+	/* Fall back to zfs_prune_aliases if the kernel's shrinker did nothing */
+	if (*objects == 0)
+		*objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
+
 	zfs_exit(zfsvfs, FTAG);
 
 	dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
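The comment at the top of zfs_prune_aliases() describes its rotate-to-tail scan: each znode visited is moved to the end of z_all_znodes, so bounded, repeated scans always start with the oldest entries. Below is a minimal standalone sketch of that pattern; the node type and helper names are hypothetical and only mirror the structure of the kernel code, which holds z_znodes_lock and uses igrab()/zrele() around the real work.

/*
 * Standalone sketch of the rotate-to-tail scan used by zfs_prune_aliases().
 * The node type and helpers are hypothetical; the real code walks
 * z_all_znodes under z_znodes_lock and uses igrab()/zrele().
 */
#include <stdio.h>
#include <stdlib.h>

struct node {
	int id;
	int busy;		/* stand-in for an active znode or .zfs entry */
	struct node *next;
};

static struct node *head, *tail;

static void
insert_tail(struct node *n)
{
	n->next = NULL;
	if (tail == NULL)
		head = n;
	else
		tail->next = n;
	tail = n;
}

/* Scan at most nr_to_scan entries, rotating each one to the tail. */
static int
prune_scan(int nr_to_scan)
{
	int pruned = 0;

	for (int i = 0; i < nr_to_scan && head != NULL; i++) {
		struct node *n = head;

		head = n->next;
		if (head == NULL)
			tail = NULL;
		insert_tail(n);	/* oldest entries are always scanned first */

		if (n->busy)	/* skip, like MUTEX_HELD() or z_is_ctldir */
			continue;
		pruned++;	/* the real code calls d_prune_aliases() here */
	}
	return (pruned);
}

int
main(void)
{
	for (int i = 0; i < 8; i++) {
		struct node *n = calloc(1, sizeof (*n));
		n->id = i;
		n->busy = (i % 3 == 0);
		insert_tail(n);
	}
	printf("first pass pruned %d entries\n", prune_scan(5));
	printf("second pass pruned %d entries\n", prune_scan(5));
	return (0);
}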

module/zfs/arc.c

Lines changed: 4 additions & 1 deletion
@@ -5082,8 +5082,11 @@ arc_is_overflowing(boolean_t lax, boolean_t use_reserve)
 	 * in the ARC. In practice, that's in the tens of MB, which is low
 	 * enough to be safe.
 	 */
-	int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) - arc_c -
+	int64_t arc_over = aggsum_lower_bound(&arc_sums.arcstat_size) - arc_c -
 	    zfs_max_recordsize;
+	int64_t dn_over = wmsum_value(&arc_sums.arcstat_dnode_size) -
+	    arc_dnode_limit;
+	int64_t over = MAX(arc_over, dn_over);
 
 	/* Always allow at least one block of overflow. */
 	if (over < 0)
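With the arc.c change above, dnode pressure alone can push arc_is_overflowing() past its threshold, so arc_evict() gets a chance to run even when the total ARC size is still below target. The following is a simplified, standalone model of that decision, not the real function: plain int64_t arguments stand in for aggsum_lower_bound(&arc_sums.arcstat_size), wmsum_value(&arc_sums.arcstat_dnode_size), arc_c, zfs_max_recordsize and arc_dnode_limit.

/*
 * Simplified, standalone model of the patched overflow check in
 * arc_is_overflowing().  Plain integers stand in for the aggsum/wmsum
 * counters and tunables used by the real function.
 */
#include <stdio.h>
#include <stdint.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

static int
arc_overflowing(int64_t arc_size, int64_t arc_c, int64_t max_recordsize,
    int64_t dnode_size, int64_t dnode_limit)
{
	int64_t arc_over = arc_size - arc_c - max_recordsize;
	int64_t dn_over = dnode_size - dnode_limit;
	int64_t over = MAX(arc_over, dn_over);

	/* Always allow at least one block of overflow. */
	if (over < 0)
		return (0);
	return (1);	/* treated as overflowing, so eviction may proceed */
}

int
main(void)
{
	/* ARC is 2 GiB below target, but dnodes exceed their limit. */
	int overflow = arc_overflowing(
	    6LL << 30,		/* ARC size: 6 GiB */
	    8LL << 30,		/* arc_c target: 8 GiB */
	    16LL << 20,		/* zfs_max_recordsize: 16 MiB */
	    2LL << 30,		/* dnode size: 2 GiB */
	    1LL << 30);		/* arc_dnode_limit: 1 GiB */

	printf("overflowing: %d\n", overflow);	/* prints 1 */
	return (0);
}

Before the patch only the ARC-size term was considered, so this workload would not have been treated as overflowing and the dnode cache could keep growing past its limit.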
