Skip to content

Commit 35cb3ee

Browse files
committed
[AArch64][Builtins] Avoid unnecessary cache cleaning
Use the new control bits CTR_EL0.DIC and CTR_EL0.IDC to discover the d-cache cleaning and i-cache invalidation requirements for instruction-to-data coherence. This matches the behavior in the latest libgcc.

Author: Shaokun Zhang <[email protected]>
Reviewed By: peter.smith
Differential Revision: https://reviews.llvm.org/D69247
1 parent d2ec416 commit 35cb3ee

File tree

1 file changed

+23
-13
lines changed

1 file changed

+23
-13
lines changed

compiler-rt/lib/builtins/clear_cache.c

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -93,24 +93,34 @@ void __clear_cache(void *start, void *end) {
9393
#elif defined(__aarch64__) && !defined(__APPLE__)
9494
uint64_t xstart = (uint64_t)(uintptr_t)start;
9595
uint64_t xend = (uint64_t)(uintptr_t)end;
96-
uint64_t addr;
9796

98-
// Get Cache Type Info
99-
uint64_t ctr_el0;
100-
__asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
97+
// Get Cache Type Info.
98+
static uint64_t ctr_el0 = 0;
99+
if (ctr_el0 == 0)
100+
__asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
101101

102-
// dc & ic instructions must use 64bit registers so we don't use
102+
// The DC and IC instructions must use 64-bit registers so we don't use
103103
// uintptr_t in case this runs in an ILP32 environment.
104-
const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
105-
for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
106-
addr += dcache_line_size)
107-
__asm __volatile("dc cvau, %0" ::"r"(addr));
104+
uint64_t addr;
105+
106+
// If CTR_EL0.IDC is set, data cache cleaning to the point of unification
107+
// is not required for instruction to data coherence.
108+
if (((ctr_el0 >> 28) & 0x1) == 0x0) {
109+
const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
110+
for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
111+
addr += dcache_line_size)
112+
__asm __volatile("dc cvau, %0" ::"r"(addr));
113+
}
108114
__asm __volatile("dsb ish");
109115

110-
const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
111-
for (addr = xstart & ~(icache_line_size - 1); addr < xend;
112-
addr += icache_line_size)
113-
__asm __volatile("ic ivau, %0" ::"r"(addr));
116+
// If CTR_EL0.DIC is set, instruction cache invalidation to the point of
117+
// unification is not required for instruction to data coherence.
118+
if (((ctr_el0 >> 29) & 0x1) == 0x0) {
119+
const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
120+
for (addr = xstart & ~(icache_line_size - 1); addr < xend;
121+
addr += icache_line_size)
122+
__asm __volatile("ic ivau, %0" ::"r"(addr));
123+
}
114124
__asm __volatile("isb sy");
115125
#elif defined(__powerpc64__)
116126
const size_t line_size = 32;

0 commit comments

Comments
 (0)