cache.h
/*
 * Copyright 2021 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */
#ifndef ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_
#define ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_

#include <xtensa/config/core-isa.h>
#include <zephyr/toolchain.h>
#include <zephyr/sys/util.h>
#include <zephyr/debug/sparse.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Size in bytes of one data cache way, i.e. the span of distinct
 * line indices.
 */
#define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)

#if XCHAL_DCACHE_SIZE
BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
#endif

/* Write any dirty data cache lines covering [addr, addr + bytes)
 * back to memory ("dhwb" is a hit-writeback: lines stay valid).
 */
static ALWAYS_INLINE void z_xtensa_cache_flush(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhwb %0, 0" :: "r"(line));
	}
#endif
}

/* Write back and invalidate the data cache lines covering
 * [addr, addr + bytes) ("dhwbi" is a hit-writeback-invalidate).
 */
static ALWAYS_INLINE void z_xtensa_cache_flush_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhwbi %0, 0" :: "r"(line));
	}
#endif
}

/* Invalidate the data cache lines covering [addr, addr + bytes)
 * without writeback ("dhi"); dirty data in the range is discarded.
 */
static ALWAYS_INLINE void z_xtensa_cache_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhi %0, 0" :: "r"(line));
	}
#endif
}

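/* Illustrative sketch of the flush/invalidate pairing around a DMA
 * transfer (start_dma() and dma_wait() are hypothetical helpers):
 *
 *	z_xtensa_cache_flush(tx_buf, sizeof(tx_buf));	// device reads RAM
 *	start_dma(tx_buf, rx_buf);
 *	dma_wait();
 *	z_xtensa_cache_inv(rx_buf, sizeof(rx_buf));	// CPU reads fresh RAM
 */
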
/* Invalidate the entire data cache by line index ("dii"); dirty
 * lines are discarded without writeback.
 */
static ALWAYS_INLINE void z_xtensa_cache_inv_all(void)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t line;

	for (line = 0; line < XCHAL_DCACHE_SIZE; line += step) {
		__asm__ volatile("dii %0, 0" :: "r"(line));
	}
#endif
}

/* Write the entire data cache back to memory by line index ("diwb"),
 * leaving the lines valid.
 */
static ALWAYS_INLINE void z_xtensa_cache_flush_all(void)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t line;

	for (line = 0; line < XCHAL_DCACHE_SIZE; line += step) {
		__asm__ volatile("diwb %0, 0" :: "r"(line));
	}
#endif
}

/* Write back and invalidate the entire data cache by line index
 * ("diwbi").
 */
static ALWAYS_INLINE void z_xtensa_cache_flush_inv_all(void)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t line;

	for (line = 0; line < XCHAL_DCACHE_SIZE; line += step) {
		__asm__ volatile("diwbi %0, 0" :: "r"(line));
	}
#endif
}

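/* Illustrative use (the platform hook and soc_power_down() are
 * hypothetical): quiescing the whole cache before a power state that
 * loses cache contents:
 *
 *	void platform_enter_low_power(void)
 *	{
 *		z_xtensa_cache_flush_inv_all();
 *		soc_power_down();
 *	}
 */
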
#if defined(CONFIG_XTENSA_RPO_CACHE)
#if defined(CONFIG_ARCH_HAS_COHERENCE)
/* A pointer is coherent when it lies in the uncached region: the top
 * three address bits select the 512 MB region.
 */
static inline bool arch_mem_coherent(void *ptr)
{
	size_t addr = (size_t) ptr;

	return (addr >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
}
#endif

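/* For example, assuming CONFIG_XTENSA_UNCACHED_REGION=6 (the value is
 * SoC-specific), pointers into 0xC0000000-0xDFFFFFFF report as
 * coherent:
 *
 *	bool c = arch_mem_coherent((void *)0xC0001000);	// true here
 */
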
static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
{
	/* The math here is all compile-time: rto and rfrom are
	 * constants, so when the two region numbers differ in exactly
	 * one bit we can convert between them by setting or clearing
	 * just that bit.  Otherwise it needs two operations (mask to
	 * region zero, then OR in the target region).
	 */
	uint32_t rxor = (rto ^ rfrom) << 29;

	rto <<= 29;
	if (Z_IS_POW2(rxor)) {
		if ((rxor & rto) == 0) {
			return addr & ~rxor;
		} else {
			return addr | rxor;
		}
	} else {
		return (addr & ~(7U << 29)) | rto;
	}
}
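
/* Worked example (region numbers chosen purely for illustration):
 * with rfrom=2 and rto=6, rxor = (2 ^ 6) << 29 = 0x80000000, a power
 * of two, so the conversion is the single operation
 * "addr | 0x80000000".  With rfrom=6 and rto=5, 6 ^ 5 = 3 is not a
 * power of two, so the address is masked down to region zero and
 * 5 << 29 is OR'd back in.
 */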
/* Return @ptr rewritten to address the same memory through the
 * cached region mapping.
 */
static inline void __sparse_cache *arch_xtensa_cached_ptr(void *ptr)
{
	return (__sparse_force void __sparse_cache *)z_xtrpoflip((uint32_t) ptr,
				CONFIG_XTENSA_CACHED_REGION,
				CONFIG_XTENSA_UNCACHED_REGION);
}

/* Return @ptr rewritten to address the same memory through the
 * uncached region mapping.
 */
static inline void *arch_xtensa_uncached_ptr(void __sparse_cache *ptr)
{
	return (void *)z_xtrpoflip((__sparse_force uint32_t)ptr,
				CONFIG_XTENSA_UNCACHED_REGION,
				CONFIG_XTENSA_CACHED_REGION);
}

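/* Illustrative sketch (the struct and names are hypothetical):
 * writing shared state through the uncached alias so another core
 * sees it without an explicit flush:
 *
 *	static struct state shared;
 *
 *	void publish(uint32_t v)
 *	{
 *		struct state *u = arch_xtensa_uncached_ptr(
 *			(__sparse_force void __sparse_cache *)&shared);
 *
 *		u->value = v;
 *	}
 */
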
/* Utility to generate an unrolled and optimal[1] code sequence to set
 * the RPO TLB registers (contra the HAL cacheattr macros, which
 * generate larger code and can't be called from C), based on the
 * KERNEL_COHERENCE configuration in use.  Selects RPO attribute "2"
 * for regions (including MMIO registers in region zero) which want to
 * bypass L1, "4" for the cached region which wants writeback, and
 * "15" (invalid) elsewhere.
 *
 * Note that on cores that have the "translation" option set, we need
 * to put an identity mapping in the high bits.  Also per spec,
 * changing the current code region (by definition cached) requires
 * that WITLB be followed by an ISYNC and that both instructions live
 * in the same cache line (two 3-byte instructions fit in an 8-byte
 * aligned region, so that's guaranteed not to cross a cache line
 * boundary).
 *
 * [1] With the sole exception of gcc's infuriating insistence on
 * emitting a precomputed literal for addr + addrincr instead of
 * computing it with a single ADD instruction from values it already
 * has in registers.  Explicitly assigning the variables to registers
 * via an attribute works, but then emits needless MOV instructions
 * instead.  I tell myself it's just 32 bytes of .text, but... Sigh.
 */
#define _REGION_ATTR(r)						\
	((r) == 0 ? 2 :						\
	 ((r) == CONFIG_XTENSA_CACHED_REGION ? 4 :		\
	  ((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))

#define _SET_ONE_TLB(region) do {				\
	uint32_t attr = _REGION_ATTR(region);			\
	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
		attr |= addr; /* RPO with translation */	\
	}							\
	if (region != CONFIG_XTENSA_CACHED_REGION) {		\
		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
				 :: "r"(attr), "r"(addr));	\
	} else {						\
		__asm__ volatile("wdtlb %0, %1"			\
				 :: "r"(attr), "r"(addr));	\
		__asm__ volatile("j 1f; .align 8; 1:");		\
		__asm__ volatile("witlb %0, %1; isync"		\
				 :: "r"(attr), "r"(addr));	\
	}							\
	addr += addrincr;					\
} while (0)

#define ARCH_XTENSA_SET_RPO_TLB() do {				\
	register uint32_t addr = 0, addrincr = 0x20000000;	\
	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7);	\
} while (0)
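
/* For illustration, assuming hypothetical values
 * CONFIG_XTENSA_CACHED_REGION=5 and CONFIG_XTENSA_UNCACHED_REGION=6:
 * the FOR_EACH() above expands to eight _SET_ONE_TLB() statements,
 * and the one for region 1 reduces to roughly:
 *
 *	attr = 15;		// neither region 0, cached, nor uncached
 *	if (XCHAL_HAVE_XLT_CACHEATTR) {
 *		attr |= addr;	// identity translation in the high bits
 *	}
 *	__asm__ volatile("wdtlb %0, %1; witlb %0, %1"
 *			 :: "r"(attr), "r"(addr));	// addr == 0x20000000
 *	addr += addrincr;
 */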

#endif /* CONFIG_XTENSA_RPO_CACHE */

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ */