|
| 1 | +/* |
| 2 | + Based on the ramspeed test from NuttX. |
| 3 | + https://github.com/apache/nuttx-apps/blob/master/benchmarks/ramspeed/ramspeed_main.c |
| 4 | + Modified for Arduino and ESP32 by Lucas Saavedra Vaz, 2024 |
| 5 | +*/ |
| 6 | + |
| 7 | +#include <Arduino.h> |
| 8 | + |
// Test settings

// Number of times the whole benchmark (memcpy tests followed by memset tests) is repeated.
// Results are printed for every run.
#define N_RUNS 3

// Byte value written by the memset tests
#define FILL_VALUE 0x00

// Number of copy/fill operations performed for each block size
#define N_COPIES 200

// First block size to test, in bytes. Value must be a power of 2.
// Values lower than or equal to 32 KB may cause the operations to use the cache instead of the PSRAM.
#define START_SIZE 65536

// Largest block size to test, in bytes. Must be at least START_SIZE, otherwise no test is run.
// The block size starts at START_SIZE and doubles while it does not exceed this value, so the
// effective maximum is floored to a power of 2.
// The expression is parenthesized so the macro expands safely inside larger expressions
// (e.g. `x % MAX_TEST_SIZE`).
#define MAX_TEST_SIZE (1 * 1024 * 1024)  // 1MB
| 26 | + |
// Implementation macros

// Alignment mask matching the pointer width: 8-byte units when uintptr_t is wider than
// 32 bits, 4-byte units otherwise. MEM_UNIT is the integer type of the same width.
#if !defined(UINTPTR_MAX) || UINTPTR_MAX <= 0xFFFFFFFF
#define MEM_UNIT   uint32_t
#define ALIGN_MASK 0x3
#else
#define MEM_UNIT   uint64_t
#define ALIGN_MASK 0x7
#endif

// Statement macros for the hand-unrolled loops. They operate on the caller's local cursors,
// which must be named d8/s8 (byte pointers) and d32/s32 (32-bit word pointers).
// Each one expands to a complete statement (including the trailing ';') so that REPEAT8()
// can paste eight of them back to back.
#define COPY8         *d8++ = *s8++;
#define COPY32        *d32++ = *s32++;
#define SET8(x)       *d8++ = (x);
#define SET32(x)      *d32++ = (x);
#define REPEAT8(expr) expr expr expr expr expr expr expr expr
| 42 | + |
| 43 | +/* Functions */ |
| 44 | + |
| 45 | +static void *mock_memcpy(void *dst, const void *src, size_t len) |
| 46 | +{ |
| 47 | + uint8_t *d8 = (uint8_t *) dst; |
| 48 | + const uint8_t *s8 = (uint8_t *) src; |
| 49 | + |
| 50 | + uintptr_t d_align = (uintptr_t)d8 & ALIGN_MASK; |
| 51 | + uintptr_t s_align = (uintptr_t)s8 & ALIGN_MASK; |
| 52 | + uint32_t *d32; |
| 53 | + const uint32_t *s32; |
| 54 | + |
| 55 | + /* Byte copy for unaligned memories */ |
| 56 | + |
| 57 | + if (s_align != d_align) |
| 58 | + { |
| 59 | + while (len > 32) |
| 60 | + { |
| 61 | + REPEAT8(COPY8); |
| 62 | + REPEAT8(COPY8); |
| 63 | + REPEAT8(COPY8); |
| 64 | + REPEAT8(COPY8); |
| 65 | + len -= 32; |
| 66 | + } |
| 67 | + |
| 68 | + while (len) |
| 69 | + { |
| 70 | + COPY8; |
| 71 | + len--; |
| 72 | + } |
| 73 | + |
| 74 | + return dst; |
| 75 | + } |
| 76 | + |
| 77 | + /* Make the memories aligned */ |
| 78 | + |
| 79 | + if (d_align) |
| 80 | + { |
| 81 | + d_align = ALIGN_MASK + 1 - d_align; |
| 82 | + while (d_align && len) |
| 83 | + { |
| 84 | + COPY8; |
| 85 | + d_align--; |
| 86 | + len--; |
| 87 | + } |
| 88 | + } |
| 89 | + |
| 90 | + d32 = (uint32_t *)d8; |
| 91 | + s32 = (uint32_t *)s8; |
| 92 | + while (len > 32) |
| 93 | + { |
| 94 | + REPEAT8(COPY32); |
| 95 | + len -= 32; |
| 96 | + } |
| 97 | + |
| 98 | + while (len > 4) |
| 99 | + { |
| 100 | + COPY32; |
| 101 | + len -= 4; |
| 102 | + } |
| 103 | + |
| 104 | + d8 = (uint8_t *)d32; |
| 105 | + s8 = (const uint8_t *)s32; |
| 106 | + while (len) |
| 107 | + { |
| 108 | + COPY8; |
| 109 | + len--; |
| 110 | + } |
| 111 | + |
| 112 | + return dst; |
| 113 | +} |
| 114 | + |
| 115 | +static void mock_memset(void *dst, uint8_t v, size_t len) |
| 116 | +{ |
| 117 | + uint8_t *d8 = (uint8_t *)dst; |
| 118 | + uintptr_t d_align = (uintptr_t) d8 & ALIGN_MASK; |
| 119 | + uint32_t v32; |
| 120 | + uint32_t *d32; |
| 121 | + |
| 122 | + /* Make the address aligned */ |
| 123 | + |
| 124 | + if (d_align) |
| 125 | + { |
| 126 | + d_align = ALIGN_MASK + 1 - d_align; |
| 127 | + while (d_align && len) |
| 128 | + { |
| 129 | + SET8(v); |
| 130 | + len--; |
| 131 | + d_align--; |
| 132 | + } |
| 133 | + } |
| 134 | + |
| 135 | + v32 = (uint32_t)v + ((uint32_t)v << 8) |
| 136 | + + ((uint32_t)v << 16) + ((uint32_t)v << 24); |
| 137 | + |
| 138 | + d32 = (uint32_t *)d8; |
| 139 | + |
| 140 | + while (len > 32) |
| 141 | + { |
| 142 | + REPEAT8(SET32(v32)); |
| 143 | + len -= 32; |
| 144 | + } |
| 145 | + |
| 146 | + while (len > 4) |
| 147 | + { |
| 148 | + SET32(v32); |
| 149 | + len -= 4; |
| 150 | + } |
| 151 | + |
| 152 | + d8 = (uint8_t *)d32; |
| 153 | + while (len) |
| 154 | + { |
| 155 | + SET8(v); |
| 156 | + len--; |
| 157 | + } |
| 158 | +} |
| 159 | + |
| 160 | +static void print_rate(const char *name, uint64_t bytes, uint32_t cost_time) |
| 161 | +{ |
| 162 | + uint32_t rate; |
| 163 | + if (cost_time == 0) |
| 164 | + { |
| 165 | + Serial.println("Error: Too little time taken, please increase N_COPIES"); |
| 166 | + return; |
| 167 | + } |
| 168 | + |
| 169 | + rate = bytes * 1000 / cost_time / 1024; |
| 170 | + Serial.printf("%s Rate = %" PRIu32 " KB/s Time: %" PRIu32 " ms\n", name, rate, cost_time); |
| 171 | +} |
| 172 | + |
| 173 | +static void memcpy_speed_test(void *dest, const void *src, size_t size, uint32_t repeat_cnt) |
| 174 | +{ |
| 175 | + uint32_t start_time; |
| 176 | + uint32_t cost_time_system; |
| 177 | + uint32_t cost_time_mock; |
| 178 | + uint32_t cnt; |
| 179 | + uint32_t step; |
| 180 | + uint64_t total_size; |
| 181 | + |
| 182 | + for (step = START_SIZE; step <= size; step <<= 1) |
| 183 | + { |
| 184 | + total_size = (uint64_t)step * (uint64_t)repeat_cnt; |
| 185 | + |
| 186 | + Serial.printf("Memcpy %" PRIu32 " Bytes test\n", step); |
| 187 | + |
| 188 | + start_time = millis(); |
| 189 | + |
| 190 | + for (cnt = 0; cnt < repeat_cnt; cnt++) |
| 191 | + { |
| 192 | + memcpy(dest, src, step); |
| 193 | + } |
| 194 | + |
| 195 | + cost_time_system = millis() - start_time; |
| 196 | + |
| 197 | + start_time = millis(); |
| 198 | + |
| 199 | + for (cnt = 0; cnt < repeat_cnt; cnt++) |
| 200 | + { |
| 201 | + mock_memcpy(dest, src, step); |
| 202 | + } |
| 203 | + |
| 204 | + cost_time_mock = millis() - start_time; |
| 205 | + |
| 206 | + print_rate("System memcpy():", total_size, cost_time_system); |
| 207 | + print_rate("Mock memcpy():", total_size, cost_time_mock); |
| 208 | + } |
| 209 | +} |
| 210 | + |
| 211 | +static void memset_speed_test(void *dest, uint8_t value, size_t size, uint32_t repeat_num) |
| 212 | +{ |
| 213 | + uint32_t start_time; |
| 214 | + uint32_t cost_time_system; |
| 215 | + uint32_t cost_time_mock; |
| 216 | + uint32_t cnt; |
| 217 | + uint32_t step; |
| 218 | + uint64_t total_size; |
| 219 | + |
| 220 | + for (step = START_SIZE; step <= size; step <<= 1) |
| 221 | + { |
| 222 | + total_size = (uint64_t)step * (uint64_t)repeat_num; |
| 223 | + |
| 224 | + Serial.printf("Memset %" PRIu32 " Bytes test\n", step); |
| 225 | + |
| 226 | + start_time = millis(); |
| 227 | + |
| 228 | + for (cnt = 0; cnt < repeat_num; cnt++) |
| 229 | + { |
| 230 | + memset(dest, value, step); |
| 231 | + } |
| 232 | + |
| 233 | + cost_time_system = millis() - start_time; |
| 234 | + |
| 235 | + start_time = millis(); |
| 236 | + |
| 237 | + for (cnt = 0; cnt < repeat_num; cnt++) |
| 238 | + { |
| 239 | + mock_memset(dest, value, step); |
| 240 | + } |
| 241 | + |
| 242 | + cost_time_mock = millis() - start_time; |
| 243 | + |
| 244 | + print_rate("System memset():", total_size, cost_time_system); |
| 245 | + print_rate("Mock memset():", total_size, cost_time_mock); |
| 246 | + } |
| 247 | +} |
| 248 | + |
| 249 | +/* Main */ |
| 250 | + |
| 251 | +void setup() |
| 252 | +{ |
| 253 | + Serial.begin(115200); |
| 254 | + while (!Serial) delay(10); |
| 255 | + |
| 256 | + void *dest = ps_malloc(MAX_TEST_SIZE); |
| 257 | + const void *src = ps_malloc(MAX_TEST_SIZE); |
| 258 | + |
| 259 | + if (!dest || !src) |
| 260 | + { |
| 261 | + Serial.println("Memory allocation failed"); |
| 262 | + return; |
| 263 | + } |
| 264 | + |
| 265 | + log_d("Starting PSRAM speed test"); |
| 266 | + Serial.printf("Runs: %d\n", N_RUNS); |
| 267 | + Serial.printf("Copies: %d\n", N_COPIES); |
| 268 | + Serial.printf("Max test size: %d\n", MAX_TEST_SIZE); |
| 269 | + Serial.flush(); |
| 270 | + for (int i = 0; i < N_RUNS; i++) { |
| 271 | + log_d("Run %d", i); |
| 272 | + memcpy_speed_test(dest, src, MAX_TEST_SIZE, N_COPIES); |
| 273 | + Serial.flush(); |
| 274 | + memset_speed_test(dest, FILL_VALUE, MAX_TEST_SIZE, N_COPIES); |
| 275 | + Serial.flush(); |
| 276 | + } |
| 277 | + log_d("PSRAM speed test done"); |
| 278 | +} |
| 279 | + |
| 280 | +void loop() |
| 281 | +{ |
| 282 | + vTaskDelete(NULL); |
| 283 | +} |
0 commit comments