/*
 * Pointer-chasing microbenchmark.
 *
 * Builds a circular singly linked list of cache-line-sized nodes over the
 * first N bytes of a static array, walks it ACCESSES times, and prints the
 * average number of time-stamp-counter ticks per dereference for a series
 * of working-set sizes.
 *
 * x86 only: uses the cpuid, rdtsc and clflush instructions through
 * GCC-style inline assembly.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Padding that brings one list node up to 64 bytes in total. */
#define NPAD (64 - sizeof(void *))
/* Number of pointer dereferences timed per working-set size. */
#define ACCESSES (1 << 28)
/* Size in bytes of the backing array (64 MiB). */
#define ARRAY_BYTES (1 << 26)

/*
 * Flush the cache line containing p.
 * The "memory" clobber keeps the compiler from moving memory accesses
 * across the flush.
 */
static inline void clflush(volatile void *p)
{
    __asm__ __volatile__ ("clflush (%0)" :: "r"(p) : "memory");
}

/*
 * Read the time-stamp counter and return it as a 64-bit value.
 *
 * cpuid is executed first so that earlier instructions complete before
 * rdtsc samples the counter. eax is set to 0 so that cpuid always runs the
 * same leaf instead of whatever value happened to be in the register.
 */
static inline uint64_t rdtsc(void)
{
    uint32_t lo, hi;

    __asm__ __volatile__ ("cpuid; rdtsc"
                          : "=a"(lo), "=d"(hi)
                          : "a"(0)
                          : "ebx", "ecx", "memory");
    return (uint64_t)lo | ((uint64_t)hi << 32);
}

/* One list node: a next pointer padded out to 64 bytes. */
struct l {
    struct l *next;
    char pad[NPAD];
};
typedef struct l l;

/* Backing storage for the list; only a prefix is linked on each run. */
l array[ARRAY_BYTES / sizeof(l)];

/*
 * Link the first bytes / sizeof(l) elements of array into a ring in
 * ascending address order, with the last element pointing back to array[0].
 *
 * The element count is clamped to [1, number of elements in array], so a
 * request smaller than one node yields a one-node ring and an oversized or
 * negative request can never write outside array.
 */
void init_sequential(int bytes)
{
    const size_t capacity = sizeof(array) / sizeof(array[0]);
    size_t elems = bytes > 0 ? (size_t)bytes / sizeof(l) : 0;

    if (elems < 1) {
        elems = 1;
    }
    if (elems > capacity) {
        elems = capacity;
    }

    for (size_t i = 1; i < elems; i++) {
        array[i - 1].next = &array[i];
    }
    array[elems - 1].next = &array[0];
}

/*
 * Follow the list starting at array[0] for the given number of steps.
 *
 * init_sequential() must have been called first; otherwise array[0].next
 * is still a null pointer. The pointee is volatile-qualified so that every
 * load is actually performed and the loop cannot be optimised away.
 */
void measure_baseline(int accesses)
{
    volatile l *ptr = &array[0];

    while (accesses-- > 0) {
        ptr = ptr->next;
    }
}

/* Working-set sizes to measure, in kilobytes. */
int rangeArr[] = {
    16, 32, 64, 128, 256, 512, 1024, 2048, 3072, 4096,
    6144, 8192, 10240, 12 * 1024, 14 * 1024, 16 * 1024
};

/*
 * For every entry of rangeArr: build a ring of that size, time ACCESSES
 * dereferences with rdtsc, and print the average ticks per access.
 */
static void test(void)
{
    const size_t count = sizeof(rangeArr) / sizeof(rangeArr[0]);

    for (size_t i = 0; i < count; i++) {
        int kilobytes = rangeArr[i];

        init_sequential(kilobytes * 1024);

        uint64_t start = rdtsc();
        measure_baseline(ACCESSES);
        uint64_t end = rdtsc();

        double ticksPerAccess = (double)(end - start) / ACCESSES;
        printf("%d kB took %lf ticks per access\n", kilobytes, ticksPerAccess);
    }
}

int main(int ac, char **av)
{
    /* Command-line arguments are not used. */
    (void)ac;
    (void)av;

    test();
    return 0;
}