I wanted to use the CPUID instruction to get the size for each cache level (L1, L2, L3).
I've been reading the "AMD64 Architecture Programmer’s Manual Volume 3: General-Purpose and System Instructions". Page 613 has the information relevant to L1 (CPUID function 0x80000005).
Page 616 has the information relevant to L2 and L3 (CPUID function 0x80000006).
I wrote a simple C program with inline assembly to print out these values.
// Translates the 4-bit associativity code that CPUID function 0x80000006
// reports for the L2 and L3 caches into a human-readable string.
//
// code: the raw associativity field. The callers in this file mask it to
//       4 bits, but any value is accepted.
//
// Returns a pointer to a string literal. The result is never NULL, so it can
// be passed directly to printf("%s"); the caller must not modify or free it.
const char* amd_L2_L3_associotivity_str(uint32_t code)
{
    switch (code) {
    case 0x0: return "Disabled";
    case 0x1: return "1 way (direct mapped)";
    case 0x2: return "2 way";
    case 0x3: return "3 way";
    case 0x4: return "4 way";
    case 0x5: return "6 way";
    case 0x6: return "8 way";
    case 0x8: return "16 way";
    // Code 9 means this leaf does not describe the cache; the real values
    // have to be read from CPUID function 0x8000001D instead.
    case 0x9: return "Value for all fields should be determined from Fn8000_001D";
    case 0xA: return "32 way";
    case 0xB: return "48 way";
    case 0xC: return "64 way";
    case 0xD: return "96 way";
    case 0xE: return "128 way";
    case 0xF: return "Fully Associative";
    // Code 7 and anything above 0xF have no assigned meaning. Return a
    // printable string rather than asserting: with NDEBUG the assert vanished
    // and a null pointer reached printf("%s").
    default:  return "Reserved";
    }
}
// Executes CPUID functions 0x80000005 and 0x80000006 and prints the fields
// decoded from ECX and EDX to stdout.
//
// NOTE(review): this version takes each "size" field from the LOW bits of the
// register and each "line size" field from the HIGH bits. The revised version
// later in this post reads them in the opposite order, so the values printed
// here are not the real cache parameters.
void cpuid_caches_amd()
{
    uint32_t eax, ebx, ecx, edx;

    // ---- Function 0x80000005: L1 data cache in ECX, L1 instruction cache in EDX ----
    eax = 0x80000005;
    __asm__ (
        "cpuid"
        : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
    );

    const uint32_t l1d_size        = ecx & 0xFF;
    const uint32_t l1d_assoc       = (ecx & 0xFF00) >> 8;
    const uint32_t l1d_linesPerTag = (ecx & 0xFF0000) >> 16;
    const uint32_t l1d_lineSize    = ecx >> 24;

    const uint32_t l1i_size        = edx & 0xFF;
    const uint32_t l1i_assoc       = (edx & 0xFF00) >> 8;
    const uint32_t l1i_linesPerTag = (edx & 0xFF0000) >> 16;
    const uint32_t l1i_lineSize    = edx >> 24;

    printf(
        "L1 Data Cache:\n"
        "\tSize: %d KB\n"
        "\tAssociativity: %d\n"
        "\tLines per Tag: %d\n"
        "\tLine Size: %d B\n"
        "\n"
        "L1 Instruction Cache:\n"
        "\tSize: %d KB\n"
        "\tAssociativity: %d\n"
        "\tLines per Tag: %d\n"
        "\tLine Size: %d B\n",
        l1d_size, l1d_assoc, l1d_linesPerTag, l1d_lineSize,
        l1i_size, l1i_assoc, l1i_linesPerTag, l1i_lineSize
    );

    // ---- Function 0x80000006: L2 cache in ECX, L3 cache in EDX ----
    eax = 0x80000006;
    __asm__ (
        "cpuid"
        : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
    );

    const uint32_t l2_size        = ecx & 0xFFFF;
    const uint32_t l2_assoc       = (ecx & 0xF0000) >> 16;
    const uint32_t l2_linesPerTag = (ecx & 0xF00000) >> 20;
    const uint32_t l2_lineSize    = ecx >> 24;

    const uint32_t l3_size        = edx & 0x3FFF;
    const uint32_t l3_assoc       = (edx & 0xF0000) >> 16;
    const uint32_t l3_linesPerTag = (edx & 0xF00000) >> 20;
    const uint32_t l3_lineSize    = edx >> 24;

    // The L3 size field is scaled by 512 before being printed as KB.
    const uint32_t l3_size_kb = l3_size * 512;

    printf(
        "L2 Cache:\n"
        "\tSize: %d KB\n"
        "\tAssociativity: %s\n"
        "\tLines per Tag: %d\n"
        "\tLine Size: %d B\n"
        "\n"
        "L3 Cache:\n"
        "\tSize: %d KB\n"
        "\tAssociativity: %s\n"
        "\tLines per Tag: %d\n"
        "\tLine Size: %d B\n",
        l2_size,
        amd_L2_L3_associotivity_str(l2_assoc),
        l2_linesPerTag,
        l2_lineSize,
        l3_size_kb,
        amd_L2_L3_associotivity_str(l3_assoc),
        l3_linesPerTag,
        l3_lineSize
    );
}
// Program entry point: prints the AMD cache report, then exits with status 0.
int main()
{
    cpuid_caches_amd();
    return 0;
}
This is the output of the program for my Ryzen 3700X:
L1 Data Cache:
Size: 64 KB
Associativity: 1
Lines per Tag: 8
Line Size: 32 B
L1 Instruction Cache:
Size: 64 KB
Associativity: 1
Lines per Tag: 8
Line Size: 32 B
L2 Cache:
Size: 24896 KB
Associativity: Disabled
Lines per Tag: 0
Line Size: 2 B
L3 Cache:
Size: 2260992 KB
Associativity: Disabled
Lines per Tag: 0
Line Size: 1 B
According to this, I have about 2 GB of L3 cache, which is not possible. According to the official specs, the CPU should have L1: 512 KB, L2: 4 MB, L3: 32 MB.
Another thing that puzzles me is that the L1DcSize field
is only 8 bits wide. That only allows values up to 255 KB to be represented, although my CPU should have 512 KB of L1!
What's wrong with my code? How can I get the actual cache sizes?
EDIT:
Thanks, everyone, for the replies. As people pointed out, my bit-field order was inverted: I was reading each field from the wrong end of the register. The question about the L1 cache size still stands. This is what the code looks like now:
// Prints the L1, L2 and L3 cache parameters reported by CPUID functions
// 0x80000005 (L1) and 0x80000006 (L2 and L3) to stdout.
//
// Bit positions decoded below:
//   0x80000005  ECX = L1 data cache, EDX = L1 instruction cache
//       [31:24] size (KB)   [23:16] associativity
//       [15:8]  lines per tag   [7:0] line size (bytes)
//   0x80000006  ECX = L2 cache
//       [31:16] size (KB)   [15:12] associativity code
//       [11:8]  lines per tag   [7:0] line size (bytes)
//   0x80000006  EDX = L3 cache
//       [31:18] size (units of 512 KB)   [15:12] associativity code
//       [11:8]  lines per tag   [7:0] line size (bytes)
//
// NOTE(review): the L1 and L2 sizes look like per-core figures rather than
// chip totals (32 KB and 512 KB on the 8-core part in this post) — confirm
// against the manual before comparing with marketing specs.
void cpuid_caches_amd()
{
    uint32_t eax, ebx, ecx, edx;

    { // L1
        eax = 0x80000005; // CPUID function number for the L1 cache information
        __asm__ (
            "cpuid" // results are returned in EAX, EBX, ECX and EDX
            : "+a" (eax)
            , "=b" (ebx)
            , "=c" (ecx)
            , "=d" (edx)
        );

        // Plain `unsigned` so the values match the %u conversions exactly.
        unsigned
            dataCache_size          = (ecx >> 24) & 0xFF,
            dataCache_associativity = (ecx >> 16) & 0xFF,
            dataCache_linesPerTag   = (ecx >> 8) & 0xFF,
            dataCache_lineSize      = ecx & 0xFF;

        unsigned
            instrCache_size          = (edx >> 24) & 0xFF,
            instrCache_associativity = (edx >> 16) & 0xFF,
            instrCache_linesPerTag   = (edx >> 8) & 0xFF,
            instrCache_lineSize      = edx & 0xFF;

        printf(
            "L1 Data Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %u\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            "\n"
            "L1 Instruction Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %u\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            ,
            dataCache_size,
            dataCache_associativity,
            dataCache_linesPerTag,
            dataCache_lineSize,
            instrCache_size,
            instrCache_associativity,
            instrCache_linesPerTag,
            instrCache_lineSize
        );
    }

    { // L2, L3
        eax = 0x80000006; // CPUID function number for the L2 and L3 cache information
        __asm__ (
            "cpuid" // results are returned in EAX, EBX, ECX and EDX
            : "+a" (eax)
            , "=b" (ebx)
            , "=c" (ecx)
            , "=d" (edx)
        );

        unsigned
            L2_size          = (ecx >> 16) & 0xFFFF,
            L2_associativity = (ecx >> 12) & 0xF,
            L2_linesPerTag   = (ecx >> 8) & 0xF,
            L2_lineSize      = ecx & 0xFF;

        unsigned
            L3_size          = (edx >> 18) & 0x3FFF,
            L3_associativity = (edx >> 12) & 0xF,
            L3_linesPerTag   = (edx >> 8) & 0xF,
            L3_lineSize      = edx & 0xFF;

        printf(
            "L2 Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            "\n"
            "L3 Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            ,
            L2_size,
            amd_L2_L3_associotivity_str(L2_associativity),
            L2_linesPerTag,
            L2_lineSize,
            L3_size * 512, // the field counts 512 KB units; 14 bits * 512 fits in 32 bits
            amd_L2_L3_associotivity_str(L3_associativity),
            L3_linesPerTag,
            L3_lineSize
        );
    }
}
And the new output:
L1 Data Cache:
Size: 32 KB
Associativity: 8
Lines per Tag: 1
Line Size: 64 B
L1 Instruction Cache:
Size: 32 KB
Associativity: 8
Lines per Tag: 1
Line Size: 64 B
L2 Cache:
Size: 512 KB
Associativity: 8 way
Lines per Tag: 1
Line Size: 64 B
L3 Cache:
Size: 32768 KB
Associativity: Value for all fields should be determined from Fn8000_001D
Lines per Tag: 1
Line Size: 64 B