The size of the cache line is level 2 - C ++

Question

I'm writing a program to determine the size of the second-level cache line, I used the article http://igoro.com/archive/gallery-of-processor-cache-effects/. But I have completely different results. Through the Coreinfo program, I learned that the size of the first and second level strings is 64 bytes in length. While I decided to get at least the size of the first level line, but some completely inadequate results are obtained

#include "stdafx.h"
#include <time.h>
#include <iostream>
#include <string>


using namespace std;


int main()
{
    int t;
    const int N = 8000;
    volatile int arr[N];
    unsigned int A;
    char ask1 = 'y';

    srand(time(NULL));

    while (ask1 == 'y')
    {
        for (int j = 0; j < N; j++)
            arr[j] = rand();

        for (int i = 1; i <= 64; i++)
        {
            A = clock();
            for (int k = 0; k < 100000; k++)
                for (int j = 0; j < N; j += i)
                {
                    t = arr[j];
                }
            //cout << i << "\tstep, time\t" << clock() - A << '\t' << t << endl;
            arr[i] = (clock() - A); //Instead of printing, so as not to destroy the cache, I decided to write the execution time in the same array, and then output it
        }
        for (int i = 1; i <= 64; i++)
            cout << i << "\tstep, time\t" << arr[i] << endl;
        cout << "Repeat?(y/n): ";
        cin >> ask1;
        cout << endl;
    }
    /**/


    const int n = 1600000000;

    int l;

    unsigned int a;
    char ask = 'y';
    srand(time(NULL)); //it's just for random filling, so it's always been different, although here it's not really necessary for me
    while (ask == 'y')
    {
        volatile int byte8[2];

        for (int j = 0; j < 2; j++)
            byte8[j] = rand(); 

        a = clock();//write the time before reading array cycles
        for (int k = 0; k < n / 2; k++) /*divide n by the number of 
repetitions of the inner cycle, so that everywhere in
the same number of repetitions */
            for (int i = 0; i < 2; i++)
                l = byte8[i];
        cout << size(byte8) * 4 << "\tbytes\t" << clock() - a << endl; 
//output the number of ms needed for reading
// we repeat the same for arrays of longer length


        volatile int byte16[4];
        for (int j = 0; j < 4; j++)
            byte16[j] = rand();

        a = clock();
        for (int k = 0; k < n / 4; k++)
            for (int i = 0; i < 4; i++)
                l = byte16[i];
        cout << size(byte16) * 4 << "\tbytes\t" << clock() - a << endl;

        volatile int byte32[8];
        for (int j = 0; j < 8; j++)
            byte32[j] = rand();

        a = clock();
        for (int k = 0; k < n / 8; k++)
            for (int i = 0; i < 8; i++)
                l = byte32[i];
        cout << size(byte32) * 4 << "\tbytes\t" << clock() - a << endl;
        /*
        int byte60[15];
        for (int j = 0; j < 15; j++)
        byte60[j] = rand();

        a = clock();
        for (int k = 0; k < n / 15; k++)
        for (int i = 0; i < 15; i++)
        l = byte60[i];
        cout << size(byte60) * 4 << "\tbytes\t" << clock() - a << endl;
        */
        volatile int byte64[16];
        for (int j = 0; j < 16; j++)
            byte64[j] = rand();

        a = clock();
        for (int k = 0; k < n / 16; k++)
            for (int i = 0; i < 16; i++)
                l = byte64[i];
        cout << size(byte64) * 4 << "\tbytes\t" << clock() - a << endl;
        /*
        int byte68[17];
        for (int j = 0; j < 17; j++)
        byte68[j] = rand();

        a = clock();
        for (int k = 0; k < n / 17; k++)
        for (int i = 0; i < 17; i++)
        l = byte68[i];
        cout << size(byte68) * 4 << "\tbytes\t" << clock() - a << endl;
        */
        volatile int byte96[24];
        for (int j = 0; j < 24; j++)
            byte96[j] = rand();

        a = clock();
        for (int k = 0; k < n / 24; k++)
            for (int i = 0; i < 24; i++)
                l = byte96[i];
        cout << size(byte96) * 4 << "\tbytes\t" << clock() - a << endl;

        volatile int byte128[32];
        for (int j = 0; j < 32; j++)
            byte128[j] = rand();

        a = clock();
        for (int k = 0; k < n / 32; k++)
            for (int i = 0; i < 32; i++)
                l = byte128[i];
        cout << size(byte128) * 4 << "\tbytes\t" << clock() - a << endl;

        volatile int byte192[48];
        for (int j = 0; j < 48; j++)
            byte192[j] = rand();

        a = clock();
        for (int k = 0; k < n / 48; k++)
            for (int i = 0; i < 48; i++)
                l = byte192[i];
        cout << size(byte192) * 4 << "\tbytes\t" << clock() - a << endl;

        volatile int byte256[64];
        for (int j = 0; j < 64; j++)
            byte256[j] = rand();

        a = clock();
        for (int k = 0; k < n / 64; k++)
            for (int i = 0; i < 64; i++)
                l = byte256[i];
        cout << size(byte256) * 4 << "\tbytes\t" << clock() - a << endl;

        volatile int byte512[128];
        for (int j = 0; j < 128; j++)
            byte512[j] = rand();

        a = clock();
        for (int k = 0; k < n / 128; k++)
            for (int i = 0; i < 128; i++)
                l = byte512[i];
        cout << size(byte512) * 4 << "\tbytes\t" << clock() - a << endl;


        cout << "Repeat?(y/n): ";
        cin >> ask;
        cout << endl;
    }
    system("pause");
    return 0;
}

1 step, time 369
2 step, time 184
3 step, time 123
4 step, time 101
5 step, time 77
6 step, time 60
7 step, time 52
8 step, time 45
9 step, time 44
10 step, time 38
11 step, time 33
12 step, time 32
13 step, time 29
14 step, time 26
15 step, time 31
16 step, time 26
17 step, time 22
18 step, time 21
19 step, time 20
20 step, time 18
21 step, time 18
22 step, time 18
23 step, time 16
24 step, time 16
25 step, time 15
26 step, time 16
27 step, time 15
28 step, time 21
29 step, time 14
30 step, time 13
31 step, time 12
32 step, time 12
33 step, time 12
34 step, time 13
35 step, time 11
36 step, time 12
37 step, time 11
38 step, time 10
39 step, time 11
40 step, time 18
41 step, time 10
42 step, time 10
43 step, time 10
44 step, time 26
45 step, time 9
46 step, time 9
47 step, time 9
48 step, time 8
49 step, time 9
50 step, time 8
51 step, time 9
52 step, time 8
53 step, time 8
54 step, time 8
55 step, time 8
56 step, time 7
57 step, time 7
58 step, time 8
59 step, time 7
60 step, time 7
61 step, time 7
62 step, time 7
63 step, time 6
64 step, time 7
Repeat?(y/n): n
8 bytes 1227
16 bytes 1736
32 bytes 951
64 bytes 862
96 bytes 805
128 bytes 805
192 bytes 769
256 bytes 1232
512 bytes 909
Repeat?(y/n): y
8 bytes 1220
16 bytes 1739
32 bytes 944
64 bytes 842
96 bytes 815
128 bytes 804
192 bytes 781
256 bytes 1220
512 bytes 905
Repeat?(y/n):

The point is that I can not use functions like GetLogicalProcessorInformation, I need to define a test. I launch in Visual Studio 2017 with a configuration on Release. Sorry for my English

measures the time taken to read arrays from the cache so that based on these measurements, find the size of the cache line — Larteezy, Dec 24 '17 at 15:56
I haven't the read the code but it appears that you initialize the array, this will offset the results since the CPU has already cached and loaded the pages. A better test may be to allocate a dynamic area of memory (to rule out TLB effects, you can keep it 4KiB in size or pin it) and read from it with different strides. Without a prefetcher (you most likely have one) you should see the mean time required to read an item increase linearly with the stride. With a prefetcher, there should be a clear jump once the stride is equal the cache line size (since prefetching is useless at that point). — Margaret Bloom, Dec 25 '17 at 15:10
Thank you) But the topic can be closed. I was helped to understand this task in the Russian forum. (https://ru.stackoverflow.com/questions/762689/Размер-кэша-строки-2-го-уровня-c?noredirect=1#comment1184105_762689) — Larteezy, Dec 25 '17 at 17:48

The size of the cache line is level 2 - C ++

0 Answers0