0

I'm writing a program to determine the size of the second-level cache line, following the article http://igoro.com/archive/gallery-of-processor-cache-effects/, but I get completely different results. Using the Coreinfo program, I learned that the line size of both the first- and second-level caches is 64 bytes. I decided to start by measuring at least the first-level line size, but the results I get make no sense.

#include "stdafx.h"
#include <time.h>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>


using namespace std;


namespace
{

// Total number of element reads performed per buffer size in the second
// experiment; each run divides it by the buffer length so that every buffer
// size performs (approximately) the same number of reads.
constexpr int kTotalReads = 1600000000;

/// Prompts "Repeat?(y/n): " and reads a one-character answer from stdin.
///
/// @return true only when a character was actually read and it is 'y'.
///         A failed read (EOF, closed or broken stdin) returns false so the
///         calling loop terminates instead of re-running the benchmark
///         forever with a stale 'y'.
bool askRepeat()
{
    std::cout << "Repeat?(y/n): ";
    char answer = 'n';
    const bool readOk = static_cast<bool>(std::cin >> answer);
    std::cout << std::endl;
    return readOk && answer == 'y';
}

/// Experiment 1: reads an 8000-int buffer with strides 1..64 (in elements),
/// 100000 passes per stride, then prints the elapsed clock() ticks per stride.
///
/// NOTE(review): the buffer is 8000 * sizeof(int) bytes (about 32 KB with a
/// 4-byte int), which presumably fits in the L1 data cache of many CPUs. If
/// so, every read hits L1 and the time simply scales with the number of reads
/// (roughly 1/stride), which is what the reported output looks like. Exposing
/// the line size likely needs a buffer much larger than the cache under test
/// and a per-access (not per-sweep) time -- confirm on the target machine.
void runStrideSweep()
{
    constexpr int kElements = 8000;
    constexpr int kMaxStride = 64;
    constexpr int kPasses = 100000;

    // volatile forces the compiler to really perform every read in the loops.
    volatile int data[kElements];
    for (int j = 0; j < kElements; ++j)
        data[j] = std::rand();

    // Timings are collected here and printed only after all strides have run,
    // so that console output does not interleave with the measurements.
    // Index 0 is unused; the measured buffer itself is left untouched.
    std::clock_t elapsed[kMaxStride + 1] = {};

    for (int stride = 1; stride <= kMaxStride; ++stride)
    {
        const std::clock_t start = std::clock();
        for (int pass = 0; pass < kPasses; ++pass)
        {
            for (int j = 0; j < kElements; j += stride)
            {
                const int value = data[j]; // volatile read is the measured work
                (void)value;
            }
        }
        elapsed[stride] = std::clock() - start;
    }

    for (int stride = 1; stride <= kMaxStride; ++stride)
        std::cout << stride << "\tstep, time\t" << elapsed[stride] << std::endl;
}

/// Experiment 2, one buffer size: fills a Count-element int buffer with random
/// values, reads it back repeatedly for about kTotalReads element reads in
/// total, and prints "<size in bytes>\tbytes\t<elapsed clock() ticks>".
///
/// The size is printed as sizeof(buffer) rather than Count * 4, so the label
/// stays correct on platforms where int is not 4 bytes.
template <int Count>
void timeSmallBuffer()
{
    volatile int buffer[Count];
    for (int j = 0; j < Count; ++j)
        buffer[j] = std::rand();

    const std::clock_t start = std::clock();
    for (int rep = 0; rep < kTotalReads / Count; ++rep)
    {
        for (int i = 0; i < Count; ++i)
        {
            const int value = buffer[i]; // volatile read is the measured work
            (void)value;
        }
    }
    std::cout << sizeof(buffer) << "\tbytes\t" << std::clock() - start << std::endl;
}

} // namespace

/// Runs two interactive timing experiments, each repeated for as long as the
/// user answers 'y' at the prompt:
///   1. a stride sweep over an 8000-int buffer (runStrideSweep);
///   2. repeated full reads of small buffers of 2..128 ints (timeSmallBuffer).
/// Finally issues the "pause" shell command (as the original program did) and
/// returns 0.
int main()
{
    // The random values only serve as buffer contents; one seed is enough.
    std::srand(static_cast<unsigned int>(std::time(nullptr)));

    do
    {
        runStrideSweep();
    } while (askRepeat());

    do
    {
        timeSmallBuffer<2>();      // 8 bytes with a 4-byte int
        timeSmallBuffer<4>();      // 16 bytes
        timeSmallBuffer<8>();      // 32 bytes
        // timeSmallBuffer<15>();  // 60 bytes  (disabled in the original)
        timeSmallBuffer<16>();     // 64 bytes
        // timeSmallBuffer<17>();  // 68 bytes  (disabled in the original)
        timeSmallBuffer<24>();     // 96 bytes
        timeSmallBuffer<32>();     // 128 bytes
        timeSmallBuffer<48>();     // 192 bytes
        timeSmallBuffer<64>();     // 256 bytes
        timeSmallBuffer<128>();    // 512 bytes
    } while (askRepeat());

    std::system("pause");
    return 0;
}
  • 1 step, time 369
  • 2 step, time 184
  • 3 step, time 123
  • 4 step, time 101
  • 5 step, time 77
  • 6 step, time 60
  • 7 step, time 52
  • 8 step, time 45
  • 9 step, time 44
  • 10 step, time 38
  • 11 step, time 33
  • 12 step, time 32
  • 13 step, time 29
  • 14 step, time 26
  • 15 step, time 31
  • 16 step, time 26
  • 17 step, time 22
  • 18 step, time 21
  • 19 step, time 20
  • 20 step, time 18
  • 21 step, time 18
  • 22 step, time 18
  • 23 step, time 16
  • 24 step, time 16
  • 25 step, time 15
  • 26 step, time 16
  • 27 step, time 15
  • 28 step, time 21
  • 29 step, time 14
  • 30 step, time 13
  • 31 step, time 12
  • 32 step, time 12
  • 33 step, time 12
  • 34 step, time 13
  • 35 step, time 11
  • 36 step, time 12
  • 37 step, time 11
  • 38 step, time 10
  • 39 step, time 11
  • 40 step, time 18
  • 41 step, time 10
  • 42 step, time 10
  • 43 step, time 10
  • 44 step, time 26
  • 45 step, time 9
  • 46 step, time 9
  • 47 step, time 9
  • 48 step, time 8
  • 49 step, time 9
  • 50 step, time 8
  • 51 step, time 9
  • 52 step, time 8
  • 53 step, time 8
  • 54 step, time 8
  • 55 step, time 8
  • 56 step, time 7
  • 57 step, time 7
  • 58 step, time 8
  • 59 step, time 7
  • 60 step, time 7
  • 61 step, time 7
  • 62 step, time 7
  • 63 step, time 6
  • 64 step, time 7
  • Repeat?(y/n): n

  • 8 bytes 1227

  • 16 bytes 1736
  • 32 bytes 951
  • 64 bytes 862
  • 96 bytes 805
  • 128 bytes 805
  • 192 bytes 769
  • 256 bytes 1232
  • 512 bytes 909
  • Repeat?(y/n): y

  • 8 bytes 1220

  • 16 bytes 1739
  • 32 bytes 944
  • 64 bytes 842
  • 96 bytes 815
  • 128 bytes 804
  • 192 bytes 781
  • 256 bytes 1220
  • 512 bytes 905
  • Repeat?(y/n):

The point is that I cannot use functions like GetLogicalProcessorInformation; I need to determine the line size with a test. I am building in Visual Studio 2017 with the Release configuration. Sorry for my English.

Larteezy
  • 1
  • 1
  • That's a lot of code. What is it doing? – Oliver Charlesworth Dec 24 '17 at 15:39
  • measures the time taken to read arrays from the cache so that based on these measurements, find the size of the cache line – Larteezy Dec 24 '17 at 15:56
  • I haven't read the code, but it appears that you initialize the array; this will skew the results, since the CPU has already cached and loaded the pages. A better test may be to allocate a dynamic area of memory (to rule out TLB effects, you can keep it 4KiB in size or pin it) and read from it with different strides. Without a prefetcher (you most likely have one) you should see the mean time required to read an item increase linearly with the stride. With a prefetcher, there should be a clear jump once the stride is equal to the cache line size (since prefetching is useless at that point). – Margaret Bloom Dec 25 '17 at 15:10
  • Thank you) But the topic can be closed. I was helped to understand this task in the Russian forum. (https://ru.stackoverflow.com/questions/762689/Размер-кэша-строки-2-го-уровня-c?noredirect=1#comment1184105_762689) – Larteezy Dec 25 '17 at 17:48

0 Answers0