2

I am working on a project where I have to read the color of a pixel on screen with CreateCompatibleBitmap and BitBlt. Unfortunately, this method is pretty slow: I am getting times of around 60 to 100 ms per iteration in a loop. I use this code:

// Capture the screen into ScreenData as 24-bit pixels.
// ScreenX, ScreenY and ScreenData are declared outside this snippet.

// Screen DC (NULL window = whole screen) and a memory DC compatible with it.
HDC hdc = GetDC(NULL), hdcMem = CreateCompatibleDC(hdc);
// Device-compatible bitmap of ScreenX x ScreenY pixels that receives the copy.
HBITMAP hBitmap = CreateCompatibleBitmap(hdc, ScreenX, ScreenY);
// Header describing the pixel format requested from GetDIBits below.
BITMAPINFOHEADER bmi = { 0 };
bmi.biSize = sizeof(BITMAPINFOHEADER);
bmi.biPlanes = 1;
bmi.biBitCount = 24; // 3 bytes per pixel
bmi.biWidth = ScreenX;
bmi.biHeight = -ScreenY; // negative height: rows are stored top-down
bmi.biCompression = BI_RGB; // uncompressed
SelectObject(hdcMem, hBitmap);
// Copy the screen contents into the memory bitmap.
BitBlt(hdcMem, 0, 0, ScreenX, ScreenY, hdc, 0, 0, SRCCOPY);
// Convert the bitmap into the 24-bit layout described by bmi and write it to ScreenData.
// NOTE(review): hBitmap is still selected into hdcMem at this point; the GetDIBits
// documentation says the bitmap must not be selected into a DC - confirm and deselect first.
GetDIBits(hdc, hBitmap, 0, ScreenY, ScreenData, (BITMAPINFO*)&bmi, DIB_RGB_COLORS);
// Release the GDI objects created above.
DeleteObject(hBitmap);
DeleteDC(hdcMem);
ReleaseDC(NULL, hdc);

Most of the time (95%) is spent in the BitBlt function. I have read that it takes so long because BitBlt has to convert the color formats, but I don't understand how I can avoid that...

I use Windows 11 and my screen res is FHD 1920x1080

Any suggestions how I could speed this program up?

A J
  • 21
  • 2
  • What are you doing with `screenData` after you do the above? – jwezorek Oct 24 '22 at 17:41
  • I read the pixel colors out of it, for example with the code: int pixel1red = ScreenData[3 * ((1080 * ScreenX) + 1920) + 2]; – A J Oct 24 '22 at 18:50
  • in the example 1920 is the x value of the pixel, 1080 is the y value, and the color is the red part of the RGB value – A J Oct 24 '22 at 18:53
  • int pixel1green = ScreenData[3 * ((1080 * ScreenX) + 1920) + 1]; would be the green part and int pixel1blue = ScreenData[3 * ((1080 * ScreenX) + 1920)]; the blue part of the RGB – A J Oct 24 '22 at 18:54

1 Answers1

1

You can use CreateDIBSection to create a kind of bitmap that gives you direct access to its pixel data, rather than creating a regular "compatible" bitmap and then copying the pixel data into a buffer using GetDIBits. I ran a benchmark of doing it this way, and while the time saving is there, it is not substantial: I get about a 10% speed improvement, which is not much.

Code below; I wrote it quickly, so I may have made a mistake. The point of the variable dummy in the following is to keep everything from being optimized away, because there is no other output.

#include <vector>
#include <iostream>
#include <Windows.h>
#include <chrono>

namespace c = std::chrono;

void get_screen_bytes1(void* ScreenData, int ScreenX, int ScreenY) {
    HDC hdc = GetDC(NULL), hdcMem = CreateCompatibleDC(hdc);
    HBITMAP hBitmap = CreateCompatibleBitmap(hdc, ScreenX, ScreenY);
    BITMAPINFOHEADER bmi = { 0 };
    bmi.biSize = sizeof(BITMAPINFOHEADER);
    bmi.biPlanes = 1;
    bmi.biBitCount = 24;
    bmi.biWidth = ScreenX;
    bmi.biHeight = -ScreenY;
    bmi.biCompression = BI_RGB;
    SelectObject(hdcMem, hBitmap);
    BitBlt(hdcMem, 0, 0, ScreenX, ScreenY, hdc, 0, 0, SRCCOPY);
    GetDIBits(hdc, hBitmap, 0, ScreenY, ScreenData, (BITMAPINFO*)&bmi, DIB_RGB_COLORS);
    DeleteObject(hBitmap);
    DeleteDC(hdcMem);
    ReleaseDC(NULL, hdc);
}

/// Result of a screen capture into a DIB section.
///
/// `handle` is the GDI bitmap; the holder of this struct is responsible for
/// passing it to DeleteObject. `data` points at the bitmap's pixel bytes.
/// NOTE(review): the pixel memory presumably belongs to the bitmap, so `data`
/// should not be used after `handle` has been deleted - confirm against the
/// CreateDIBSection documentation.
///
/// Both members default to null so that a default-constructed instance is a
/// well-defined "no bitmap" value rather than holding indeterminate pointers.
struct bitmap_info {
    HBITMAP handle = nullptr;
    uint8_t* data = nullptr;
};

/// Captures the top-left wd x hgt region of the screen into a freshly created
/// 24-bit, top-down DIB section and returns the bitmap together with a pointer
/// to its pixel bytes.
///
/// @param wd  Width of the captured region in pixels.
/// @param hgt Height of the captured region in pixels.
/// @return    On success, a bitmap_info whose `handle` must be released by the
///            caller with DeleteObject and whose `data` points at the pixels
///            (each row is padded to a multiple of 4 bytes). On failure both
///            members are null.
bitmap_info get_screen_bytes2(int wd, int hgt) {
    bitmap_info bi{};  // value-initialised: handle and data start out null

    HDC hdc_scr = GetDC(nullptr);
    if (hdc_scr == nullptr) {
        return bi;
    }

    BITMAPINFO bmi{};
    bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bmi.bmiHeader.biWidth = wd;
    bmi.bmiHeader.biHeight = -hgt;  // negative height: top-down rows
    bmi.bmiHeader.biPlanes = 1;
    bmi.bmiHeader.biBitCount = 24;
    bmi.bmiHeader.biCompression = BI_RGB;

    // CreateDIBSection stores the address of the pixel memory through its
    // ppvBits argument, so it must be given the ADDRESS of a pointer variable.
    // (The earlier code passed the uninitialised pointer value itself.)
    void* bits = nullptr;
    bi.handle = CreateDIBSection(hdc_scr, &bmi, DIB_RGB_COLORS, &bits, nullptr, 0);
    bi.data = static_cast<uint8_t*>(bits);

    if (bi.handle != nullptr) {
        HDC hdc = CreateCompatibleDC(hdc_scr);
        if (hdc != nullptr) {
            HGDIOBJ hbm_old = SelectObject(hdc, bi.handle);
            BitBlt(hdc, 0, 0, wd, hgt, hdc_scr, 0, 0, SRCCOPY);
            SelectObject(hdc, hbm_old);
            DeleteDC(hdc);
            // Make sure GDI has finished writing before the caller reads the
            // pixels directly through bi.data.
            GdiFlush();
        } else {
            // Without a memory DC nothing was captured; report failure rather
            // than returning a bitmap with meaningless contents.
            DeleteObject(bi.handle);
            bi = bitmap_info{};
        }
    }

    ReleaseDC(nullptr, hdc_scr);
    return bi;
}

/// Benchmarks get_screen_bytes1 against get_screen_bytes2 by running each one
/// n times and printing the mean time per call in microseconds, followed by
/// the percentage by which the second approach is faster than the first.
int main() {
    // Size of the captured region. 2560 * 3 is a multiple of 4, so rows carry
    // no padding and the buffer below is exactly large enough.
    constexpr auto scr_wd = 2560;
    constexpr auto scr_hgt = 1440;

    std::vector<uint8_t> buffer(scr_wd * scr_hgt * 3);

    // Monotonic clock: appropriate for measuring intervals.
    using bench_clock = std::chrono::steady_clock;

    // Accumulates one byte per capture and is printed at the end so the
    // captures cannot be optimised away. Must start at a defined value.
    int dummy = 0;
    const int n = 100;

    double sum = 0.0;
    for (int i = 0; i < n; ++i) {
        const auto start = bench_clock::now();
        get_screen_bytes1(buffer.data(), scr_wd, scr_hgt);
        sum += c::duration_cast<c::microseconds>(bench_clock::now() - start).count();
        dummy += buffer[0];
    }
    const double time1 = sum / n;
    std::cout << "get_screen_bytes1 => " << time1 << "\n";

    sum = 0.0;
    for (int i = 0; i < n; ++i) {
        const auto start = bench_clock::now();
        auto bmp_info = get_screen_bytes2(scr_wd, scr_hgt);
        sum += c::duration_cast<c::microseconds>(bench_clock::now() - start).count();
        if (bmp_info.data == nullptr) {
            // The capture failed; a timing for it would be meaningless.
            std::cerr << "get_screen_bytes2 failed\n";
            if (bmp_info.handle != nullptr) {
                DeleteObject(bmp_info.handle);
            }
            return 1;
        }
        dummy += bmp_info.data[0];
        DeleteObject(bmp_info.handle);
    }
    const double time2 = sum / n;
    std::cout << "get_screen_bytes2 => " << time2 << "\n";
    std::cout << dummy << "\n";

    // Report an actual percentage (positive = second approach is faster)
    // instead of the raw ratio time2 / time1.
    const double pcnt = time1 > 0.0 ? 100.0 * (time1 - time2) / time1 : 0.0;
    std::cout << "pcnt speed improvement => " << pcnt << "\n";

    return 0;
}
jwezorek
  • 8,592
  • 1
  • 29
  • 46
  • Alright, 10% is not much, but it's already not bad. I will try to implement it in my program – A J Oct 24 '22 at 20:17