You can use CreateDIBSection
to create a kind of bitmap that gives you direct access to its pixel data, rather than creating a regular "compatible" bitmap and then copying the pixel data into a buffer using GetDIBits.
However, I ran a benchmark of doing it this way, and while there is a time saving, it is not substantial: I get about a 10% speed improvement, which is not much.
The code is below; I wrote it quickly, so I may have made a mistake. The point of the variable dummy
in the following is to keep everything from being optimized away, since there is otherwise no output.
#include <vector>
#include <iostream>
#include <Windows.h>
#include <chrono>
namespace c = std::chrono;
void get_screen_bytes1(void* ScreenData, int ScreenX, int ScreenY) {
HDC hdc = GetDC(NULL), hdcMem = CreateCompatibleDC(hdc);
HBITMAP hBitmap = CreateCompatibleBitmap(hdc, ScreenX, ScreenY);
BITMAPINFOHEADER bmi = { 0 };
bmi.biSize = sizeof(BITMAPINFOHEADER);
bmi.biPlanes = 1;
bmi.biBitCount = 24;
bmi.biWidth = ScreenX;
bmi.biHeight = -ScreenY;
bmi.biCompression = BI_RGB;
SelectObject(hdcMem, hBitmap);
BitBlt(hdcMem, 0, 0, ScreenX, ScreenY, hdc, 0, 0, SRCCOPY);
GetDIBits(hdc, hBitmap, 0, ScreenY, ScreenData, (BITMAPINFO*)&bmi, DIB_RGB_COLORS);
DeleteObject(hBitmap);
DeleteDC(hdcMem);
ReleaseDC(NULL, hdc);
}
// Result of a DIB-section screen capture: the GDI bitmap handle plus a pointer
// to that bitmap's pixel bits. The caller releases the bitmap by passing
// `handle` to DeleteObject; `data` must not be used after that.
// Both members default to null so a default-constructed value is never
// left holding indeterminate pointers.
struct bitmap_info {
    HBITMAP handle = nullptr;  // DIB section handle
    uint8_t* data = nullptr;   // pixel bits belonging to `handle`
};
// Captures the top-left wd x hgt region of the screen into a freshly created
// top-down 24-bit DIB section and returns its handle together with a pointer
// to its pixel bits.
//
// The caller owns the returned bitmap and must release it with DeleteObject.
// On failure the returned handle and data are both null.
bitmap_info get_screen_bytes2(int wd, int hgt) {
    bitmap_info bi{};  // zero-initialized: null handle, null data

    HDC hdc_scr = GetDC(nullptr);
    if (!hdc_scr) {
        return bi;
    }

    BITMAPINFO bmi = {};
    bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bmi.bmiHeader.biWidth = wd;
    bmi.bmiHeader.biHeight = -hgt;  // negative height => top-down rows
    bmi.bmiHeader.biPlanes = 1;
    bmi.bmiHeader.biBitCount = 24;
    bmi.bmiHeader.biCompression = BI_RGB;

    // CreateDIBSection writes the pixel-buffer address through this pointer,
    // so it must be given the ADDRESS of a pointer variable, not the value of
    // an uninitialized pointer.
    void* bits = nullptr;
    bi.handle = CreateDIBSection(hdc_scr, &bmi, DIB_RGB_COLORS, &bits, nullptr, 0);
    bi.data = static_cast<uint8_t*>(bits);

    if (bi.handle) {
        HDC hdc = CreateCompatibleDC(hdc_scr);
        if (hdc) {
            auto hbm_old = SelectObject(hdc, bi.handle);
            BitBlt(hdc, 0, 0, wd, hgt, hdc_scr, 0, 0, SRCCOPY);
            SelectObject(hdc, hbm_old);
            DeleteDC(hdc);
            // Make sure batched GDI drawing has completed before the caller
            // reads the DIB section's memory directly.
            GdiFlush();
        }
    }

    ReleaseDC(nullptr, hdc_scr);
    return bi;
}
// Benchmarks the two screen-capture strategies over n iterations each and
// prints the mean time per capture in microseconds, followed by the relative
// improvement of get_screen_bytes2 over get_screen_bytes1 as a percentage.
int main() {
    constexpr auto scr_wd = 2560;
    constexpr auto scr_hgt = 1440;
    // 24-bit DIB rows are padded to a multiple of 4 bytes; size the buffer by
    // the padded stride so widths that are not a multiple of 4 do not overflow.
    constexpr auto row_bytes = ((scr_wd * 3 + 3) / 4) * 4;
    std::vector<uint8_t> buffer(static_cast<std::size_t>(row_bytes) * scr_hgt);

    // Accumulates one captured byte per iteration so the captures have an
    // observable result. Must start at zero: reading it uninitialized is UB.
    int dummy = 0;
    double sum = 0.0;
    const int n = 100;

    for (int i = 0; i < n; ++i) {
        const auto start = c::steady_clock::now();
        get_screen_bytes1(buffer.data(), scr_wd, scr_hgt);
        sum += c::duration_cast<c::microseconds>(c::steady_clock::now() - start).count();
        dummy += buffer[0];
    }
    const double time1 = sum / n;
    std::cout << "get_screen_bytes1 => " << time1 << "\n";

    sum = 0.0;
    for (int i = 0; i < n; ++i) {
        const auto start = c::steady_clock::now();
        auto bmp_info = get_screen_bytes2(scr_wd, scr_hgt);
        sum += c::duration_cast<c::microseconds>(c::steady_clock::now() - start).count();
        // A failed capture yields null members; do not dereference or delete them.
        if (bmp_info.data) {
            dummy += bmp_info.data[0];
        }
        if (bmp_info.handle) {
            DeleteObject(bmp_info.handle);
        }
    }
    const double time2 = sum / n;
    std::cout << "get_screen_bytes2 => " << time2 << "\n";
    std::cout << dummy << "\n";
    // Report an actual percentage (positive means get_screen_bytes2 is faster)
    // rather than the raw time2 / time1 ratio.
    std::cout << "pcnt speed improvement => " << 100.0 * (time1 - time2) / time1 << "\n";
}