I can't speak for PHP, but in COM, a BSTR is not the correct type to use for passing around binary data; use a SAFEARRAY(VT_UI1) instead:
/// Copies BUFF_SIZE bytes out of a named file-mapping object into a new
/// one-dimensional SAFEARRAY of VT_UI1 (raw bytes).
///
/// @param filepath  Name of the file-mapping object to open; must not be null.
/// @param Ofile     Receives the newly created SAFEARRAY on success. It is
///                  set to nullptr on entry, so it stays null on every
///                  failure path.
/// @return S_OK on success; E_POINTER if Ofile is null; E_INVALIDARG if
///         filepath is null; HRESULT_FROM_WIN32(GetLastError()) if the
///         mapping cannot be opened or mapped; E_OUTOFMEMORY if the array
///         cannot be created; the failing HRESULT if the array data cannot
///         be locked.
STDMETHODIMP CMemReaderImpl::ReadFile(BSTR filepath, SAFEARRAY** Ofile)
{
    if (!Ofile)
        return E_POINTER;
    *Ofile = nullptr;

    if (!filepath)
        return E_INVALIDARG;

    // A BSTR is always a wide string, so call the W variant explicitly
    // instead of relying on the UNICODE-dependent OpenFileMapping macro.
    HANDLE hFileMap = OpenFileMappingW(FILE_MAP_READ, FALSE, filepath);
    if (!hFileMap) {
        DWORD err = GetLastError();
        return HRESULT_FROM_WIN32(err);
    }

    LPBYTE lpBuffer = static_cast<LPBYTE>(MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, BUFF_SIZE));
    if (!lpBuffer) {
        // Capture the error before CloseHandle() can overwrite it.
        DWORD err = GetLastError();
        CloseHandle(hFileMap);
        return HRESULT_FROM_WIN32(err);
    }

    SAFEARRAYBOUND bounds;
    bounds.lLbound = 0;
    bounds.cElements = BUFF_SIZE;

    SAFEARRAY *sa = SafeArrayCreate(VT_UI1, 1, &bounds);
    if (!sa) {
        UnmapViewOfFile(lpBuffer);
        CloseHandle(hFileMap);
        return E_OUTOFMEMORY;
    }

    // Do not copy unless the array data was actually locked; otherwise
    // 'data' would be an invalid destination.
    void *data = nullptr;
    HRESULT hr = SafeArrayAccessData(sa, &data);
    if (FAILED(hr)) {
        SafeArrayDestroy(sa);
        UnmapViewOfFile(lpBuffer);
        CloseHandle(hFileMap);
        return hr;
    }

    memcpy(data, lpBuffer, BUFF_SIZE);
    SafeArrayUnaccessData(sa);

    UnmapViewOfFile(lpBuffer);
    CloseHandle(hFileMap);

    *Ofile = sa;
    return S_OK;
}
I don't know if that is compatible with PHP, though.
If you must use BSTR, try SysAllocStringByteLen() to store the bytes as-is without any conversion to Unicode:
/// Copies BUFF_SIZE bytes out of a named file-mapping object into a BSTR
/// allocated with SysAllocStringByteLen(), i.e. the bytes are stored as-is
/// with no ANSI-to-Unicode conversion.
///
/// @param filepath  Name of the file-mapping object to open; must not be null.
/// @param Ofile     Receives the newly allocated BSTR on success. It is set
///                  to nullptr on entry, so it stays null on every failure
///                  path.
/// @return S_OK on success; E_POINTER if Ofile is null; E_INVALIDARG if
///         filepath is null; HRESULT_FROM_WIN32(GetLastError()) if the
///         mapping cannot be opened or mapped; E_OUTOFMEMORY if the BSTR
///         cannot be allocated.
STDMETHODIMP CMemReaderImpl::ReadFile(BSTR filepath, BSTR* Ofile)
{
    if (!Ofile)
        return E_POINTER;
    *Ofile = nullptr;

    if (!filepath)
        return E_INVALIDARG;

    // A BSTR is always a wide string, so call the W variant explicitly
    // instead of relying on the UNICODE-dependent OpenFileMapping macro.
    HANDLE hFileMap = OpenFileMappingW(FILE_MAP_READ, FALSE, filepath);
    if (!hFileMap) {
        DWORD err = GetLastError();
        return HRESULT_FROM_WIN32(err);
    }

    LPSTR lpBuffer = static_cast<LPSTR>(MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, BUFF_SIZE));
    if (!lpBuffer) {
        // Capture the error before CloseHandle() can overwrite it.
        DWORD err = GetLastError();
        CloseHandle(hFileMap);
        return HRESULT_FROM_WIN32(err);
    }

    BSTR bstr = SysAllocStringByteLen(lpBuffer, BUFF_SIZE);

    // The view and handle are no longer needed whether or not the
    // allocation succeeded.
    UnmapViewOfFile(lpBuffer);
    CloseHandle(hFileMap);

    // A null BSTR means the allocation failed.
    if (!bstr)
        return E_OUTOFMEMORY;

    *Ofile = bstr;
    return S_OK;
}
If that does not work for PHP, DO NOT use MultiByteToWideChar(CP_ACP) on binary data, as CP_ACP will corrupt the data! Codepage 28591 (ISO-8859-1) is a better choice to avoid corruption, as bytes encoded in ISO-8859-1 have the same numeric values as the Unicode codepoints they represent:
/// Copies BUFF_SIZE bytes out of a named file-mapping object into a BSTR,
/// widening them with MultiByteToWideChar() using codepage 28591
/// (ISO-8859-1).
///
/// @param filepath  Name of the file-mapping object to open; must not be null.
/// @param Ofile     Receives the newly allocated BSTR on success. It is set
///                  to nullptr on entry, so it stays null on every failure
///                  path.
/// @return S_OK on success; E_POINTER if Ofile is null; E_INVALIDARG if
///         filepath is null; HRESULT_FROM_WIN32(GetLastError()) if the
///         mapping cannot be opened or mapped, or if either conversion call
///         fails; E_OUTOFMEMORY if the BSTR cannot be allocated.
STDMETHODIMP CMemReaderImpl::ReadFile(BSTR filepath, BSTR* Ofile)
{
    if (!Ofile)
        return E_POINTER;
    *Ofile = nullptr;

    if (!filepath)
        return E_INVALIDARG;

    // A BSTR is always a wide string, so call the W variant explicitly
    // instead of relying on the UNICODE-dependent OpenFileMapping macro.
    HANDLE hFileMap = OpenFileMappingW(FILE_MAP_READ, FALSE, filepath);
    if (!hFileMap) {
        DWORD err = GetLastError();
        return HRESULT_FROM_WIN32(err);
    }

    LPSTR lpBuffer = static_cast<LPSTR>(MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, BUFF_SIZE));
    if (!lpBuffer) {
        // Capture the error before CloseHandle() can overwrite it.
        DWORD err = GetLastError();
        CloseHandle(hFileMap);
        return HRESULT_FROM_WIN32(err);
    }

    // First pass: ask how many wide characters the conversion produces.
    int wslen = MultiByteToWideChar(28591, 0, lpBuffer, BUFF_SIZE, nullptr, 0);
    if (wslen == 0) {
        DWORD err = GetLastError();
        UnmapViewOfFile(lpBuffer);
        CloseHandle(hFileMap);
        return HRESULT_FROM_WIN32(err);
    }

    // A null BSTR means the allocation failed.
    BSTR bstr = SysAllocStringLen(nullptr, wslen);
    if (!bstr) {
        UnmapViewOfFile(lpBuffer);
        CloseHandle(hFileMap);
        return E_OUTOFMEMORY;
    }

    // Second pass: perform the conversion into the allocated BSTR.
    if (MultiByteToWideChar(28591, 0, lpBuffer, BUFF_SIZE, bstr, wslen) == 0) {
        DWORD err = GetLastError();
        SysFreeString(bstr);
        UnmapViewOfFile(lpBuffer);
        CloseHandle(hFileMap);
        return HRESULT_FROM_WIN32(err);
    }

    UnmapViewOfFile(lpBuffer);
    CloseHandle(hFileMap);

    *Ofile = bstr;
    return S_OK;
}
Otherwise, you can simply promote each 8-bit byte as-is to a 16-bit character manually:
/// Copies BUFF_SIZE bytes out of a named file-mapping object into a BSTR,
/// widening each byte to one OLECHAR with the same numeric value.
///
/// @param filepath  Name of the file-mapping object to open; must not be null.
/// @param Ofile     Receives the newly allocated BSTR on success. It is set
///                  to nullptr on entry, so it stays null on every failure
///                  path.
/// @return S_OK on success; E_POINTER if Ofile is null; E_INVALIDARG if
///         filepath is null; HRESULT_FROM_WIN32(GetLastError()) if the
///         mapping cannot be opened or mapped; E_OUTOFMEMORY if the BSTR
///         cannot be allocated.
STDMETHODIMP CMemReaderImpl::ReadFile(BSTR filepath, BSTR* Ofile)
{
    if (!Ofile)
        return E_POINTER;
    *Ofile = nullptr;

    if (!filepath)
        return E_INVALIDARG;

    // A BSTR is always a wide string, so call the W variant explicitly
    // instead of relying on the UNICODE-dependent OpenFileMapping macro.
    HANDLE hFileMap = OpenFileMappingW(FILE_MAP_READ, FALSE, filepath);
    if (!hFileMap) {
        DWORD err = GetLastError();
        return HRESULT_FROM_WIN32(err);
    }

    LPBYTE lpBuffer = static_cast<LPBYTE>(MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, BUFF_SIZE));
    if (!lpBuffer) {
        // Capture the error before CloseHandle() can overwrite it.
        DWORD err = GetLastError();
        CloseHandle(hFileMap);
        return HRESULT_FROM_WIN32(err);
    }

    BSTR bstr = SysAllocStringLen(nullptr, BUFF_SIZE);
    if (!bstr) {
        UnmapViewOfFile(lpBuffer);
        CloseHandle(hFileMap);
        return E_OUTOFMEMORY;
    }

    // lpBuffer is unsigned, so each byte 0x00-0xFF becomes U+0000-U+00FF
    // with no sign extension.
    for (int i = 0; i < BUFF_SIZE; ++i)
        bstr[i] = static_cast<OLECHAR>(lpBuffer[i]);

    UnmapViewOfFile(lpBuffer);
    CloseHandle(hFileMap);

    *Ofile = bstr;
    return S_OK;
}
That being said, if the above still does not work for PHP, you might need to wrap the returned SAFEARRAY/BSTR inside of a VARIANT, which is how many scripting languages generally handle COM data:
// Sketch: return the byte SAFEARRAY wrapped in a VARIANT. The "..." lines
// stand for code elided in this sketch; 'sa' is the SAFEARRAY* of VT_UI1
// produced by that elided code.
STDMETHODIMP CMemReaderImpl::ReadFile(BSTR filepath, VARIANT* Ofile)
{
    ...
    // VariantInit() and the V_* accessor macros take a VARIANT*, so pass
    // the pointer itself rather than dereferencing it.
    VariantInit(Ofile);
    V_VT(Ofile) = VT_UI1 | VT_ARRAY;
    V_ARRAY(Ofile) = sa;
    ...
}
// Sketch: return the BSTR wrapped in a VARIANT. The "..." lines stand for
// code elided in this sketch; 'bstr' is the BSTR produced by that elided
// code.
STDMETHODIMP CMemReaderImpl::ReadFile(BSTR filepath, VARIANT* Ofile)
{
    ...
    // VariantInit() and the V_* accessor macros take a VARIANT*, so pass
    // the pointer itself rather than dereferencing it.
    VariantInit(Ofile);
    V_VT(Ofile) = VT_BSTR;
    V_BSTR(Ofile) = bstr;
    ...
}