-1

I need to make a program that will handle binary data, lots of it.

In short, the C++ program will load a binary file (some of them exceed 20 MB) into a buffer. Then it will run a Lua script and pass all of the loaded data to the script, which will do some manipulation and return the result to the C++ program.

I need this to run as quickly as possible, with the best performance I can get.

A while ago I already made this program using the conventional methods of Lua, but it was extremely slow. So I lost the files, and now I want to redo it in a better, faster way that doesn't compromise performance.

Searching a bit, I found this. I had to make some small changes to adapt to the new version of Lua, but I can't get it to work. Can you help me with this?

And if there's a better way to do the job I said, what would it be?

#include "stdafx.h"

// Metatable "__index" handler: implements the Lua expression `array[index]`.
//
// Stack arguments:
//   1 - userdata carrying the "array" metatable; it holds a pointer to the
//       first int of the exposed array
//   2 - 1-based integer index
// Pushes the selected element as a Lua integer and returns 1 (one result).
// Raises a Lua error if argument 1 is not an "array" userdata, argument 2 is
// not an integer, or the index is below 1.
static int array_index(lua_State* L) {
    int** parray = (int**)luaL_checkudata(L, 1, "array");
    // Keep the full lua_Integer width so a huge index is not silently narrowed.
    lua_Integer index = luaL_checkinteger(L, 2);
    // Lua indices are 1-based; anything below 1 would read before the buffer.
    luaL_argcheck(L, index >= 1, 2, "index out of range");
    // NOTE(review): the userdata stores no element count, so the upper bound
    // cannot be validated here; the script must stay inside the exposed array.
    // The element is an int, so push it as an integer rather than as a float.
    lua_pushinteger(L, (*parray)[index - 1]);
    return 1;
}

// Metatable "__newindex" handler: implements the Lua statement
// `array[index] = value`.
//
// Stack arguments:
//   1 - userdata carrying the "array" metatable; it holds a pointer to the
//       first int of the exposed array
//   2 - 1-based integer index
//   3 - integer value to store
// Returns 0 (no results). Raises a Lua error if argument 1 is not an "array"
// userdata, arguments 2 or 3 are not integers, or the index is below 1.
static int array_newindex(lua_State* L) {
    int** parray = (int**)luaL_checkudata(L, 1, "array");
    // Keep the full lua_Integer width so a huge index is not silently narrowed.
    lua_Integer index = luaL_checkinteger(L, 2);
    // Lua indices are 1-based; anything below 1 would write before the buffer.
    luaL_argcheck(L, index >= 1, 2, "index out of range");
    // NOTE(review): the userdata stores no element count, so the upper bound
    // cannot be validated here; the script must stay inside the exposed array.

    // The backing array holds plain ints, so the Lua integer is narrowed here.
    int value = (int)luaL_checkinteger(L, 3);
    (*parray)[index - 1] = value;
    return 0;
}

// Creates the "array" metatable in the registry and installs the element
// read/write handlers (array_index / array_newindex) into it.
//
// The Lua stack is left exactly as it was found: the metatable pushed by
// luaL_newmetatable is popped again once it has been filled in.
static void create_array_type(lua_State* L) {
    static const struct luaL_Reg array_methods[] = {
        { "__index",    array_index    },
        { "__newindex", array_newindex },
        { NULL, NULL }  // sentinel entry that terminates the list
    };

    luaL_newmetatable(L, "array");
    luaL_setfuncs(L, array_methods, 0);
    // luaL_setfuncs leaves the table on the stack; drop it so the caller's
    // stack stays balanced.
    lua_pop(L, 1);
}

// Exposes a C int array to Lua by wrapping a pointer to it in a userdata
// that carries the "array" metatable.
//
// Only the pointer is stored; the elements are not copied, so `array` must
// stay valid for as long as Lua can reach the userdata.
// Pushes the new userdata and returns 1 (the number of values pushed).
static int expose_array(lua_State* L, int array[]) {
    // The payload is a single int*; sizing it from the pointee keeps the
    // allocation tied to the declared type instead of repeating a type name.
    int** parray = (int**)lua_newuserdata(L, sizeof *parray);
    *parray = array;
    luaL_getmetatable(L, "array");
    lua_setmetatable(L, -2);
    return 1;
}

// Sample backing storage used to exercise the Lua binding.
int mydata[] = {
    1, 2, 3, 4
};

// Lua-callable test function: wraps the sample array `mydata` in an "array"
// userdata and returns it to the script.
static int getarray(lua_State* L) {
    int pushed = expose_array(L, mydata);
    return pushed;
}

// Module opener: registers the "array" metatable and publishes the test
// routine as the global Lua function `array`. Returns 0 (no results).
int __declspec(dllexport) __cdecl luaopen_array(lua_State* L) {
    create_array_type(L);

    // Publish getarray under the global name "array".
    lua_pushcfunction(L, getarray);
    lua_setglobal(L, "array");
    return 0;
}


int main()
{
    lua_State* L = luaL_newstate();
    luaL_dostring(L, "require 'array'");
    luaL_dostring(L, "foo = array()");
    luaL_dostring(L, "x = foo[1]");
    lua_getglobal(L, "x");
    lua_Number x = lua_tonumber(L, 1);

    printf("foo[1] = %d\n", (int)x);
}
Samathingamajig
  • 11,839
  • 3
  • 12
  • 34
Kassio
  • 7
  • 2
  • 1
    Why not just use strings like lua itself does when dealing with binary data? https://www.lua.org/pil/21.2.2.html – Alan Birtles Oct 20 '22 at 06:36
  • I'm trying to avoid this, because it would compromise a lot of performance. I want to do this as best I can, without it being extremely slow, because like I said, sometimes I'm going to deal with extremely large binary files – Kassio Oct 20 '22 at 06:40
  • 1
    Have you actually tried it and found the performance is poor? (20mb doesn't seem extremely large for a modern computer) – Alan Birtles Oct 20 '22 at 06:42
  • I tried it a while ago, and it ended up being really slow. I lost the files and now I want to rebuild, but in a better way – Kassio Oct 20 '22 at 11:04
  • @Kassio: "*I tried it a while ago, and it ended up being really slow.*" Maybe your code was the problem. There's no way that a function call to access every byte is going to be faster than accessing a string. So it's probably a code problem, not an interface problem. – Nicol Bolas Oct 21 '22 at 15:52

2 Answers

0

Consider using lightuserdata to avoid copying file contents excessively.

lightuserdata is just a pointer, so you need to define some methods to work with it as well.

The idea looks like this:

#include <lauxlib.h>
#include <lualib.h>

#define BIN_DATA_MT_ID "bin data"

/*
 * Lua-callable accessor: returns one byte of the binary buffer as a
 * one-character Lua string.
 *
 * Stack arguments:
 *   1 - userdata carrying the BIN_DATA_MT_ID metatable; its address is used
 *       as the start of the buffer
 *   2 - 0-based integer byte offset
 * Pushes a string of length 1 and returns 1 (one result).
 * Raises a Lua error if argument 1 does not carry the expected metatable,
 * argument 2 is not an integer, or the offset is negative.
 */
int get_byte(lua_State *L) {
    const char *file_contents = luaL_checkudata(L, 1, BIN_DATA_MT_ID);
    /* Read the offset as an integer: converting a negative floating-point
     * number to size_t is undefined behaviour. */
    lua_Integer byte_index = luaL_checkinteger(L, 2);
    luaL_argcheck(L, byte_index >= 0, 2, "byte index must be non-negative");
    /* NOTE(review): the buffer length is not available here, so the upper
     * bound is still the caller's responsibility. */
    lua_pushlstring(L, file_contents + byte_index, 1);
    return 1;
}

static const luaL_Reg __index[] = {
    {"get_byte", get_byte},
    {NULL, NULL}
};

int main() {
    const char file_contents[4] = { 0x25, 0xAA, 0xBB, 0xCC };
    lua_State *L = luaL_newstate();
    luaopen_base(L); // adds "print" function
    lua_pushlightuserdata(L, (void *)file_contents);
    luaL_newmetatable(L, BIN_DATA_MT_ID);
    luaL_newlib(L, __index);
    lua_setfield(L, -2, "__index");
    lua_setmetatable(L, -2);
    lua_setglobal(L, "mybindata");
    luaL_dostring(L, "print(mybindata:get_byte(0))");
    lua_close(L);
    return 0;
}
marsgpl
  • 552
  • 2
  • 12
0

The fastest way for Lua to access the bytes of a large byte array is to expose that array directly as a string within Lua. Because Lua manages the memory of its own strings, it will always allocate its own storage for the string. So to efficiently load the string into Lua (i.e., avoiding an extra 20+ MiB copy), you need to use the luaL_Buffer-based API to load it directly into Lua's storage.

But outside of that quirk, it will certainly be faster inside Lua to use array accesses of a string to access bytes compared to doing a function call for each byte accessed from the buffer.

Nicol Bolas
  • 449,505
  • 63
  • 781
  • 982