-2

The title is self-explanatory: I want to serialize an arbitrary large amount of trivially copyable non array "stuff" into a buffer (for academic reasons).

The basic idea is to reinterpret_cast the address of what I want to serialize as an unsigned char*, and then use std::copy into that buffer, and do the reverse operation to load.

Here's a minimal non working example.

#include <iostream>

struct A
{
    int i, j;
};

int main()
{
    int i = -1;
    A a {23, 42};
    
    unsigned char* buffer = new unsigned char[20];
    
    const unsigned char* to_serialize = reinterpret_cast<const unsigned char*>(&i); 
    std::copy(to_serialize, to_serialize + sizeof(int), buffer);
    
    int offset = sizeof(int);    
    to_serialize = reinterpret_cast<const unsigned char*>(&a); 
    std::copy(to_serialize, to_serialize + sizeof(A), buffer + offset);
    
    unsigned char serialized_i[sizeof(int)];
    std::copy(buffer, buffer + sizeof(int), serialized_i);
    int ii (*reinterpret_cast<int*>(serialized_i));
    std::cout << ii << std::endl; //outputs -1 -> ok
    
    unsigned char serialized_a[sizeof(A)];
    std::copy(buffer + offset, buffer + offset + sizeof(A), serialized_a);    
    A aa(*reinterpret_cast<A*>(serialized_a));
    std::cout << aa.i << " " << aa.j << std::endl; //suspect there is a bug
}

If I only write an A, it works fine. I can also write an arbitrary amounts of ints, it works as well. But as soon as I mix the two (as done above), it fails.

On the above example, it "seems" to work. On the full code (below), it doesn't.

File bytebuffer.h

#ifndef BYTEBUFFER_H
#define BYTEBUFFER_H

#include <vector>
#include <stdexcept>
#include <iostream>
#include <memory>

template<class T>
concept TriviallyCopyable = ! std::is_array_v<T> && std::is_trivially_copyable_v<T>;

using byte = unsigned char;

class ByteBuffer
{
    byte * _buffer;
    std::vector<size_t> _offsets;
    size_t _size;
    size_t _capacity;    
    
    public:
        ByteBuffer(size_t capacity = 100);
    
        //Rule of 5 with copy and swap idiom
        ~ByteBuffer();
        ByteBuffer(const ByteBuffer& buffer);
        ByteBuffer& operator=(const ByteBuffer& buffer);
        ByteBuffer(ByteBuffer&& buffer) noexcept;
        ByteBuffer& operator=(ByteBuffer&& buffer) noexcept;
        void swap(ByteBuffer& buffer) noexcept;
    
        //basic accessors
        const byte* buffer() const;
        const std::vector<size_t>& offsets() const;
        size_t size() const; //size in bytes
        size_t capacity() const; //capacity in bytes
        bool reserve(size_t max_size);
        void clear();
    
        //storage
        template<TriviallyCopyable T>
        size_t store(const T& t);
    
        template<TriviallyCopyable T>
        T load(size_t offset) const;            
    
        //I need to write overloads for arrays          
       
        void dump(std::ostream& out, bool clear = true);
};

void swap(ByteBuffer& buffer1, ByteBuffer& buffer2) noexcept;

///////////////////////////////////////////////////////// IMPLEMENTATIONS

template<TriviallyCopyable T>
size_t ByteBuffer::store(const T& t)
{    
    if(_buffer == nullptr)
        throw std::runtime_error("Storage buffer is nullptr");
    if(_offsets.back() + sizeof(T) > _capacity)
        reserve(2 * _capacity);
    
    size_t offset = _offsets.back() + sizeof(T); //where we need to store
    const byte* serialized = reinterpret_cast<const byte*>(std::addressof(t)); //better than &t
    std::copy(serialized, serialized + sizeof(T), _buffer + offset);
    _offsets.push_back(offset);
    _size += offset;
    return offset;
}

template<TriviallyCopyable T>
T ByteBuffer::load(size_t offset) const
{    
    if(_buffer == nullptr)
        throw std::runtime_error("Storage buffer is nullptr");
    byte serialized[sizeof(T)];
    std::copy(_buffer + offset, _buffer + offset + sizeof(T), serialized);
    return T(*reinterpret_cast<T*>(serialized)); //force copy
}

#endif

File bytebuffer.cpp

#include "bytebuffer.h"

#include <algorithm>

ByteBuffer::ByteBuffer(size_t capacity) : _buffer(capacity > 0 ? new byte[capacity] : nullptr), _offsets({0}), _size(0), _capacity(capacity)
{//remark: there is always at least '0' in the offset list, because when the buffer is empty, the first free spot has offset 0
    if(capacity > 0)
        std::fill(_buffer, _buffer + capacity, 0);
}

const unsigned char* ByteBuffer::buffer() const //alias not working here, since it's out of scope
{
    return _buffer;   
}
    
const std::vector<size_t>& ByteBuffer::offsets() const
{
    return _offsets;   
}

size_t ByteBuffer::size() const
{
    return _size;   
}

size_t ByteBuffer::capacity() const
{
    return _capacity;
}

bool ByteBuffer::reserve(size_t max_size)
{
    if(max_size <= _capacity)
        return false;
        
    try
    {
        byte* newbuffer = new byte[max_size]; //might throw
        std::copy(_buffer, _buffer + _size, newbuffer);
        _capacity = max_size;
        delete[] _buffer;
        _buffer = newbuffer;
        return true;
    }
    catch(...)
    {
        return false;
    }
}

void ByteBuffer::clear()
{
    std::fill(_buffer, _buffer + _capacity, 0);
    _offsets.clear();
    _offsets.push_back(0);
    _size = 0;    
}

void ByteBuffer::dump(std::ostream& out, bool clear)
{
    std::for_each(_buffer, _buffer + _size, [&out](byte c){ out << c; });
    
    if(clear)
        this->clear();
}

std::ostream& operator<<(std::ostream& out, ByteBuffer& buffer)
{
    buffer.dump(out, false);
    return out;
}

/////////////////////////// RULE OF 5

void ByteBuffer::swap(ByteBuffer& other) noexcept
{
    std::swap(_buffer, other._buffer);
    std::swap(_offsets, other._offsets);
    std::swap(_size, other._size);
    std::swap(_capacity, other._capacity);
}

void swap(ByteBuffer& buffer1, ByteBuffer& buffer2) noexcept
{
    buffer1.swap(buffer2);   
}

ByteBuffer::~ByteBuffer()
{
    if(_buffer)
    {
        delete[] _buffer;
        _buffer = nullptr;
    }
}

ByteBuffer::ByteBuffer(const ByteBuffer& other) : _buffer(new byte[other._capacity]), _offsets(other._offsets), _size(other._size), _capacity(other._capacity)
{
    std::copy(other._buffer, other._buffer + _capacity, _buffer);
}

ByteBuffer& ByteBuffer::operator=(const ByteBuffer& other)
{
    ByteBuffer tmp(other);
    swap(tmp);
    return *this;
}

ByteBuffer::ByteBuffer(ByteBuffer&& other) noexcept : ByteBuffer()
{
    swap(other);    
}

ByteBuffer& ByteBuffer::operator=(ByteBuffer&& other) noexcept
{    
    swap(other);
    if(other._buffer != nullptr)
    {
        delete[] other._buffer;
        other._buffer = nullptr;
    }
    return *this;
}

File main.cpp

#include <iostream>
#include "bytebuffer.h"

#include <type_traits>
#include <iomanip>

using namespace std;

struct A
{
    int i, j;
};

int main()
{
    ByteBuffer buffer;
    size_t offset1 = buffer.store(2);
    size_t offset2 = buffer.store(3.5);
    A a {23, 42};
    cout << "sizeof A : " << sizeof(a) << endl;
    cout << "A has " << a.i << " " << a.j << endl;
    cout << boolalpha << is_trivially_copyable<A>::value << endl << endl;
    size_t offset3 = buffer.store(a);
    size_t offset4 = buffer.store('d');
    //size_t offset5 = buffer.store("Hello"); //doesn't work with arrays, I need a specific template overload
    //size_t offset6 = buffer.store(std::string("Hello")); //doesn't compile, and that's ok
    
    //no template argument deduction possible down here, that's normal, and ok
    cout << buffer.load<int>(offset1) << endl; //2
    cout << buffer.load<double>(offset2) << endl; //3.5
    A loaded = buffer.load<A>(offset3); //runs, but wrong atrtibutes
    cout << loaded.i << " " << loaded.j << endl; //error: wrong value
    cout << buffer.load<char>(offset4) << endl; //d
    //cout << buffer.load<const char[6]>(offset5) << endl; //doesn't work with arrays, I need a specific template overload
    //cout << buffer.load<std::string>(offset6) << endl; //doesn't compile, and that's ok
}
R. Absil
  • 173
  • 6
  • 2
    `sizeof(int)` != `sizeof(A)`. – 273K Jan 31 '23 at 14:49
  • when you write code, always think "does this look good?" do you think this looks good? – Neil Butterworth Jan 31 '23 at 14:50
  • @273K It was a typo. I fixed it. Thing is... it works on this mwe, but not on the "full" code :( – R. Absil Jan 31 '23 at 14:51
  • fyi, such quick mirco edits after comments are highly suspicious. This tiny change on the code has big consequences, but you say it still produces same outputs? You thouroughly tested the new code within a single minute ?!? – 463035818_is_not_an_ai Jan 31 '23 at 14:52
  • Care with endianess, alignment... – Jarod42 Jan 31 '23 at 14:52
  • if the code posted works, but not your real code then it is no mre. – 463035818_is_not_an_ai Jan 31 '23 at 14:52
  • `it works on this mwe` and `//outputs 23 0` is false – 273K Jan 31 '23 at 14:53
  • I changed the typo sizeof(int) to a sizeof(A) into the serialization, recompiled the stuff, and it worked. But like I said, I suspect there is still a bug, since it seems to work on this mwe, but not on the full code. I'll post the full code. – R. Absil Jan 31 '23 at 14:53
  • Your arrays might not be aligned properly (It should be `alignas(A) unsigned char serialized_a[sizeof(A)]` and `alignas(int) serialized_int[sizeof(int)]`), and you could copy directly into your object (`A aa; std::copy(buffer + offset, buffer + offset + sizeof(A), reinterpret_cast(&aa))`), but otherwise this looks fine – Artyer Jan 31 '23 at 14:54
  • For such a not [mcve], you should use a debugger, watch a memory buffer or objects on each serialization or deseralization std::copy. – 273K Jan 31 '23 at 15:20
  • I'm directly copying into A, with an explicit call to the copy constructor. I can't really default initialize A like you propose, because maybe the 'A' I'll serialize cannot be default initialized. – R. Absil Jan 31 '23 at 15:22
  • 1
    What you are doing with offsets in `ByteBuffer::store` looks very suspicious. Say, on the first call `_offsets.back()` is zero - but you are not writing to the buffer at zero offset, you are writing at `sizeof(T)`. So the object ends up occupying `sizeof(T)` through `2*sizeof(T)` bytes. The next object (say of type `U`) is written at `sizeof(T) + sizeof(U)` - if `U` is smaller than `T`, it ends up overwriting parts of `T`. – Igor Tandetnik Jan 31 '23 at 15:22
  • `ByteBuffer` should be an input/output stream for a better design. The destructor can be just `delete[] _buffer`. `_buffer` should be a vector, then all code `RULE OF 5` is unnecessary. – 273K Jan 31 '23 at 15:24
  • 1
    Also, you check `_offsets.back() + sizeof(T) > _capacity` - but you write through `_offsets.back() + 2*sizeof(T)`, which could exceed `_capacity` and overflow the buffer. – Igor Tandetnik Jan 31 '23 at 15:25
  • @IgorTandetnik you're right, that's the problem. The actual offset is `_offsets.back();`, and the next one (what I need to `push_back`) is `_offsets.back() + sizeof(T)` . That... was stupid of me. Thanks a lot – R. Absil Jan 31 '23 at 15:33

1 Answers1

0

So, the first mwe works fine, as stated in the comments. And, for the complete fix, the error was how I stored my offsets in the _offset attribute.

I rewrote the store member function as follows:

template<TriviallyCopyable T>
size_t ByteBuffer::store(const T& t)
{    
    if(_buffer == nullptr)
        throw std::runtime_error("Storage buffer is nullptr");
    if(_offsets.back() + sizeof(T) > _capacity)
        reserve(2 * _capacity);
    
    size_t offset = _offsets.back(); //where we need to store
    const byte* serialized = reinterpret_cast<const byte*>(std::addressof(t)); //better than &t
    std::copy(serialized, serialized + sizeof(T), _buffer + offset);
    _offsets.push_back(offset + sizeof(T));
    _size += sizeof(T);
    return offset;
}
R. Absil
  • 173
  • 6