6

I'm trying to implement custom allocator for storing memory mapped files in the std::vector. Files mapping performed by boost::iostreams::mapped_file

Allocator type for file memory mapping:

template<typename T>
class mmap_allocator 
{
public:
  typedef T value_type;

  mmap_allocator(const std::string& filename) 
  : _mmfile(filename) {  } 

  T* allocate (size_t n) 
  { 
     return reinterpret_cast<T*>(_mmfile.data());
  }
  void deallocate (T* p, size_t n) 
  { 
     p = nullptr;
     _mmfile.close();
  }

private:
  boost::iostreams::mapped_file _mmfile;
};

Container for memory mapped file, based on std::vector:

//Get file size
long GetFileSize(std::string filename)
{
    FILE *p_file = NULL;
    p_file = fopen(filename.c_str(),"rb");
    fseek(p_file,0,SEEK_END);
    int size = ftell(p_file);
    fclose(p_file);
    return size;
}

template<typename T>
class mm_vector : public std::vector<T, mmap_allocator<T> >
{
public:
  typedef mmap_allocator<T> allocator_type;
  typedef std::vector<T, allocator_type > b_vector;

  mm_vector(const std::string filename) : b_vector(GetFileSize(filename)/sizeof(T), allocator_type(filename)) 
  {  
    b_vector::reserve(GetFileSize(filename)/sizeof(T));
  }
};

Test code:

int main()
{
  mm_vector<int> v("test.f");//test.f - binary file contain several integers
  for(auto x : v) std::cout<<x<<"  ";
}

This code don't work properly - output always equals to zero. File contains correct content - several integers. This code works well:

boost::iostreams::mapped_file _mmfile("test.f");
int* p = (int*)(_mmfile.data());
std::cout<<p[0];

What am I doing wrong?

gorill
  • 1,623
  • 3
  • 20
  • 29
  • You don't have any error checking in the `GetFileSize` function. `fopen` and other functions in there may fail, and if they do they return error codes to tell you what went wrong. You should check them. – Jonathan Potter Jul 23 '14 at 03:12
  • Your allocate() function always returns the same value. It should be like malloc() and return diff value every call. – brian beuning Jul 23 '14 at 14:05

3 Answers3

6

The problem is zero initialization, calling the constructor that receive the size and the allocator would initialize the vector elements to the default value of the element type (in this case 0). This is mandated by the standard.

23.3.7.2 vector constructors, copy, and assignment [vector.cons] § 23.3.7.2 789

explicit vector(size_type n, const Allocator& = Allocator());

-Effects: Constructs a vector with n default-inserted elements using the specified allocator.
-Requires: T shall be DefaultInsertable into *this.
-Complexity: Linear in n.

In my case the used file was filled with 0 too. Tested in GCC 4.9.0. Has logic because the default mapmode of mapped_file is readwrite.

In the sample code i added print of the mapped memory content when the allocation is happen (in the custom allocator), in the construction of the vector and the existed print in main. The first print output the correct data of the file and the second output the zeroed version.

#include <vector>
#include <iostream>
#include <chrono>
#include <iomanip>
#include <boost/iostreams/device/mapped_file.hpp>

template <typename T>
class mmap_allocator {
public:
    typedef T value_type;

    mmap_allocator(const std::string& filename) : _mmfile(filename) {}

    T* allocate(size_t n) {
        std::cout << "OUTPUT 1:" << std::endl;
        auto v = reinterpret_cast<T*>(_mmfile.data());
        for (unsigned long idx = 0; idx < _mmfile.size()/sizeof(int); idx++)
            std::cout << v[idx] << " ";
        return reinterpret_cast<T*>(_mmfile.data());
    }
    void deallocate(T* p, size_t n) {
        p = nullptr;
        _mmfile.close();
    }

private:
    boost::iostreams::mapped_file _mmfile;
};

// Get file size
long GetFileSize(std::string filename) {
    FILE* p_file = NULL;
    p_file = fopen(filename.c_str(), "rb");
    fseek(p_file, 0, SEEK_END);
    int size = ftell(p_file);
    fclose(p_file);
    return size;
}

template <typename T>
class mm_vector : public std::vector<T, mmap_allocator<T>> {
public:
    typedef mmap_allocator<T> allocator_type;
    typedef std::vector<T, allocator_type> b_vector;

    mm_vector(const std::string filename)
        : b_vector(GetFileSize(filename) / sizeof(T),
                   allocator_type(filename)) {
        std::cout << std::endl << std::endl << "OUTPUT 2:" << std::endl;
        for (auto x : *this)
            std::cout << x << "  ";
        b_vector::reserve(GetFileSize(filename) / sizeof(T));
    }
};

int main(int argc, char* argv[]) {
    std::chrono::system_clock::time_point begin_time =
        std::chrono::system_clock::now();

    mm_vector<int> v("H:\\save.txt");
    std::cout << std::endl << std::endl << "OUTPUT 2:" << std::endl;
    for (auto x : v)
        std::cout << x << "  ";

    std::chrono::system_clock::time_point end_time =
        std::chrono::system_clock::now();
    long long elapsed_miliseconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(
            end_time - begin_time).count();
    std::cout << "Duration (min:seg:mili): " << std::setw(2)
              << std::setfill('0') << (elapsed_miliseconds / 60000) << ":"
              << std::setw(2) << std::setfill('0')
              << ((elapsed_miliseconds / 1000) % 60) << ":" << std::setw(2)
              << std::setfill('0') << (elapsed_miliseconds % 1000) << std::endl;
    std::cout << "Total milliseconds: " << elapsed_miliseconds << std::endl;

    return 0;
}
NetVipeC
  • 4,402
  • 1
  • 17
  • 19
2

You might want to give

https://github.com/johannesthoma/mmap_allocator

a try. It uses contents of an mmap'ed file as backing storage for a vector and is LGPL so you should be able to use it in your projects. Note that currently, gcc is a requirement but it can be easily extended.

Johannes Thoma
  • 1,026
  • 10
  • 21
0

To make the advice from NetVipeC's answer explicit (with help from the mmap_allocator library suggested by Johannes Thoma), if you're using the GNU Standard C++ Library the following replacement for your mm_vector class prevents the contents of your memory-mapped vector from being initialized to zero (and eliminates the need for the GetFileSize function):

template <typename T>
class mm_vector : public std::vector<T, mmap_allocator<T>> {
public:
    typedef mmap_allocator<T> allocator_type;
    typedef std::vector<T, allocator_type> b_vector;

    mm_vector(const std::string filename)
        : b_vector(allocator_type(filename)) {

        allocator_type * a = &b_vector::_M_get_Tp_allocator();
        size_t n = a->size() / sizeof(T);
        b_vector::reserve(n);
        // _M_set_finish(n);
        this->_M_impl._M_finish = this->_M_impl._M_end_of_storage = this->_M_impl._M_start + n;
    }
};

We prevent the contents of the vector from being zeroed by allowing it to be initialized with the default size of 0, and then fiddle with its internals afterward to adjust the size. It's unlikely that this is a complete solution; I haven't checked whether operations that change the size of the vector work properly for example.

David Hull
  • 1,255
  • 1
  • 14
  • 17