6

Consider the following (contrived) memory arena (pool):

/// Fixed-capacity memory arena that hands out consecutive slots, each
/// `sizeof(T)` bytes wide, carved from a single heap buffer.
///
/// This version never wraps around and performs no bounds check, so the
/// address returned after `size` requests lies past the end of the buffer.
template<typename T>
class Arena {
 public:
  /// Allocates an uninitialized buffer large enough for `size` objects of T.
  Arena(size_t size)
      : m_buffer(new char[size * sizeof(T)]),
        m_next_available(0),
        m_size(size) { }

  /// Returns the address of the next slot and atomically advances the
  /// slot counter (post-increment on the atomic).
  void* placement() {
    return m_buffer.get() + (m_next_available++) * sizeof(T);
  }

 private:
  // Array form of unique_ptr: the buffer comes from new[], so it must be
  // released with delete[]; unique_ptr<char> would call plain delete.
  std::unique_ptr<char[]> m_buffer;
  std::atomic<size_t> m_next_available;  // index of the next slot to hand out
  size_t m_size;                         // capacity, in slots
};

As you can see, it uses an atomic variable m_next_available to keep track of the next available memory block.

When a new memory block is requested, the Arena instance should provide the pointer to the appropriate block (as indicated), and obtain the next available one; this is where I'm having problems.

I would like an atomic operation capable of expressing the following: if the index of the next available block reaches the arena size, it should be reset to zero (I will be overwriting the previously used memory locations).

For reference, the non-atomic version of the same Arena is presented below. Notice how when I go past five elements (the size of the Arena), the address to the new element is the address corresponding to the first block (as expected).

#include<cstddef>
#include<iostream>
#include<memory>
/// Single-threaded reference version of the arena: a ring of `size` slots,
/// each `sizeof(T)` bytes wide, carved from one heap buffer.
///
/// Once every slot has been handed out, the next request wraps around to the
/// first slot, so older objects get overwritten.
template<typename T>
class Arena {
 public:
  /// Allocates an uninitialized buffer large enough for `size` objects of T.
  Arena(size_t size)
      : m_buffer(new char[size * sizeof(T)]),
        m_next_available(0),
        m_size(size) { }

  /// Returns the address of the next slot, wrapping back to slot 0 after the
  /// last one. The check, the reset and the increment are separate plain
  /// operations on a non-atomic counter.
  void* placement() {
    // this is the logic that I'd like to make atomic
    if(m_next_available == m_size) {
      m_next_available = 0;
    }
    return m_buffer.get() + (m_next_available++) * sizeof(T);
  }

 private:
  // Array form of unique_ptr: the buffer comes from new[], so it must be
  // released with delete[]; unique_ptr<char> would call plain delete.
  std::unique_ptr<char[]> m_buffer;
  size_t m_next_available;  // index of the next slot to hand out
  size_t m_size;            // capacity, in slots
};

/// Placement form of operator new for arenas: `new(a) T(args...)` builds the
/// object in the storage returned by `a.placement()`.
template<typename T>
void* operator new(size_t sz, Arena<T>& a) {
  // The requested byte count is not used; the cast only silences the
  // unused-parameter warning.
  static_cast<void>(sz);
  void* const slot = a.placement();
  return slot;
}

// Reads doubles from standard input until extraction fails, constructs each
// one inside a five-slot arena, and prints the address it was placed at.
int main() {
  Arena<double> a(5);
  for (double x; std::cin >> x; ) {
    double* const slot = new(a) double(x);
    std::cout << "address of new item: " << slot << std::endl;
  }
}

Compiled with GCC 4.8.1 on OS X 10.7.4 (g++ example.cpp -std=c++11)

1
address of new item: 0x7fb48b4008a0
2
address of new item: 0x7fb48b4008a8
3
address of new item: 0x7fb48b4008b0
4
address of new item: 0x7fb48b4008b8
5
address of new item: 0x7fb48b4008c0
1
address of new item: 0x7fb48b4008a0 # this is the same as the first one

Edit:

Following my earlier attempts and Steve Jessop's valuable suggestion, I will simply increment the m_next_available counter atomically and take the result modulo m_size to obtain the cycle. If you are interested, the code below seems to work.

#include<cstddef>
#include<iostream>
#include<memory>
#include<atomic>
#include<thread>
#include<vector>

/// Ring-style memory arena: `size` slots of `sizeof(T)` bytes each, carved
/// from one heap buffer. Slot selection uses an atomic counter taken modulo
/// the capacity, so requests cycle through the slots and overwrite older
/// objects once the arena has been filled.
template<typename T>
class Arena {
 public:
  /// Allocates an uninitialized buffer large enough for `size` objects of T.
  Arena(size_t size)
      : m_buffer(new char[size * sizeof(T)]),
        m_next_available(0),
        m_size(size) { }

  /// Atomically takes the next ticket from the counter and returns the
  /// address of slot `ticket % m_size`.
  void* placement() {
    return m_buffer.get() + (m_next_available++ % m_size) * sizeof(T);
  }

  /// Returns the current counter value, i.e. how many times placement() has
  /// been called so far.
  size_t allocations() const {
    return m_next_available;
  }

  /// Prints every slot, space-separated, followed by a newline.
  /// Slots are read as `T` (not as a hard-coded `double`), so the dump is
  /// meaningful for any element type that can be streamed to std::cout.
  /// No synchronization is performed here, and slots that were never
  /// constructed are printed as-is.
  void peek() const {
    // print whatever you can
    for(size_t k=0; k<m_size; k++) {
      std::cout<<(*reinterpret_cast<const T*>(m_buffer.get() + k * sizeof(T)))
               <<" ";
    }
    std::cout<<std::endl;
  }

 private:
  std::unique_ptr<char[]> m_buffer;      // raw storage, released with delete[]
  std::atomic<size_t> m_next_available;  // running count of placement() calls
  size_t m_size;                         // capacity, in slots
};
/// Placement form of operator new for arenas: `new(a) T(args...)` builds the
/// object in the storage returned by `a.placement()`.
template<typename T>
void* operator new(size_t sz, Arena<T>& a) {
  // The requested byte count is not used; the cast only silences the
  // unused-parameter warning.
  static_cast<void>(sz);
  void* const slot = a.placement();
  return slot;
}

// Global arena with ten double-sized slots.
Arena<double> arena{10};
// Flag polled by the printing thread; main() assigns it before that thread
// is started.
std::atomic<bool> continue_printing;

// Thread body: constructs the values 0..9999, one after another, as doubles
// in the global arena.
struct Worker {
  void operator()() const {
    size_t value = 0;
    while (value < 10000) {
      new(arena) double(value);
      ++value;
    }
  }
};

// Stress test: 100 worker threads fill the global arena while one extra
// thread keeps printing its contents; afterwards the final contents and the
// total number of placement requests are reported.
int main() {
  continue_printing = true;

  // Background printer: dumps the arena until told to stop.
  // NOTE(review): peek() reads slots while workers are writing them, with no
  // synchronization on the slot contents, so the intermediate dumps are only
  // a rough snapshot.
  std::thread printer([](){ while(continue_printing) arena.peek(); });

  std::vector<std::thread> threads;
  for(size_t k=0; k<100; k++) {
    threads.emplace_back(Worker());
  }
  for(auto & thread : threads) {
    thread.join();
  }
  continue_printing = false;
  // Join instead of detach: a detached printer could still be inside peek()
  // while the summary below is written, and could touch `arena` and
  // std::cout after main() returns and static destruction has begun.
  printer.join();
  std::cout<<"all threads finished"<<std::endl
           <<"final population in the arena: "<<std::endl;
  arena.peek();
  std::cout<<"Number of elements that requested allocation: "
           <<arena.allocations()<<std::endl;

}

Output:

$ ./a.out
0 9 57 949 90 371 144 976 132 384 
876 679 600 926 610 948 622 589 632 1480 
4553 4580 4499 4592 4597 4518 7512 6344 4546 6362 
7597 4595 4659 7626 4616 6459 6470 6480 4689 7676 
4666 6544 7738 6562 7755 7766 6582 6593 6604 4727 
[----- snip ----- snip ----- snip -----]
9409 9925 9934 9446 9956 9966 9977 9490 9508 9549 
9720 9811 9892 9953 9994 9995 9996 9997 9998 9999 
9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 
9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 
all threads finished
final population in the arena: 
9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 
Number of elements that requested allocation: 1000000
Escualo
  • 40,844
  • 23
  • 87
  • 135
  • 1
    Doesn't actually answer the question, but what about `(m_next_available++) % size`? For portability, you could worry about the case where `size_t` is smaller than 64 bits and therefore might eventually overflow, and use `unsigned long long` instead. – Steve Jessop Dec 03 '13 at 23:34
  • Yes, I attempted the modulo and it does work as intended, but I am afraid of the overflow (maybe I should not?) – Escualo Dec 03 '13 at 23:35
  • 1
    Well, it can overflow in theory but in practice it won't. 2^64 clock cycles is nearly 200 years at 3 GHz. If you can manage more than one allocation per clock cycle, even using all the cores on your machine, then more power to your elbow ;-) – Steve Jessop Dec 03 '13 at 23:37
  • I think you are right: I could have 18,446,744,073,709,551,615 items (as per `std::numeric_limits<size_t>::max()`) before overflowing. I am over-thinking this issue. – Escualo Dec 03 '13 at 23:43
  • 3
    You can express any atomic computation using a loop; compute the desired result and use compare and exchange to store the result iff the input hasn't changed. Works well unless you have very high contention. – Alan Stokes Dec 03 '13 at 23:54
  • @SteveJessop if you were to write your comment as an answer, I would up-vote it and accept it. – Escualo Dec 04 '13 at 00:11
  • @AlanStokes thanks - I will look into your suggestion. – Escualo Dec 04 '13 at 00:12
  • 1
    Storing a pointer allocated with `new char[...]` in `std::unique_ptr<char>` will result in undefined behavior when the `unique_ptr` destructor passes the pointer to `delete`. You need to use `std::unique_ptr<char[]>` which deallocates with `delete[]`. – Casey Dec 04 '13 at 07:28
  • 1
    Your sample code has a data race on `continue_printing` - it is accessed by multiple threads simultaneously, with at least one access being a modification. This also has undefined behavior. – Casey Dec 04 '13 at 07:36
  • @Casey thanks! Good catches. I've fixed the problems. – Escualo Dec 04 '13 at 18:57
  • Another way to fix the overflow issue is to make `size` a power of two; then it wraps around correctly even if `m_next_available` does overflow. – Nate Eldredge Jan 02 '22 at 03:38

0 Answers0