5

Is there a way to ensure that valarray uses aligned memory so that it can be vectorized with SSE and AVX? As far as I know, the STL doesn't guarantee alignment, and you cannot pass an allocator to valarray. Is there another way to achieve this?

Thanks ahead!

litro
  • 195
  • 1
  • 11
  • Is there something wrong with using the types defined by your SSE implementation? – Ben Voigt Dec 04 '12 at 21:18
  • 1
    @BenVoigt alignas can only be used on variables or data members, not on types. – ecatmur Dec 04 '12 at 21:20
  • @ecatmur: I checked the grammar before returning to upvote your answer, at which point I saw it was deleted. A *type-id* can have an *abstract-declarator*, which can be a *ptr-abstract-declarator*, which can be a *noptr-abstract-declarator*... oh, but *attribute-specifier-seq* is allowed only when brackets are present. – Ben Voigt Dec 04 '12 at 21:27

1 Answer

2

I typically use std::vector with my own allocator, which has the alignment as template parameter and calls _mm_malloc() or _aligned_malloc(). That works very well, also with AVX (32-byte alignment). Appropriately written templated user code automatically picks the required alignment.

Below is the code for AlignmentAllocator<> and its helper, tested under gcc and icpc.

/// Raw-memory helper that requests blocks aligned to @c alignment bytes
/// from the compiler's aligned allocation pair (_mm_malloc/_mm_free on
/// GCC-compatible and Intel compilers, _aligned_malloc/_aligned_free otherwise).
template<std::size_t alignment>
struct static_allocator {
  /// Largest byte count that allocate() accepts.
  static std::size_t max_size ()
  { return std::numeric_limits<std::size_t>::max(); }

  /// Request @a n bytes of storage.
  /// @return a null pointer when @a n is zero, otherwise the new block
  /// @throws std::bad_alloc when @a n exceeds max_size() or the underlying
  ///         allocation function hands back a null pointer
  static void*allocate(std::size_t n)
  {
    if(n == 0)
      return 0;
    if(max_size() < n)
      throw std::bad_alloc();
#if defined(__GNUC__) || defined (__INTEL_COMPILER)
    void*const block = _mm_malloc(n,alignment);
#else
    void*const block = _aligned_malloc(n,alignment);
#endif
    if(block == 0)
      throw std::bad_alloc();
    return block;
  }

  /// Hand @a p back to the matching release function; intended for
  /// pointers obtained from allocate().
  static void deallocate(void*p)
  {
#if defined(__GNUC__) || defined (__INTEL_COMPILER)
    _mm_free(p);
#else
    _aligned_free(p);
#endif
  }
};

/// Specialisation for alignment 1: no aligned allocation routine is used,
/// the storage simply comes from new char[] and goes back via delete[].
template<>
struct static_allocator<1> {
  /// Request @a n bytes; yields a null pointer when @a n is zero.
  static void*allocate(std::size_t n)
  {
    char*bytes = 0;
    if(n != 0)
      bytes = new char[n];
    return bytes;
  }

  /// Release storage through delete[] on a char pointer.
  static void deallocate(void*p)
  {
    char*const bytes = static_cast<char*>(p);
    delete[] bytes;
  }

  /// Largest byte count representable in std::size_t.
  static std::size_t max_size () noexcept
  { return std::numeric_limits<std::size_t>::max(); }
};

/// Alignment 0 is declared as an explicit specialisation, but no definition
/// appears in this listing, so static_allocator<0> is an incomplete type here
/// (presumably so that requesting a zero alignment fails to compile).
template<> struct static_allocator<0>;

/// Allocator with explicit alignment.
///
/// Every raw-memory request is forwarded to static_allocator<alignment>.
/// The class holds no data members, so all instances compare equal.
///
/// @tparam T          element type handed out by this allocator
/// @tparam alignment  value passed on to static_allocator (defaults to 16)
template<typename T, std::size_t alignment = 16>
class AlignmentAllocator
{
  typedef static_allocator<alignment> static_alloc;
public:
  typedef std::size_t     size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef T*              pointer;
  typedef const T*        const_pointer;
  typedef T&              reference;
  typedef const T&        const_reference;
  typedef T               value_type;

  /// Same allocator (same alignment) for a different element type.
  template <typename U>
  struct rebind
  { typedef AlignmentAllocator<U, alignment> other; };

  AlignmentAllocator() {}

  AlignmentAllocator(const AlignmentAllocator&) {}

  /// Converting constructor from a rebound allocator of equal alignment.
  template <typename U>
  AlignmentAllocator(const AlignmentAllocator<U, alignment> &) {}

  ~AlignmentAllocator() {}

  /// Actual address of @a x, even if T overloads unary operator&.
  pointer address (reference x) const
  {
#if __cplusplus >= 201103L
    return std::addressof(x);
#else
    return reinterpret_cast<T*>(&reinterpret_cast<char&>(x));
#endif
  }

  /// Actual address of @a x, even if T overloads unary operator&.
  const_pointer address (const_reference x) const
  {
#if __cplusplus >= 201103L
    return std::addressof(x);
#else
    return reinterpret_cast<const T*>(&reinterpret_cast<const char&>(x));
#endif
  }

  /// Obtain uninitialised storage for @a n objects of type T.
  /// The second (hint) argument is ignored.
  /// @throws std::bad_alloc if @a n exceeds max_size(), or if thrown by
  ///         static_allocator<alignment>::allocate()
  pointer allocate (size_type n, const void* = 0)
  {
    // n*sizeof(value_type) wraps around for huge n, which would silently
    // yield a block far smaller than requested; reject such requests.
    if(n > max_size())
      throw std::bad_alloc();
    return static_cast<pointer>(static_alloc::allocate(n*sizeof(value_type)));
  }

  /// Return storage to static_allocator; the element count is not needed.
  void deallocate (pointer p, size_type)
  { static_alloc::deallocate(p); }

  /// Largest element count whose byte size still fits the byte limit of
  /// static_allocator.
  size_type max_size () const
  { return static_alloc::max_size() / sizeof (value_type); }

#if __cplusplus >= 201103L

  /// Construct a U at @a p in place, forwarding @a args to its constructor.
  template<typename U, typename... Args>
  void construct(U* p, Args&&... args)
  { ::new(static_cast<void*>(p)) U(std::forward<Args>(args)...); }

  /// Run the destructor of the object at @a p without releasing storage.
  template<typename U>
  void destroy(U* p)
  { p->~U(); }

#else

  /// Copy-construct @a val into the storage at @a p.
  void construct (pointer p, const_reference val)
  { ::new(static_cast<void*>(p)) value_type(val); }

  /// Run the destructor of the object at @a p without releasing storage.
  void destroy (pointer p)
  { p->~value_type (); }

#endif

  /// Never unequal: see operator==.
  bool operator!=(const AlignmentAllocator&) const
  { return false; }

  // Returns true if and only if storage allocated from *this
  // can be deallocated from other, and vice versa.
  // Always returns true for stateless allocators.
  bool operator==(const AlignmentAllocator&) const
  { return true; }

};// class AlignmentAllocator<>

/// AlignmentAllocator<void> specialization.
///
/// Provides only the nested type names and rebind; it declares no
/// allocate/deallocate members and no reference typedefs, since references
/// to void do not exist.
template<std::size_t alignment>
class AlignmentAllocator<void, alignment>
{
public:
  typedef std::size_t     size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef void*           pointer;
  typedef const void*     const_pointer;
  typedef void            value_type;

  /// Same allocator (same alignment) for a different element type.
  template<typename U>
  struct rebind
  { typedef AlignmentAllocator<U, alignment> other; };
};
Walter
  • 44,150
  • 20
  • 113
  • 196
  • 1
    Great, but what about `valarray` where you can NOT pass an allocator, to my knowledge?? – litro Dec 05 '12 at 06:05
  • I thought I was clear: **don't use `std::valarray`** here, but something else, for instance `std::vector` – Walter Dec 06 '12 at 17:47