#ifndef INCLUDED_CALC_SIMDMATH
#define INCLUDED_CALC_SIMDMATH

#ifndef INCLUDED_STDDEFX
#include "stddefx.h"
#define INCLUDED_STDDEFX
#endif

// Library headers.

// PCRaster library headers.

// Module headers.



namespace calc {
  // SIMDMath declarations.
}



namespace calc {

//! Vector Element type used by the instruction wrappers and SIMDInstr in this header.
/*!
 * GCC vector type in machine mode V4SF (4 single precision floats),
 * declared with 16 byte alignment.
 * Hard set here at namespace scope instead of taken from
 * SIMDInstrTraits<R>::VE: the traits seem not to pass on the __attribute__
 * settings.
 */
typedef float VE __attribute__ ((aligned (16),mode(V4SF)));

//! Properties of the SIMD vector for element type R.
/*!
 * Declared only; the properties are supplied by the explicit
 * specializations for float and double.
 */
template<typename R>
 struct SIMDInstrTraits;

//! Traits for single precision data: 4 floats per vector.
template<>
 struct SIMDInstrTraits<float> {
   //! nr of data items per instruction
   enum { NDPI=4 };
   //! Vector Element
   /*!
    * Same declaration as calc::VE. SIMDInstr has its use of this typedef
    * commented out and works with calc::VE instead.
    */
   typedef float VE __attribute__ ((aligned (16),mode(V4SF)));
};

//! Traits for double precision data: 2 doubles per vector.
template<>
 struct SIMDInstrTraits<double> {
   //! nr of data items per instruction
   enum { NDPI=2 };
   //! Vector Element
   /*!
    * GCC vector type in machine mode V2DF (2 doubles), 16 byte aligned.
    * SIMDInstr has its use of this typedef commented out and works with
    * the float based calc::VE instead.
    */
   typedef double VE __attribute__ ((aligned (16),mode(V2DF)));
};

template< class Instr>
struct IA32SSE {
  inline static float single(const float& a1, const float& a2) {
    // do a single cell with the same operation 4 times
    float v1[4] = {a1,a1,a1,a1};
    float v2[4] = {a2,a2,a2,a2};
    VE r=Instr::multi(((VE *)v1)[0],((VE *)v2)[0]);
    return ((float *)&r)[0];
  }
};

//! Addition of two vectors; single() is inherited from IA32SSE<Addps>.
struct Addps: IA32SSE<Addps> {
  //! \return result of the addps builtin applied to a1 and a2
  static inline VE multi(const VE& a1, const VE& a2)
  {
    const VE sum = __builtin_ia32_addps(a1, a2);
    return sum;
  }
};

//! Subtraction of two vectors; single() is inherited from IA32SSE<Subps>.
struct Subps: IA32SSE<Subps> {
  //! \return result of the subps builtin applied to a1 and a2
  static inline VE multi(const VE& a1, const VE& a2)
  {
    const VE difference = __builtin_ia32_subps(a1, a2);
    return difference;
  }
};
//! Multiplication of two vectors; single() is inherited from IA32SSE<Mulps>.
struct Mulps: IA32SSE<Mulps> {
  //! \return result of the mulps builtin applied to a1 and a2
  static inline VE multi(const VE& a1, const VE& a2)
  {
    const VE product = __builtin_ia32_mulps(a1, a2);
    return product;
  }
};
//! Division of two vectors; single() is inherited from IA32SSE<Divps>.
struct Divps: IA32SSE<Divps> {
  //! \return result of the divps builtin applied to a1 and a2
  static inline VE multi(const VE& a1, const VE& a2)
  {
    const VE quotient = __builtin_ia32_divps(a1, a2);
    return quotient;
  }
};

//! Single Instruction Multiple Data / R = (float/double)
template<
  class    Instr,
  typename R>
 class SIMDInstr {
     // not used, hard set above
     // typedef typename SIMDInstrTraits<R>::VE   VE;

     //! nr of data items per per instruction
     enum { NDPI=SIMDInstrTraits<R>::NDPI };
   public:
    //! l op= r
    static void assTo1stArg(R *l,const R *r,size_t n) {
      VE *lV= (VE *)l;
      VE *rV= (VE *)r;
      size_t i=0;
       // chunks of NDPI
       for (;i<n/NDPI; ++i)
         lV[i]= Instr::multi(lV[i],rV[i]);
       // the % NDPI remainder
       for (i=i*NDPI;i<n; ++i)
          l[i]= Instr::single(l[i], r[i]);
    }
 };

//------------------------------------------------------------------------------
// INLINE FUNCTIONS
//------------------------------------------------------------------------------



//------------------------------------------------------------------------------
// FREE OPERATORS
//------------------------------------------------------------------------------



//------------------------------------------------------------------------------
// FREE FUNCTIONS
//------------------------------------------------------------------------------



} // namespace calc

#endif
