// Size threshold (1<<6 = 64) used by the default value of the `C` template
// parameter in this file: C requires N > (SwitchToOMP << NThreads).
36 static const int_t SwitchToOMP = (1<<6);
// Template parameter list whose last parameter, C, is a compile-time switch:
// it defaults to true only when N > NThreads and N > (SwitchToOMP << NThreads).
// LastK defaults to 1.
// NOTE(review): the declaration this list belongs to is not visible in this
// excerpt; presumably it is the primary InTimeOMP template — confirm.
55 template<int_t NThreads, int_t N,
typename NFact,
typename T,
int S,
class W1, int_t LastK = 1,
56 bool C = ((N>NThreads) && (N>(SwitchToOMP<<NThreads)))>
// Specialization selected when C == true and the factor list has the form
// Loki::Typelist<Head,Tail>. It calls dft_str.apply from an OpenMP parallel
// loop and then calls dft_scaled.apply once.
// NOTE(review): this excerpt omits parts of the class body (braces, the
// declarations of dft_str/dft_scaled and the signature of the function that
// contains the loop) — consult the full file before editing.
59 template<
int_t NThreads,
int_t N,
typename Head,
typename Tail,
typename T,
int S,
class W1,
int_t LastK>
60 class InTimeOMP<NThreads,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
62 typedef typename TempTypeTrait<T>::Result LocalVType;
// K is read from the head of the factor list; M = N/K.
63 static const int_t K = Head::first::value;
64 static const int_t M = N/K;
// Doubled counts; M2 and N2 are the stride and bound of the loop below
// (presumably two T values per complex element — TODO confirm).
65 static const int_t M2 = M*2;
66 static const int_t N2 = N*2;
67 static const int_t LastK2 = LastK*2;
// Threads requested for the parallel loop: the smaller of NThreads and K.
68 static const int_t NThreadsCreate = (NThreads > K) ? K : NThreads;
// Remaining thread count: NThreads - NThreadsCreate, or 1 when they are equal.
69 static const int_t NThreadsNext = (NThreads != NThreadsCreate) ? NThreads-NThreadsCreate : 1;
71 typedef typename IPowBig<W1,K>::Result WK;
// Factor list with Head's second value decremented by one, followed by Tail.
72 typedef Loki::Typelist<Pair<
typename Head::first,
SInt<Head::second::value-1> >, Tail> NFactNext;
// Calls dft_str.apply(data + m) for m = 0, M2, 2*M2, ... while m < N2,
// statically scheduled over NThreadsCreate threads.
78 #pragma omp parallel for shared(data) schedule(static) num_threads(NThreadsCreate)
79 for (int_t m = 0; m < N2; m+=M2)
80 dft_str.apply(data + m);
// Not part of the loop body (the for statement above has no braces).
82 dft_scaled.apply(data);
// Specialization for NThreads == 1 with C == true: adds no members and
// inherits everything from InTime<N,Typelist<Head,Tail>,T,S,W1,LastK>.
86 template<
int_t N,
typename Head,
typename Tail,
typename T,
int S,
class W1,
int_t LastK>
87 class InTimeOMP<1,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
88 :
public InTime<N,Loki::Typelist<Head,Tail>,T,S,W1,LastK> { };
// Specialization for C == false (any NThreads, any factor list): adds no
// members and inherits everything from InTime<N,NFact,T,S,W1,LastK>.
90 template<
int_t NThreads,
int_t N,
typename NFact,
typename T,
int S,
class W1,
int_t LastK>
91 class InTimeOMP<NThreads,N,NFact,T,S,W1,LastK,false> :
public InTime<N,NFact,T,S,W1,LastK> { };
// Template parameter list whose last parameter, C, is a compile-time switch:
// it defaults to true only when N > NThreads and N > (SwitchToOMP << NThreads).
// LastK defaults to 1.
// NOTE(review): the declaration this list belongs to is not visible in this
// excerpt; presumably it is the primary InTimeOOP_OMP template — confirm.
95 template<int_t NThreads, int_t N,
typename NFact,
typename T,
int S,
class W1, int_t LastK = 1,
96 bool C = ((N>NThreads) && (N>(SwitchToOMP<<NThreads)))>
// Specialization selected when C == true and the factor list has the form
// Loki::Typelist<Head,Tail>. apply(src, dst) reads from src and writes to dst:
// it calls dft_str.apply(src + lk, dst + m) from an OpenMP parallel loop and
// calls dft_scaled.apply(dst).
// NOTE(review): this excerpt omits parts of the class body (braces, the
// declarations of m and lk, and the rest of the loop body) — consult the full
// file before editing.
99 template<
int_t NThreads,
int_t N,
typename Head,
typename Tail,
typename T,
int S,
class W1,
int_t LastK>
100 class InTimeOOP_OMP<NThreads,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
102 typedef typename TempTypeTrait<T>::Result LocalVType;
// K is read from the head of the factor list; M = N/K.
103 static const int_t K = Head::first::value;
104 static const int_t M = N/K;
// Doubled counts; M2 and N2 are the stride and bound of the loop below
// (presumably two T values per complex element — TODO confirm).
105 static const int_t M2 = M*2;
106 static const int_t N2 = N*2;
107 static const int_t LastK2 = LastK*2;
// Threads requested for the parallel loop: the smaller of NThreads and K.
108 static const int_t NThreadsCreate = (NThreads > K) ? K : NThreads;
// Thread count handed to the nested transform type: NThreads - NThreadsCreate,
// or 1 when they are equal.
109 static const int_t NThreadsNext = (NThreads != NThreadsCreate) ? NThreads-NThreadsCreate : 1;
111 typedef typename IPowBig<W1,K>::Result WK;
// Factor list with Head's second value decremented by one, followed by Tail.
112 typedef Loki::Typelist<Pair<
typename Head::first,
SInt<Head::second::value-1> >, Tail> NFactNext;
// Nested transform of length M, instantiated with NThreadsNext threads, the
// reduced factor list, WK, and LastK multiplied by K.
113 InTimeOOP_OMP<NThreadsNext,M,NFactNext,T,S,WK,K*LastK> dft_str;
// Last template argument is true when N <= StaticLoopLimit.
114 DFTk_x_Im_T<K,M,T,S,W1,(N<=StaticLoopLimit)> dft_scaled;
117 void apply(
const T* src, T* dst)
// m steps through dst in strides of M2; lk is the offset into src.
// NOTE(review): lk is listed in private(...) and is only assigned in the loop
// init in the lines visible here. A private copy is per-thread, so verify in
// the full file that every iteration sees the lk that corresponds to its m
// (e.g. by deriving lk from m) rather than relying on a running increment.
120 #pragma omp parallel for shared(src,dst) private(m,lk) schedule(static) num_threads(NThreadsCreate)
121 for (m = lk = 0; m < N2; m+=M2) {
122 dft_str.apply(src + lk, dst + m);
// NOTE(review): the remainder of the loop body and its closing brace are not
// visible in this excerpt; presumably this call follows the loop — confirm.
126 dft_scaled.apply(dst);
// Specialization for NThreads == 1 with C == true: adds no members and
// inherits everything from InTimeOOP<N,Typelist<Head,Tail>,T,S,W1,LastK>.
130 template<
int_t N,
typename Head,
typename Tail,
typename T,
int S,
class W1,
int_t LastK>
131 class InTimeOOP_OMP<1,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
132 :
public InTimeOOP<N,Loki::Typelist<Head,Tail>,T,S,W1,LastK> { };
// Specialization for C == false (any NThreads, any factor list): adds no
// members and inherits everything from InTimeOOP<N,NFact,T,S,W1,LastK>.
134 template<
int_t NThreads,
int_t N,
typename NFact,
typename T,
int S,
class W1,
int_t LastK>
135 class InTimeOOP_OMP<NThreads,N,NFact,T,S,W1,LastK,false> :
public InTimeOOP<N,NFact,T,S,W1,LastK> { };
// Template parameter list whose last parameter, C, is a compile-time switch:
// it defaults to true only when N > NThreads and N > (SwitchToOMP << NThreads).
// LastK defaults to 1. Unlike the InTime* lists in this file, NThreads is a
// short_t here.
// NOTE(review): the declaration this list belongs to is not visible in this
// excerpt; presumably it is the primary InFreqOMP template — confirm.
154 template<short_t NThreads, int_t N,
typename NFact,
typename T,
int S,
class W1, int_t LastK = 1,
155 bool C=((N>NThreads) && (N>(SwitchToOMP<<NThreads)))>
// Specialization selected when C == true and the factor list has the form
// Loki::Typelist<Head,Tail>. It calls dft_scaled.apply(data) first and then
// calls dft_str.apply from an OpenMP parallel loop (the reverse of the order
// used by InTimeOMP in this file).
//
// NThreads is declared short_t so that its type agrees with the
// <..., false> specialization of InFreqOMP and with NThreadsCreate /
// NThreadsNext below. It was previously `unsigned int`; a partial
// specialization whose non-type argument has a different type than the
// corresponding parameter of the primary template needs an implicit
// conversion of a template parameter, which conforming compilers reject
// (or can never deduce).
//
// NOTE(review): this excerpt omits parts of the class body (braces, the
// declaration of dft_str and the signature of the function that contains the
// loop) — consult the full file before editing.
158 template<
short_t NThreads,
int_t N,
typename Head,
typename Tail,
typename T,
int S,
class W1,
int_t LastK>
159 class InFreqOMP<NThreads,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
161 typedef typename TempTypeTrait<T>::Result LocalVType;
// K is read from the head of the factor list; M = N/K.
162 static const int_t K = Head::first::value;
163 static const int_t M = N/K;
// Doubled counts; M2 and N2 are the stride and bound of the loop below
// (presumably two T values per complex element — TODO confirm).
164 static const int_t M2 = M*2;
165 static const int_t N2 = N*2;
166 static const int_t LastK2 = LastK*2;
// Threads requested for the parallel loop: the smaller of NThreads and K.
167 static const short_t NThreadsCreate = (NThreads > K) ? K : NThreads;
// Remaining thread count: NThreads - NThreadsCreate, or 1 when they are equal.
168 static const short_t NThreadsNext = (NThreads != NThreadsCreate) ? NThreads-NThreadsCreate : 1;
170 typedef typename IPowBig<W1,K>::Result WK;
// Factor list with Head's second value decremented by one, followed by Tail.
171 typedef Loki::Typelist<Pair<
typename Head::first,
SInt<Head::second::value-1> >, Tail> NFactNext;
173 T_DFTk_x_Im<K,M,T,S,W1,true> dft_scaled;
// Applied to the whole buffer before the parallel loop.
178 dft_scaled.apply(data);
// Calls dft_str.apply(data + m) for m = 0, M2, 2*M2, ... while m < N2,
// statically scheduled over NThreadsCreate threads.
181 #pragma omp parallel for shared(data) schedule(static) num_threads(NThreadsCreate)
182 for (int_t m = 0; m < N2; m+=M2)
183 dft_str.apply(data + m);
// Specialization for NThreads == 1 with C == true: adds no members and
// inherits everything from InFreq<N,Typelist<Head,Tail>,T,S,W1,LastK>.
187 template<
int_t N,
typename Head,
typename Tail,
typename T,
int S,
class W1,
int_t LastK>
188 class InFreqOMP<1,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
189 :
public InFreq<N,Loki::Typelist<Head,Tail>,T,S,W1,LastK> { };
// Specialization for C == false (any NThreads, any factor list): adds no
// members and inherits everything from InFreq<N,NFact,T,S,W1,LastK>.
191 template<
short_t NThreads,
int_t N,
typename NFact,
typename T,
int S,
class W1,
int_t LastK>
192 class InFreqOMP<NThreads,N,NFact,T,S,W1,LastK,false> :
public InFreq<N,NFact,T,S,W1,LastK> { };