Generative Fast Fourier Transforms (GFFT)  0.3
gfftomp.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2009-2014 by Vladimir Mirnyy *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 2 of the License, or *
7  * (at your option) any later version. *
8  * *
9  * This program is distributed in the hope that it will be useful, *
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
12  * GNU General Public License for more details. *
13  ***************************************************************************/
14 
15 #ifndef __gfftomp_h
16 #define __gfftomp_h
17 
22 #include "gfftalg.h"
23 #include "gfftstdalg.h"
24 #include "gfftalgfreq.h"
25 #include "gfftswap.h"
26 
27 #include <omp.h>
28 
29 namespace GFFT {
30 
36 static const int_t SwitchToOMP = (1<<6);
37 
38 
55 template<int_t NThreads, int_t N, typename NFact, typename T, int S, class W1, int_t LastK = 1,
56 bool C = ((N>NThreads) && (N>(SwitchToOMP<<NThreads)))>
57 class InTimeOMP;
58 
59 template<int_t NThreads, int_t N, typename Head, typename Tail, typename T, int S, class W1, int_t LastK>
60 class InTimeOMP<NThreads,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
61 {
62  typedef typename TempTypeTrait<T>::Result LocalVType;
63  static const int_t K = Head::first::value;
64  static const int_t M = N/K;
65  static const int_t M2 = M*2;
66  static const int_t N2 = N*2;
67  static const int_t LastK2 = LastK*2;
68  static const int_t NThreadsCreate = (NThreads > K) ? K : NThreads;
69  static const int_t NThreadsNext = (NThreads != NThreadsCreate) ? NThreads-NThreadsCreate : 1;
70 
71  typedef typename IPowBig<W1,K>::Result WK;
72  typedef Loki::Typelist<Pair<typename Head::first, SInt<Head::second::value-1> >, Tail> NFactNext;
75 public:
76  void apply(T* data)
77  {
78  #pragma omp parallel for shared(data) schedule(static) num_threads(NThreadsCreate)
79  for (int_t m = 0; m < N2; m+=M2)
80  dft_str.apply(data + m);
81 
82  dft_scaled.apply(data);
83  }
84 };
85 
86 template<int_t N, typename Head, typename Tail, typename T, int S, class W1, int_t LastK>
87 class InTimeOMP<1,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
88 : public InTime<N,Loki::Typelist<Head,Tail>,T,S,W1,LastK> { };
89 
90 template<int_t NThreads, int_t N, typename NFact, typename T, int S, class W1, int_t LastK>
91 class InTimeOMP<NThreads,N,NFact,T,S,W1,LastK,false> : public InTime<N,NFact,T,S,W1,LastK> { };
92 
94 
95 template<int_t NThreads, int_t N, typename NFact, typename T, int S, class W1, int_t LastK = 1,
96 bool C = ((N>NThreads) && (N>(SwitchToOMP<<NThreads)))>
97 class InTimeOOP_OMP;
98 
99 template<int_t NThreads, int_t N, typename Head, typename Tail, typename T, int S, class W1, int_t LastK>
100 class InTimeOOP_OMP<NThreads,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
101 {
102  typedef typename TempTypeTrait<T>::Result LocalVType;
103  static const int_t K = Head::first::value;
104  static const int_t M = N/K;
105  static const int_t M2 = M*2;
106  static const int_t N2 = N*2;
107  static const int_t LastK2 = LastK*2;
108  static const int_t NThreadsCreate = (NThreads > K) ? K : NThreads;
109  static const int_t NThreadsNext = (NThreads != NThreadsCreate) ? NThreads-NThreadsCreate : 1;
110 
111  typedef typename IPowBig<W1,K>::Result WK;
112  typedef Loki::Typelist<Pair<typename Head::first, SInt<Head::second::value-1> >, Tail> NFactNext;
113  InTimeOOP_OMP<NThreadsNext,M,NFactNext,T,S,WK,K*LastK> dft_str;
114  DFTk_x_Im_T<K,M,T,S,W1,(N<=StaticLoopLimit)> dft_scaled;
115 public:
116 
117  void apply(const T* src, T* dst)
118  {
119  int_t m, lk;
120  #pragma omp parallel for shared(src,dst) private(m,lk) schedule(static) num_threads(NThreadsCreate)
121  for (m = lk = 0; m < N2; m+=M2) {
122  dft_str.apply(src + lk, dst + m);
123  lk += LastK2;
124  }
125 
126  dft_scaled.apply(dst);
127  }
128 };
129 
130 template<int_t N, typename Head, typename Tail, typename T, int S, class W1, int_t LastK>
131 class InTimeOOP_OMP<1,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
132 : public InTimeOOP<N,Loki::Typelist<Head,Tail>,T,S,W1,LastK> { };
133 
134 template<int_t NThreads, int_t N, typename NFact, typename T, int S, class W1, int_t LastK>
135 class InTimeOOP_OMP<NThreads,N,NFact,T,S,W1,LastK,false> : public InTimeOOP<N,NFact,T,S,W1,LastK> { };
136 
137 
154 template<short_t NThreads, int_t N, typename NFact, typename T, int S, class W1, int_t LastK = 1,
155 bool C=((N>NThreads) && (N>(SwitchToOMP<<NThreads)))>
156 class InFreqOMP;
157 
158 template<unsigned int NThreads, int_t N, typename Head, typename Tail, typename T, int S, class W1, int_t LastK>
159 class InFreqOMP<NThreads,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
160 {
161  typedef typename TempTypeTrait<T>::Result LocalVType;
162  static const int_t K = Head::first::value;
163  static const int_t M = N/K;
164  static const int_t M2 = M*2;
165  static const int_t N2 = N*2;
166  static const int_t LastK2 = LastK*2;
167  static const short_t NThreadsCreate = (NThreads > K) ? K : NThreads;
168  static const short_t NThreadsNext = (NThreads != NThreadsCreate) ? NThreads-NThreadsCreate : 1;
169 
170  typedef typename IPowBig<W1,K>::Result WK;
171  typedef Loki::Typelist<Pair<typename Head::first, SInt<Head::second::value-1> >, Tail> NFactNext;
173  T_DFTk_x_Im<K,M,T,S,W1,true> dft_scaled;
174 
175 public:
176  void apply(T* data)
177  {
178  dft_scaled.apply(data);
179 
180  // K times call to dft_str.apply()
181  #pragma omp parallel for shared(data) schedule(static) num_threads(NThreadsCreate)
182  for (int_t m = 0; m < N2; m+=M2)
183  dft_str.apply(data + m);
184  }
185 };
186 
187 template<int_t N, typename Head, typename Tail, typename T, int S, class W1, int_t LastK>
188 class InFreqOMP<1,N,Loki::Typelist<Head,Tail>,T,S,W1,LastK,true>
189 : public InFreq<N,Loki::Typelist<Head,Tail>,T,S,W1,LastK> { };
190 
191 template<short_t NThreads, int_t N, typename NFact, typename T, int S, class W1, int_t LastK>
192 class InFreqOMP<NThreads,N,NFact,T,S,W1,LastK,false> : public InFreq<N,NFact,T,S,W1,LastK> { };
193 
194 
195 
206 /*
207 template<short_t NThreads, uint_t M, uint_t P, typename T,
208 unsigned int I=0, bool C=(((1<<P)>NThreads) && ((1<<P)>=SwitchToOMP))>
209 class GFFTswap2OMP;
210 
211 template<short_t NThreads, uint_t P, typename T, int_t I>
212 class GFFTswap2OMP<NThreads,2,P,T,I,true> {
213  static const int_t BN = 1<<(I+1);
214  static const int_t BR = 1<<(P-I);
215  GFFTswap2OMP<NThreads/2,2,P,T,I+1> next;
216 public:
217  void apply(T* data, const int_t n=0, const int_t r=0) {
218  #pragma omp parallel shared(data)
219  {
220  #pragma omp sections
221  {
222  #pragma omp section
223  next.apply(data, n, r);
224 
225  #pragma omp section
226  next.apply(data, n|BN, r|BR);
227  }
228  }
229  }
230 };
231 
232 template<short_t NThreads, uint_t P, typename T>
233 class GFFTswap2OMP<NThreads,2,P,T,P,true> {
234 public:
235  void apply(T* data, const int_t n, const int_t r) {
236  if (n>r) {
237  swap(data[n],data[r]);
238  swap(data[n+1],data[r+1]);
239  }
240  }
241 };
242 
243 template<int_t P, typename T, int_t I>
244 class GFFTswap2OMP<1,2,P,T,I,true> : public GFFTswap2<2,P,T,I> { };
245 
246 template<int_t P, typename T>
247 class GFFTswap2OMP<1,2,P,T,P,true> : public GFFTswap2<2,P,T,P> { };
248 
249 template<short_t NThreads, int_t P, typename T, int_t I>
250 class GFFTswap2OMP<NThreads,2,P,T,I,false> : public GFFTswap2<2,P,T,I> { };
251 
252 
253 template<unsigned int NThreads, uint_t M, uint_t P, typename T,
254 template<typename> class Complex, unsigned int I>
255 class GFFTswap2OMP<NThreads,M,P,Complex<T>,I,true> {
256  static const int_t BN = 1<<I;
257  static const int_t BR = 1<<(P-I-1);
258  GFFTswap2OMP<NThreads/2,M,P,Complex<T>,I+1> next;
259 public:
260  void apply(Complex<T>* data, const int_t n=0, const int_t r=0) {
261  #pragma omp parallel shared(data)
262  {
263  #pragma omp sections
264  {
265  #pragma omp section
266  next.apply(data,n,r);
267 
268  #pragma omp section
269  next.apply(data,n|BN,r|BR);
270  }
271  }
272  }
273 };
274 
275 template<unsigned int NThreads, uint_t M, uint_t P, typename T,
276 template<typename> class Complex>
277 class GFFTswap2OMP<NThreads,M,P,Complex<T>,true> {
278 public:
279  void apply(Complex<T>* data, const int_t n, const int_t r) {
280  if (n>r)
281  swap(data[n],data[r]);
282  }
283 };
284 
285 template<uint_t M, uint_t P, typename T, unsigned int I,
286 template<typename> class Complex>
287 class GFFTswap2OMP<1,M,P,Complex<T>,I,true> : public GFFTswap2<M,P,Complex<T>,I> { };
288 
289 template<uint_t M, uint_t P, typename T,
290 template<typename> class Complex>
291 class GFFTswap2OMP<1,M,P,Complex<T>,P,true> : public GFFTswap2<M,P,Complex<T>,P> { };
292 
293 template<unsigned int NThreads, uint_t M, uint_t P, typename T, unsigned int I,
294 template<typename> class Complex>
295 class GFFTswap2OMP<NThreads,M,P,Complex<T>,I,false> : public GFFTswap2<M,P,Complex<T>,I> { };
296 */
297 
298 } //namespace
299 
300 #endif

Generated on Mon Feb 10 2014 for Generative Fast Fourier Transforms (GFFT) by DoxyGen 1.8.3.1