IT++ Logo
transforms.cpp
Go to the documentation of this file.
1
30#ifndef _MSC_VER
31# include <itpp/config.h>
32#else
33# include <itpp/config_msvc.h>
34#endif
35
36
37#if defined(HAVE_FFT_MKL)
38
39#include <stdio.h>
40#include <stdlib.h>
41
42namespace mkl
43{
44# include <mkl_dfti.h>
45# include <mkl_service.h>
46# undef DftiCreateDescriptor
47}
48
49#elif defined(HAVE_FFT_ACML)
50
51namespace acml
52{
53# include <acml.h>
54}
55
56#elif defined(HAVE_FFTW3)
57
58# include <fftw3.h>
59
60#endif
61
63
65
66//multithreading mode selector
67enum MultithreadingTag {SingleThreaded = 1, OmpThreaded};
68
69#ifdef _OPENMP
70
71#include <omp.h>
72
73static const MultithreadingTag ThreadingTag = OmpThreaded;
74//number of context records kept per transform type
75//see comments for Transform_Provider class for futher details
76static const int contexts_per_transform_type = 10;
77//specialize mutex for multi-threaded code with OMP
78class Mutex
79{
80 omp_lock_t _lck;
81 //disable copy-construction and assignment
82 Mutex(const Mutex&);
83 Mutex& operator=(const Mutex&);
84public:
85 Mutex() {omp_init_lock(&_lck);}
86 ~Mutex() {omp_destroy_lock(&_lck);}
87 //lock the mutex
88 void lock() {omp_set_lock(&_lck);}
89 //try to lock. returns true if ownership is taken
90 bool try_lock() {return (omp_test_lock(&_lck)) != 0;}
91 //unlock
92 void unlock() {omp_unset_lock(&_lck);}
93};
94
95
96#else
97
98static const MultithreadingTag ThreadingTag = SingleThreaded;
99//number of context records kept per transform type
100//see comments for Transform_Provider class for futher details
101static const int contexts_per_transform_type = 1;
102
103//specialize mutex for single-threaded code
104class Mutex
105{
106 //disable copy-construction and assignment
107 Mutex(const Mutex&);
108 Mutex& operator=(const Mutex&);
109public:
110 Mutex() {}
111 ~Mutex() {}
112 void lock() {}
113 bool try_lock() {return true;}
114 void unlock() {}
115};
116
117#endif
118
119
120//mutex-based lock
121class Lock
122{
123 Mutex& _m;
124 //disable copy-construction and assignment
125 Lock(const Lock&);
126 Lock& operator=(const Lock&);
127public:
128 Lock(Mutex& m): _m(m) {_m.lock();}
129 ~Lock() {_m.unlock();}
130};
131
132
133
134namespace itpp
135{
136
137//define traits for all supported transform types: FFT complex, FFT real, IFFT Complex, IFFT Real, DCT, IDCT
138struct FFTCplx_Traits {
139 typedef cvec InType;
140 typedef cvec OutType;
141};
142
143struct IFFTCplx_Traits {
144 typedef cvec InType;
145 typedef cvec OutType;
146};
147
148struct FFTReal_Traits {
149 typedef vec InType;
150 typedef cvec OutType;
151};
152
153struct IFFTReal_Traits {
154 typedef cvec InType;
155 typedef vec OutType;
156};
157
158struct DCT_Traits {
159 typedef vec InType;
160 typedef vec OutType;
161};
162
163struct IDCT_Traits {
164 typedef vec InType;
165 typedef vec OutType;
166};
167
168//generic transforms implementation based on transform type and specific FFT library
169template<typename TransformTraits> class Transform;
170//FFT library initializer based on mutithreading model
171template<MultithreadingTag> inline void init_fft_library();
172
173#if defined(HAVE_FFT_MKL)
174//MKL-specific implementations
175
176//MKL FFT-related notes:
177//If multithreading is enabled on ITPP level (and in user's code) MKL FFT descriptors can be created, committed and freed by multiple threads
178//In this case Intel recommends to use single-threaded FFT implementation:
179//1. "Intel® MKL 10.x threading", http://software.intel.com/en-us/articles/intel-mkl-10x-threading,
180//2. "Examples of Using Multi-Threading for FFT Computation", http://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-00422EBE-93C3-4BC9-A621-9BF0A0E93888.htm
181//Based on examples, provided by Intel, it seems to be safe to create/commit/free and run FFT on per-thread descriptor
182//without additional locking
183
184template<> inline void init_fft_library<SingleThreaded>() {} //assume no actions required. ITPP does not use threading, so FFT library is free to use it's own threading implementation
185template<> inline void init_fft_library<OmpThreaded>()
186{
187 //switch FFT domain of MKL to single-threaded mode as Intel suggests
188 //this should work starting from MKL 10.0
189 mkl::mkl_domain_set_num_threads(1, MKL_FFT);
190}
191
192//---------------------------------------------------------------------------
193// FFT/IFFT based on MKL
194//---------------------------------------------------------------------------
195
196inline void release_descriptor(mkl::DFTI_DESCRIPTOR* h)
197{
198 if(h != NULL) {
199 MKL_LONG status = mkl::DftiFreeDescriptor(&h);
200 if(status) {
201 it_info(mkl::DftiErrorMessage(status));
202 it_error("MKL library release_descriptor() failed on DftiFreeDescriptor.");
203 }
204 }
205}
206
207template<> class Transform<FFTCplx_Traits>
208{
209 mkl::DFTI_DESCRIPTOR* _h;
210 int _transform_length;
211public:
212 Transform(): _h(NULL), _transform_length(0) {}
213
214 void compute_transform(const cvec &in, cvec &out) {
215 out.set_size(in.size(), false);
216 if(_transform_length != in.size()) {
217 release_descriptor(_h);
218 _transform_length = in.size();
219
220 MKL_LONG status = mkl::DftiCreateDescriptor(&_h, mkl::DFTI_DOUBLE, mkl::DFTI_COMPLEX, 1, _transform_length);
221 if(status) {
222 it_info(mkl::DftiErrorMessage(status));
223 it_error("MKL library compute_transform() failed on DftiCreateDescriptor.");
224 }
225
226 mkl::DftiSetValue(_h, mkl::DFTI_PLACEMENT, mkl::DFTI_NOT_INPLACE);
227
228 status = mkl::DftiCommitDescriptor(_h);
229 if(status) {
230 it_info(mkl::DftiErrorMessage(status));
231 it_error("MKL library compute_transform() failed on DftiCommitDescriptor.");
232 }
233
234 }
235 mkl::DftiComputeForward(_h, (void *)in._data(), out._data());
236 }
237
238 void reset() {release_descriptor(_h); *this = Transform();}
239};
240
241template<> class Transform<IFFTCplx_Traits>
242{
243 mkl::DFTI_DESCRIPTOR* _h;
244 int _transform_length;
245public:
246 Transform(): _h(NULL), _transform_length(0) {}
247
248 void compute_transform(const cvec &in, cvec &out) {
249 out.set_size(in.size(), false);
250 if(_transform_length != in.size()) {
251 release_descriptor(_h);
252 _transform_length = in.size();
253 MKL_LONG status = mkl::DftiCreateDescriptor(&_h, mkl::DFTI_DOUBLE, mkl::DFTI_COMPLEX, 1, _transform_length);
254 if(status) {
255 it_info(mkl::DftiErrorMessage(status));
256 it_error("MKL library compute_transform() failed on DftiCreateDescriptor.");
257 }
258
259 mkl::DftiSetValue(_h, mkl::DFTI_PLACEMENT, mkl::DFTI_NOT_INPLACE);
260 mkl::DftiSetValue(_h, mkl::DFTI_BACKWARD_SCALE, 1.0 / _transform_length);
261
262 status = mkl::DftiCommitDescriptor(_h);
263 if(status) {
264 it_info(mkl::DftiErrorMessage(status));
265 it_error("MKL library compute_transform() failed on DftiCommitDescriptor.");
266 }
267
268 }
269 mkl::DftiComputeBackward(_h, (void *)in._data(), out._data());
270 }
271
272 void reset() {release_descriptor(_h); *this = Transform();}
273};
274
275template<> class Transform<FFTReal_Traits>
276{
277 mkl::DFTI_DESCRIPTOR* _h;
278 int _transform_length;
279public:
280 Transform(): _h(NULL), _transform_length(0) {}
281
282 void compute_transform(const vec &in, cvec &out) {
283 out.set_size(in.size(), false);
284 if(_transform_length != in.size()) {
285 release_descriptor(_h);
286 _transform_length = in.size();
287
288 MKL_LONG status = mkl::DftiCreateDescriptor(&_h, mkl::DFTI_DOUBLE, mkl::DFTI_REAL, 1, _transform_length);
289 if(status) {
290 it_info(mkl::DftiErrorMessage(status));
291 it_error("MKL library compute_transform() failed on DftiCreateDescriptor.");
292 }
293
294 mkl::DftiSetValue(_h, mkl::DFTI_PLACEMENT, mkl::DFTI_NOT_INPLACE);
295
296 status = mkl::DftiCommitDescriptor(_h);
297 if(status) {
298 it_info(mkl::DftiErrorMessage(status));
299 it_error("MKL library compute_transform() failed on DftiCommitDescriptor.");
300 }
301
302 }
303 mkl::DftiComputeForward(_h, (void *)in._data(), out._data());
304 // Real FFT does not compute the 2nd half of the FFT points because it
305 // is redundant to the 1st half. However, we want all of the data so we
306 // fill it in. This is consistent with Matlab's functionality
307 int istart = ceil_i(in.size() / 2.0);
308 int idelta = in.size() - istart;
309 out.set_subvector(istart, reverse(conj(out(1, idelta))));
310 }
311
312 void reset() {release_descriptor(_h); *this = Transform();}
313};
314
315template<> class Transform<IFFTReal_Traits>
316{
317 mkl::DFTI_DESCRIPTOR* _h;
318 int _transform_length;
319public:
320 Transform(): _h(NULL), _transform_length(0) {}
321
322 void compute_transform(const cvec &in, vec &out) {
323 out.set_size(in.size(), false);
324 if(_transform_length != in.size()) {
325 release_descriptor(_h);
326 _transform_length = in.size();
327
328 MKL_LONG status = mkl::DftiCreateDescriptor(&_h, mkl::DFTI_DOUBLE, mkl::DFTI_REAL, 1, _transform_length);
329 if(status) {
330 it_info(mkl::DftiErrorMessage(status));
331 it_error("MKL library compute_transform() failed on DftiCreateDescriptor.");
332 }
333
334 mkl::DftiSetValue(_h, mkl::DFTI_PLACEMENT, mkl::DFTI_NOT_INPLACE);
335 mkl::DftiSetValue(_h, mkl::DFTI_BACKWARD_SCALE, 1.0 / _transform_length);
336
337 status = mkl::DftiCommitDescriptor(_h);
338 if(status) {
339 it_info(mkl::DftiErrorMessage(status));
340 it_error("MKL library compute_transform() failed on DftiCommitDescriptor.");
341 }
342
343 }
344 mkl::DftiComputeBackward(_h, (void *)in._data(), out._data());
345 }
346
347 void reset() {release_descriptor(_h); *this = Transform();}
348};
349
350#endif // #ifdef HAVE_FFT_MKL
351
352
353#if defined(HAVE_FFT_ACML)
354//ACML-specific implementations
355
356//ACML FFT-related notes:
357//ACML documentation is not very verbose regarding the multithreaded use of the library, but multithreaded ifort-built ACML uses
358//OMP internally. AMD recommends linking with SINGLE-THREADED library if OMP is enabled in user's code. Also, they claim that
359//single-threaded functions can be used from the multiple threads simultaniously (see http://devgurus.amd.com/thread/141592 for
360//multi-threading discussion on AMD dev forums) The thread-safety of library functions is also mentioned in ACML release notes (ver 4.4.0).
361//In the following implementation we assume that ACML transform functions can be run simultaneously from different threads safely if they operate
362//on different data sets.
363template<> inline void init_fft_library<SingleThreaded>() {} //assume no actions required.
364template<> inline void init_fft_library<OmpThreaded>() {}
365
366//---------------------------------------------------------------------------
367// FFT/IFFT based on ACML
368//---------------------------------------------------------------------------
369template<> class Transform<FFTCplx_Traits>
370{
371 cvec _scratchpad;
372 int _transform_length;
373public:
374 Transform(): _transform_length(0) {}
375
376 void compute_transform(const cvec &in, cvec &out) {
377 int info;
378 out.set_size(in.size(), false);
379 if(_transform_length != in.size()) {
380 _transform_length = in.size();
381
382 int min_required_size = 5 * _transform_length + 100; //ACML guides suggest 3*size + 100 here, but ITPP code uses 5.
383 if(_scratchpad.size() < min_required_size) _scratchpad.set_size(min_required_size);
384
385 acml::zfft1dx(0, 1.0, false, _transform_length, (acml::doublecomplex *)in._data(), 1,
386 (acml::doublecomplex *)out._data(), 1,
387 (acml::doublecomplex *)_scratchpad._data(), &info);
388 }
389 acml::zfft1dx(-1, 1.0, false, _transform_length, (acml::doublecomplex *)in._data(), 1,
390 (acml::doublecomplex *)out._data(), 1,
391 (acml::doublecomplex *)_scratchpad._data(), &info);
392 }
393
394 void reset() {*this = Transform();}
395};
396
397template<> class Transform<IFFTCplx_Traits>
398{
399 cvec _scratchpad;
400 int _transform_length;
401public:
402 Transform(): _transform_length(0) {}
403
404 void compute_transform(const cvec &in, cvec &out) {
405 int info;
406 out.set_size(in.size(), false);
407 if(_transform_length != in.size()) {
408 _transform_length = in.size();
409
410 int min_required_size = 5 * _transform_length + 100; //ACML guides suggest 3*size + 100 here, but ITPP code uses 5.
411 if(_scratchpad.size() < min_required_size) _scratchpad.set_size(min_required_size);
412
413 acml::zfft1dx(0, 1.0 / _transform_length, false, _transform_length, (acml::doublecomplex *)in._data(), 1,
414 (acml::doublecomplex *)out._data(), 1,
415 (acml::doublecomplex *)_scratchpad._data(), &info);
416 }
417 acml::zfft1dx(1, 1.0 / _transform_length, false, _transform_length, (acml::doublecomplex *)in._data(), 1,
418 (acml::doublecomplex *)out._data(), 1,
419 (acml::doublecomplex *)_scratchpad._data(), &info);
420 }
421
422 void reset() {*this = Transform();}
423};
424
425template<> class Transform<FFTReal_Traits>
426{
427 vec _scratchpad;
428 int _transform_length;
429public:
430 Transform(): _transform_length(0) {}
431
432 void compute_transform(const vec &in, cvec &out) {
433 vec out_re = in;
434
435 int info;
436 if(_transform_length != in.size()) {
437 _transform_length = in.size();
438
439 int min_required_size = 5 * _transform_length + 100; //ACML guides suggest 3*size + 100 here, but ITPP code uses 5.
440 if(_scratchpad.size() < min_required_size) _scratchpad.set_size(min_required_size);
441
442 acml::dzfft(0, _transform_length, out_re._data(), _scratchpad._data(), &info);
443 }
444 acml::dzfft(1, _transform_length, out_re._data(), _scratchpad._data(), &info);
445
446 // Normalise output data
447 double factor = std::sqrt(static_cast<double>(_transform_length));
448 out_re *= factor;
449
450 // Convert the real Hermitian DZFFT's output to the Matlab's complex form
451 vec out_im(_transform_length);
452 out_im(0) = 0.0;
453 if(!(_transform_length % 2)) out_im(_transform_length / 2) = 0.0; //even transform length
454 out_im.set_subvector(1, reverse(out_re(_transform_length / 2 + 1, _transform_length - 1)));
455 out_im.set_subvector(_transform_length / 2 + 1, -out_re(_transform_length / 2 + 1, _transform_length - 1));
456 out_re.set_subvector(_transform_length / 2 + 1, reverse(out_re(1, (_transform_length - 1) / 2)));
457
458 out.set_size(_transform_length, false);
459 out = to_cvec(out_re, out_im);
460 }
461
462 void reset() {*this = Transform();}
463};
464
465template<> class Transform<IFFTReal_Traits>
466{
467 vec _scratchpad;
468 int _transform_length;
469public:
470 Transform(): _transform_length(0) {}
471
472 void compute_transform(const cvec &in, vec &out) {
473 // Convert Matlab's complex input to the real Hermitian form
474 out.set_size(in.size());
475 out.set_subvector(0, real(in(0, in.size() / 2)));
476 out.set_subvector(in.size() / 2 + 1, -imag(in(in.size() / 2 + 1, in.size() - 1)));
477
478 int info;
479 if(_transform_length != in.size()) {
480 _transform_length = in.size();
481
482 int min_required_size = 5 * _transform_length + 100; //ACML guides suggest 3*size + 100 here, but ITPP code uses 5.
483 if(_scratchpad.size() < min_required_size) _scratchpad.set_size(min_required_size);
484
485 acml::zdfft(0, _transform_length, out._data(), _scratchpad._data(), &info);
486 }
487 acml::zdfft(1, _transform_length, out._data(), _scratchpad._data(), &info);
488 out.set_subvector(1, reverse(out(1, _transform_length - 1)));
489
490 // Normalise output data
491 double factor = 1.0 / std::sqrt(static_cast<double>(_transform_length));
492 out *= factor;
493 }
494
495 void reset() {*this = Transform();}
496};
497
498
499
500#endif // defined(HAVE_FFT_ACML)
501
502
503#if defined(HAVE_FFTW3)
504//FFTW3-specific implementations
505
506//FFTW3-related notes:
507//Based on the FFtW3 documentation, it is thread-safe to call fftw_execute family functions simultaniously from several threads assuming that data sets are different in each thread.
508//FFTW plans creation-destruction is not thread-safe and should be serialized by the caller. FFTW provides some functions to execute transforms with multiple threads (assuming FFTW
509// is compiled and linked with multithreading support). Current ITPP implementation does not use any of them.
510
511template<> inline void init_fft_library<SingleThreaded>() {} //assume no actions required.
512template<> inline void init_fft_library<OmpThreaded>() {}
513//define global lock for operations with FFTW plans.
514Mutex& get_library_lock()
515{
516 static Mutex FFTW3LibraryLock;
517 return FFTW3LibraryLock;
518}
519//---------------------------------------------------------------------------
520// FFT/IFFT based on FFTW
521//---------------------------------------------------------------------------
522inline void destroy_plan(fftw_plan p)
523{
524 if(p != NULL) fftw_destroy_plan(p); // destroy the plan
525}
526
527template<> class Transform<FFTCplx_Traits>
528{
529 fftw_plan _p;
530 int _transform_length;
531public:
532 Transform(): _p(NULL), _transform_length(0) {}
533
534 void compute_transform(const cvec &in, cvec &out) {
535 out.set_size(in.size(), false);
536 if(_transform_length != in.size()) {
537 Lock l(get_library_lock()); //apply global library lock on plan changes
538
539 _transform_length = in.size();
540 destroy_plan(_p); // destroy the previous plan
541 // create a new plan (creation of plan guarantees not to return NULL)
542 _p = fftw_plan_dft_1d(_transform_length, (fftw_complex *)in._data(),
543 (fftw_complex *)out._data(),
544 FFTW_FORWARD, FFTW_ESTIMATE);
545 }
546 //compute FFT using the GURU FFTW interface
547 fftw_execute_dft(_p, (fftw_complex *)in._data(),
548 (fftw_complex *)out._data());
549 }
550
551 void reset() {destroy_plan(_p); *this = Transform();}
552};
553
554template<> class Transform<IFFTCplx_Traits>
555{
556 fftw_plan _p;
557 int _transform_length;
558public:
559 Transform(): _p(NULL), _transform_length(0) {}
560
561 void compute_transform(const cvec &in, cvec &out) {
562 out.set_size(in.size(), false);
563 if(_transform_length != in.size()) {
564 Lock l(get_library_lock()); //apply global library lock on plan changes
565
566 _transform_length = in.size();
567 destroy_plan(_p); // destroy the previous plan
568
569 // create a new plan (creation of plan guarantees not to return NULL)
570 _p = fftw_plan_dft_1d(_transform_length, (fftw_complex *)in._data(),
571 (fftw_complex *)out._data(),
572 FFTW_BACKWARD, FFTW_ESTIMATE);
573 }
574 //compute FFT using the GURU FFTW interface
575 fftw_execute_dft(_p, (fftw_complex *)in._data(),
576 (fftw_complex *)out._data());
577 // scale output
578 double inv_N = 1.0 / _transform_length;
579 out *= inv_N;
580
581 }
582
583 void reset() {destroy_plan(_p); *this = Transform();}
584};
585
586template<> class Transform<FFTReal_Traits>
587{
588 fftw_plan _p;
589 int _transform_length;
590public:
591 Transform(): _p(NULL), _transform_length(0) {}
592
593 void compute_transform(const vec &in, cvec &out) {
594 out.set_size(in.size(), false);
595 if(_transform_length != in.size()) {
596 Lock l(get_library_lock()); //apply global library lock on plan changes
597
598 _transform_length = in.size();
599 destroy_plan(_p); // destroy the previous plan
600
601 // create a new plan (creation of plan guarantees not to return NULL)
602 _p = fftw_plan_dft_r2c_1d(_transform_length, (double *)in._data(),
603 (fftw_complex *)out._data(),
604 FFTW_ESTIMATE);
605 }
606 //compute FFT using the GURU FFTW interface
607 fftw_execute_dft_r2c(_p, (double *)in._data(),
608 (fftw_complex *)out._data());
609 // Real FFT does not compute the 2nd half of the FFT points because it
610 // is redundant to the 1st half. However, we want all of the data so we
611 // fill it in. This is consistent with Matlab's functionality
612 int offset = ceil_i(_transform_length / 2.0);
613 int n_elem = _transform_length - offset;
614 for(int i = 0; i < n_elem; ++i) {
615 out(offset + i) = std::conj(out(n_elem - i));
616 }
617 }
618
619 void reset() {destroy_plan(_p); *this = Transform();}
620};
621
622template<> class Transform<IFFTReal_Traits>
623{
624 fftw_plan _p;
625 int _transform_length;
626public:
627 Transform(): _p(NULL), _transform_length(0) {}
628
629 void compute_transform(const cvec &in, vec &out) {
630 out.set_size(in.size(), false);
631 if(_transform_length != in.size()) {
632 Lock l(get_library_lock()); //apply global library lock on plan changes
633
634 _transform_length = in.size();
635 destroy_plan(_p); // destroy the previous plan
636
637 // create a new plan (creation of plan guarantees not to return NULL)
638 _p = fftw_plan_dft_c2r_1d(_transform_length, (fftw_complex *)in._data(),
639 (double *)out._data(),
640 FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
641 }
642 //compute FFT using the GURU FFTW interface
643 fftw_execute_dft_c2r(_p, (fftw_complex *)in._data(),
644 (double *)out._data());
645 // scale output
646 double inv_N = 1.0 / _transform_length;
647 out *= inv_N;
648
649 }
650
651 void reset() {destroy_plan(_p); *this = Transform();}
652};
653
654//---------------------------------------------------------------------------
655// DCT/IDCT based on FFTW
656//---------------------------------------------------------------------------
657template<> class Transform<DCT_Traits>
658{
659 fftw_plan _p;
660 int _transform_length;
661public:
662 Transform(): _p(NULL), _transform_length(0) {}
663
664 void compute_transform(const vec &in, vec &out) {
665 out.set_size(in.size(), false);
666 if(_transform_length != in.size()) {
667 Lock l(get_library_lock()); //apply global library lock on plan changes
668
669 _transform_length = in.size();
670 destroy_plan(_p); // destroy the previous plan
671
672 // create a new plan (creation of plan guarantees not to return NULL)
673 _p = fftw_plan_r2r_1d(_transform_length, (double *)in._data(),
674 (double *)out._data(),
675 FFTW_REDFT10, FFTW_ESTIMATE);
676 }
677 // compute FFT using the GURU FFTW interface
678 fftw_execute_r2r(_p, (double *)in._data(), (double *)out._data());
679
680 // Scale to matlab definition format
681 out /= std::sqrt(2.0 * _transform_length);
682 out(0) /= std::sqrt(2.0);
683 }
684
685 void reset() {destroy_plan(_p); *this = Transform();}
686};
687
688template<> class Transform<IDCT_Traits>
689{
690 fftw_plan _p;
691 int _transform_length;
692public:
693 Transform(): _p(NULL), _transform_length(0) {}
694
695 void compute_transform(const vec &in, vec &out) {
696 out = in;
697
698 // Rescale to FFTW format
699 out(0) *= std::sqrt(2.0);
700 out /= std::sqrt(2.0 * in.size());
701
702 if(_transform_length != in.size()) {
703 Lock l(get_library_lock()); //apply global library lock on plan changes
704
705 _transform_length = in.size();
706 destroy_plan(_p); // destroy the previous plan
707
708 // create a new plan (creation of plan guarantees not to return NULL)
709 _p = fftw_plan_r2r_1d(_transform_length, (double *)in._data(),
710 (double *)out._data(),
711 FFTW_REDFT01, FFTW_ESTIMATE);
712 }
713 // compute FFT using the GURU FFTW interface
714 fftw_execute_r2r(_p, (double *)out._data(), (double *)out._data());
715 }
716
717 void reset() {destroy_plan(_p); *this = Transform();}
718};
719
720#endif // defined(HAVE_FFTW3)
721
722#if defined(HAVE_FFT_MKL) || defined(HAVE_FFT_ACML)
723
724//---------------------------------------------------------------------------
725// DCT/IDCT based on MKL or ACML
726//---------------------------------------------------------------------------
727
728//use FFT on real values to perform DCT
729template<> class Transform<DCT_Traits>
730{
731 Transform<FFTReal_Traits> _tr;
732public:
733 Transform() {}
734
735 void compute_transform(const vec &in, vec &out) {
736 int N = in.size();
737 if(N == 1)
738 out = in;
739 else {
740 cvec c;
741 _tr.compute_transform(concat(in, reverse(in)), c);
742 c.set_size(N, true);
743 for(int i = 0; i < N; i++) {
744 c(i) *= std::complex<double>(std::cos(pi * i / N / 2), std::sin(-pi * i / N / 2))
745 / std::sqrt(2.0 * N);
746 }
747 out = real(c);
748 out(0) /= std::sqrt(2.0);
749 }
750 }
751
752 void reset() {_tr.reset();}
753};
754
755//use IFFT with real output to perform IDCT
756template<> class Transform<IDCT_Traits>
757{
758 Transform<IFFTReal_Traits> _tr;
759public:
760 Transform() {}
761
762 void compute_transform(const vec &in, vec &out) {
763 int N = in.size();
764 if(N == 1)
765 out = in;
766 else {
767 cvec c = to_cvec(in);
768 c.set_size(2 * N, true);
769 c(0) *= std::sqrt(2.0);
770 for(int i = 0; i < N; i++) {
771 c(i) *= std::complex<double>(std::cos(pi * i / N / 2), std::sin(pi * i / N / 2))
772 * std::sqrt(2.0 * N);
773 }
774 for(int i = N - 1; i >= 1; i--) {
775 c(c.size() - i) = c(i) * std::complex<double>(std::cos(pi * i / N),
776 std::sin(-pi * i / N));
777 }
778 _tr.compute_transform(c, out);
779 out.set_size(N, true);
780 }
781 }
782
783 void reset() {_tr.reset();}
784};
785
786#endif
787
788#if defined(HAVE_FFT)
789//lock-protected transform to serialize accesses to the context from several threads
790template<typename TransformTraits> class Locked_Transform : private Transform<TransformTraits>
791{
792 typedef Transform<TransformTraits> Base;
793 Mutex _m;
794public:
795 Locked_Transform() {}
796 //release context
797 void release_context() {Lock l(_m); Base::reset();}
798 void run_transform(const typename TransformTraits::InType& in, typename TransformTraits::OutType& out) {Lock l(_m); Base::compute_transform(in, out);}
799};
800
801//Typical multithreaded application creates several threads upon entry to parallel region and join them upon exit from it.
802//Threads used to perform parallel computations can either be terminated upon exit from the parallel region or left in the
803//parked state, so in the next parallel region application can reuse already created threads from the team instead of the
804//time-consuming creation of new threads. There is no way to control threads creation-destruction with OMP and, therefore
805//there is no way to implement automatic clean-up of transform computation contexts for each thread (basically, this means that
806//we can not appropriately release FFT library resources and this results in memory leak)
807
808//In order to solve this problem and implement the FFT transforms in multithreded environment library relyes on the statically
809//created pool of transform contexts.
810
811//Each thread willing to run the transfrom queries the context index from transfrom provider. Thread uses assigned index and
812//corresponding context to compute the transforms during it's lifetime. Provider assigns contexts in round-robbin fashion, so
813//context used by the exited threads are reused by newly created ones.
814
815//Single context can be reused by multiple threads if application created more then contexts_per_transform_type threads performing
816//some type of transform.
817static bool is_library_initialized = false;
818
819template<typename TransformTraits> class Transform_Provider
820{
821 typedef Locked_Transform<TransformTraits> Transform;
822 Transform _transforms[contexts_per_transform_type];
823 int _id;
824public:
825 Transform_Provider(): _id(0) {
826 if(!is_library_initialized) {
827 //initialize FFT library on first conctruction of any of Transform_Provider objects
828 init_fft_library<ThreadingTag>();
829 is_library_initialized = true;
830 }
831 }
832 int get_context_id() {
833 //assign id in round-robin fashion.
834 int ret = _id + 1;
835 if(ret == contexts_per_transform_type)
836 _id = 0;
837 else
838 _id = ret;
839 return ret;
840 }
841 void run_transform(int id, const typename TransformTraits::InType& in, typename TransformTraits::OutType& out) {
842 _transforms[id - 1].run_transform(in, out);
843 }
844 //provider destructor. releases context resources.
845 //destructor is called after the main() exits, so there is no need to protect context release with mutex
846 ~Transform_Provider() {
847 for(int i = 0; i < contexts_per_transform_type; ++i)
848 _transforms[i].release_context();
849 }
850};
851
852//Transform_Provider is constructed upon the first request
853template<typename TransformTraits> Transform_Provider<TransformTraits>& get_transform_provider()
854{
855 static Transform_Provider<TransformTraits> p;
856 return p;
857}
858
859void fft(const cvec &in, cvec &out)
860{
861 static int context_id = 0;
862 #pragma omp threadprivate(context_id)
863
864 if(context_id == 0) {
865 //first-time transform call
866 #pragma omp critical
867 {
868 //serialize access to transform provider to get the id
869 context_id = get_transform_provider<FFTCplx_Traits>().get_context_id();
870 }
871 }
872 it_assert(in.size() > 0, "fft(): zero-sized input detected");
873 //there is no need to serialize here, since provider is constructed at this point
874 get_transform_provider<FFTCplx_Traits>().run_transform(context_id, in, out);
875}
876
877void ifft(const cvec &in, cvec &out)
878{
879 static int context_id = 0;
880 #pragma omp threadprivate(context_id)
881
882 if(context_id == 0) {
883 //first-time transform call
884 #pragma omp critical
885 {
886 //serialize access to transform provider to get the id
887 context_id = get_transform_provider<IFFTCplx_Traits>().get_context_id();
888 }
889 }
890 it_assert(in.size() > 0, "ifft(): zero-sized input detected");
891 //there is no need to serialize here, since provider is constructed at this point
892 get_transform_provider<IFFTCplx_Traits>().run_transform(context_id, in, out);
893}
894
895void fft_real(const vec &in, cvec &out)
896{
897 static int context_id = 0;
898 #pragma omp threadprivate(context_id)
899
900 if(context_id == 0) {
901 //first-time transform call
902 #pragma omp critical
903 {
904 //serialize access to transform provider to get the id
905 context_id = get_transform_provider<FFTReal_Traits>().get_context_id();
906 }
907 }
908 it_assert(in.size() > 0, "fft_real(): zero-sized input detected");
909 //there is no need to serialize here, since provider is constructed at this point
910 get_transform_provider<FFTReal_Traits>().run_transform(context_id, in, out);
911}
912
913void ifft_real(const cvec &in, vec &out)
914{
915 static int context_id = 0;
916 #pragma omp threadprivate(context_id)
917
918 if(context_id == 0) {
919 //first-time transform call
920 #pragma omp critical
921 {
922 //serialize access to transform provider to get the id
923 context_id = get_transform_provider<IFFTReal_Traits>().get_context_id();
924 }
925 }
926 it_assert(in.size() > 0, "ifft_real(): zero-sized input detected");
927 //there is no need to serialize here, since provider is constructed at this point
928 get_transform_provider<IFFTReal_Traits>().run_transform(context_id, in, out);
929}
930
931void dct(const vec &in, vec &out)
932{
933 static int context_id = 0;
934 #pragma omp threadprivate(context_id)
935
936 if(context_id == 0) {
937 //first-time transform call
938 #pragma omp critical
939 {
940 //serialize access to transform provider to get the id
941 context_id = get_transform_provider<DCT_Traits>().get_context_id();
942 }
943 }
944 it_assert(in.size() > 0, "dct(): zero-sized input detected");
945 //there is no need to serialize here, since provider is definitely constructed at this point
946 get_transform_provider<DCT_Traits>().run_transform(context_id, in, out);
947}
948
949void idct(const vec &in, vec &out)
950{
951 static int context_id = 0;
952 #pragma omp threadprivate(context_id)
953
954 if(context_id == 0) {
955 //first-time transform call
956 #pragma omp critical
957 {
958 //serialize access to transform provider to get the id
959 context_id = get_transform_provider<IDCT_Traits>().get_context_id();
960 }
961 }
962 it_assert(in.size() > 0, "dct(): zero-sized input detected");
963 //there is no need to serialize here, since provider is definitely constructed at this point
964 get_transform_provider<IDCT_Traits>().run_transform(context_id, in, out);
965}
966
967bool have_fourier_transforms() {return true;}
968bool have_cosine_transforms() {return true;}
969#else
970
971void fft(const cvec &in, cvec &out)
972{
973 it_error("FFT library is needed to use fft() function");
974}
975
976void ifft(const cvec &in, cvec &out)
977{
978 it_error("FFT library is needed to use ifft() function");
979}
980
981void fft_real(const vec &in, cvec &out)
982{
983 it_error("FFT library is needed to use fft_real() function");
984}
985
986void ifft_real(const cvec &in, vec & out)
987{
988 it_error("FFT library is needed to use ifft_real() function");
989}
990
991void dct(const vec &in, vec &out)
992{
993 it_error("FFT library is needed to use dct() function");
994}
995
996void idct(const vec &in, vec &out)
997{
998 it_error("FFT library is needed to use idct() function");
999}
1000
1001bool have_fourier_transforms() {return false;}
1002bool have_cosine_transforms() {return false;}
1003
1004#endif // defined(HAVE_FFT)
1005
1006
1007cvec fft(const cvec &in)
1008{
1009 cvec out;
1010 fft(in, out);
1011 return out;
1012}
1013
1014cvec fft(const cvec &in, const int N)
1015{
1016 cvec in2 = in;
1017 cvec out;
1018 in2.set_size(N, true);
1019 fft(in2, out);
1020 return out;
1021}
1022
1023cvec ifft(const cvec &in)
1024{
1025 cvec out;
1026 ifft(in, out);
1027 return out;
1028}
1029
1030cvec ifft(const cvec &in, const int N)
1031{
1032 cvec in2 = in;
1033 cvec out;
1034 in2.set_size(N, true);
1035 ifft(in2, out);
1036 return out;
1037}
1038
1039cvec fft_real(const vec& in)
1040{
1041 cvec out;
1042 fft_real(in, out);
1043 return out;
1044}
1045
1046cvec fft_real(const vec& in, const int N)
1047{
1048 vec in2 = in;
1049 cvec out;
1050 in2.set_size(N, true);
1051 fft_real(in2, out);
1052 return out;
1053}
1054
1055vec ifft_real(const cvec &in)
1056{
1057 vec out;
1058 ifft_real(in, out);
1059 return out;
1060}
1061
1062vec ifft_real(const cvec &in, const int N)
1063{
1064 cvec in2 = in;
1065 in2.set_size(N, true);
1066 vec out;
1067 ifft_real(in2, out);
1068 return out;
1069}
1070
1071vec dct(const vec &in)
1072{
1073 vec out;
1074 dct(in, out);
1075 return out;
1076}
1077
1078vec dct(const vec &in, const int N)
1079{
1080 vec in2 = in;
1081 vec out;
1082 in2.set_size(N, true);
1083 dct(in2, out);
1084 return out;
1085}
1086
1087
1088vec idct(const vec &in)
1089{
1090 vec out;
1091 idct(in, out);
1092 return out;
1093}
1094
1095vec idct(const vec &in, const int N)
1096{
1097 vec in2 = in;
1098 vec out;
1099 in2.set_size(N, true);
1100 idct(in2, out);
1101 return out;
1102}
1103// ----------------------------------------------------------------------
1104// Instantiation
1105// ----------------------------------------------------------------------
1106
1107template ITPP_EXPORT vec dht(const vec &v);
1108template ITPP_EXPORT cvec dht(const cvec &v);
1109
1110template ITPP_EXPORT void dht(const vec &vin, vec &vout);
1111template ITPP_EXPORT void dht(const cvec &vin, cvec &vout);
1112
1113template ITPP_EXPORT void self_dht(vec &v);
1114template ITPP_EXPORT void self_dht(cvec &v);
1115
1116template ITPP_EXPORT vec dwht(const vec &v);
1117template ITPP_EXPORT cvec dwht(const cvec &v);
1118
1119template ITPP_EXPORT void dwht(const vec &vin, vec &vout);
1120template ITPP_EXPORT void dwht(const cvec &vin, cvec &vout);
1121
1122template ITPP_EXPORT void self_dwht(vec &v);
1123template ITPP_EXPORT void self_dwht(cvec &v);
1124
1125template ITPP_EXPORT mat dht2(const mat &m);
1126template ITPP_EXPORT cmat dht2(const cmat &m);
1127
1128template ITPP_EXPORT mat dwht2(const mat &m);
1129template ITPP_EXPORT cmat dwht2(const cmat &m);
1130
1131} // namespace itpp
1132
ITPP_EXPORT void idct(const vec &in, vec &out)
Inverse Discrete Cosine Transform (IDCT)
ITPP_EXPORT void dct(const vec &in, vec &out)
Discrete Cosine Transform (DCT)
ITPP_EXPORT bool have_cosine_transforms()
Run-time test if library is built with cosine transforms enabled.
#define it_error(s)
Abort unconditionally.
Definition: itassert.h:126
#define it_info(s)
Print information message.
Definition: itassert.h:136
#define it_assert(t, s)
Abort if t is not true.
Definition: itassert.h:94
ITPP_EXPORT void ifft(const cvec &in, cvec &out)
Inverse Fast Fourier Transform.
ITPP_EXPORT void fft_real(const vec &in, cvec &out)
Real Fast Fourier Transform.
ITPP_EXPORT void ifft_real(const cvec &in, vec &out)
Inverse Real Fast Fourier Transform.
ITPP_EXPORT bool have_fourier_transforms()
Run-time test if library is built with Fast Fourier Transforms enabled.
ITPP_EXPORT void fft(const cvec &in, cvec &out)
Fast Fourier Transform.
Mat< T > dwht2(const Mat< T > &m)
Fast 2D Walsh Hadamard Transform.
Definition: transforms.h:376
Vec< T > dwht(const Vec< T > &v)
Fast Walsh Hadamard Transform.
Definition: transforms.h:333
Vec< T > dht(const Vec< T > &v)
Fast Hadamard Transform.
Definition: transforms.h:243
void self_dht(Vec< T > &v)
Fast Hadamard Transform - memory efficient. Stores the result in v.
Definition: transforms.h:307
Mat< T > dht2(const Mat< T > &m)
Fast 2D Hadamard Transform.
Definition: transforms.h:356
void self_dwht(Vec< T > &v)
Fast Walsh Hadamard Transform - memory efficient (result in v)
Definition: transforms.h:349
vec imag(const cvec &data)
Imaginary part of complex values.
Definition: elem_math.cpp:180
vec real(const cvec &data)
Real part of complex values.
Definition: elem_math.cpp:157
cvec conj(const cvec &x)
Conjugate of complex value.
Definition: elem_math.h:226
vec sqrt(const vec &x)
Square root of the elements.
Definition: elem_math.h:123
Vec< T > reverse(const Vec< T > &in)
Reverse the input vector.
Definition: matfunc.h:777
vec sin(const vec &x)
Sine function.
Definition: trig_hyp.h:54
vec cos(const vec &x)
Cosine function.
Definition: trig_hyp.h:58
itpp namespace
Definition: itmex.h:37
cvec to_cvec(const Vec< T > &v)
Converts a Vec<T> to cvec.
Definition: converters.h:107
const Array< T > concat(const Array< T > &a, const T &e)
Append element e to the end of the Array a.
Definition: array.h:486
const double pi
Constant Pi.
Definition: misc.h:103
int ceil_i(double x)
The nearest larger integer.
Definition: converters.h:339
Fourier, Hadamard, Walsh-Hadamard, and 2D Hadamard transforms - header file.
SourceForge Logo

Generated on Sun Jun 5 2022 21:26:42 for IT++ by Doxygen 1.9.3