Commit cd409ed6 authored by Alessandro Mirone

wrapped fftwf_malloc within semaphores for thread safety

parent b8d1ca52
......@@ -1853,8 +1853,11 @@ void CCspace_precalculations( CCspace * self, int ncpus ) {
dim_fft*=2;
self->fbp_precalculated.dim_fft = dim_fft ;
for(i=0; i<ncpus; i++) {
sem_wait( &(self->fftw_sem));
dumf [i] = (float *) fftwf_malloc( dim_fft * sizeof(float) );
dumfC[i] = (fcomplex *) fftwf_malloc( dim_fft * sizeof(fftwf_complex ) );
sem_post( &(self->fftw_sem) );
}
// printf( "AAAA in precalculation \n" ) ;
......@@ -1872,10 +1875,13 @@ void CCspace_precalculations( CCspace * self, int ncpus ) {
X_ENDS = (int*) malloc(num_bins * sizeof(int));
self->fbp_precalculated.Lipschitz_fbdl=-1.0f;
sem_wait( &(self->fftw_sem));
self->fbp_precalculated.FILTER = (float *) fftwf_malloc((2*dim_fft) * sizeof(float));
sem_wait( &(self->fftw_sem));
fftwf_plan_with_nthreads(1 );
planr2c = fftwf_plan_dft_r2c_1d( dim_fft , dumf[0] , (fftwf_complex*) dumfC[0] , FFTW_MEASURE );
planc2r = fftwf_plan_dft_c2r_1d( dim_fft , (fftwf_complex*) dumfC[0], dumf[0] , FFTW_MEASURE );
......@@ -2524,8 +2530,10 @@ void CCspace_precalculations( CCspace * self, int ncpus ) {
for(i=0; i<ncpus; i++) {
sem_wait( &(self->fftw_sem));
fftwf_free(dumf[i]);
fftwf_free(dumfC[i]);
sem_post( &(self->fftw_sem) );
}
}
......@@ -2640,8 +2648,10 @@ void CCspace_Sino_2_Slice( CCspace * self, float * dataA, int nslices, int ns
/* this for extra symmetric padding with fai360 */
dim_fft*=2;
sem_wait( &(self->fftw_sem));
dumf = (float *) fftwf_malloc( dim_fft * sizeof(float) );
dumfC = (fcomplex *) fftwf_malloc( dim_fft * sizeof(fftwf_complex ) );
sem_post( &(self->fftw_sem) );
LT_infos *lt_merged_infos=NULL;
LT_infos *lt_infos_coarse=NULL;
......@@ -3263,15 +3273,17 @@ void CCspace_Sino_2_Slice( CCspace * self, float * dataA, int nslices, int ns
float* ramp_filter;
fftwf_complex* ramp_filter_fourier=NULL;
if (self->params.DO_PRECONDITION) {
sem_wait( &(self->fftw_sem)); //CAUTION : FFTW plan creation is not thread-safe
ramp_filter = (float*) fftwf_malloc(dimfft_preco_rings*sizeof(float));
dumfftwf = (float*) fftwf_malloc(dimfft_preco_rings*sizeof(float));
drings_fourier = (fftwf_complex*) fftwf_malloc(( dimfft_preco_rings/2+1 )*sizeof(fftwf_complex));
sem_wait( &(self->fftw_sem)); //CAUTION : FFTW plan creation is not thread-safe
planr2c = fftwf_plan_dft_r2c_1d(dimfft_preco_rings,ramp_filter , drings_fourier, FFTW_ESTIMATE);
planc2r = fftwf_plan_dft_c2r_1d(dimfft_preco_rings, drings_fourier, ramp_filter, FFTW_ESTIMATE);
sem_post( &(self->fftw_sem)); //----
//Fourier transform of discrete ramp filter
ramp_filter = (float*) fftwf_malloc(dimfft_preco_rings*sizeof(float));
sem_post( &(self->fftw_sem)); //----
memset( ramp_filter,0, dimfft_preco_rings*sizeof(float) );
ramp_filter[0] = 0.25f;
float val;
......@@ -3280,9 +3292,17 @@ void CCspace_Sino_2_Slice( CCspace * self, float * dataA, int nslices, int ns
ramp_filter[i] = val;
ramp_filter[dimfft_preco_rings-i] = val;
}
sem_wait( &(self->fftw_sem));
ramp_filter_fourier = (fftwf_complex*) fftwf_malloc(dimfft_rings*sizeof(fftwf_complex));
sem_post( &(self->fftw_sem)); //----
fftwf_execute_dft_r2c(planr2c, ramp_filter, ramp_filter_fourier);
sem_wait( &(self->fftw_sem)); //CAUTION : FFTW plan creation is not thread-safe
fftwf_free(ramp_filter);
sem_post( &(self->fftw_sem)); //----
}
//-----------------
......@@ -3781,11 +3801,11 @@ void CCspace_Sino_2_Slice( CCspace * self, float * dataA, int nslices, int ns
if (false && self->params.DO_PRECONDITION) {
sem_wait( &(self->fftw_sem));
fftwf_free(dumfftwf);
fftwf_free(drings_fourier);
fftwf_free(ramp_filter_fourier);
sem_wait( &(self->fftw_sem));
fftwf_destroy_plan(planr2c);
fftwf_destroy_plan(planc2r);
sem_post( &(self->fftw_sem));
......@@ -4152,8 +4172,10 @@ void CCspace_Sino_2_Slice( CCspace * self, float * dataA, int nslices, int ns
// free( WORK[0] ) ;
// free( WORK_perproje - (self->params.CONICITY==0 )*(1+try_2by2)*(num_bins)*self->params.OVERSAMPLING_FACTOR) ;
sem_wait( &(self->fftw_sem));
fftwf_free(dumf);
fftwf_free(dumfC);
sem_post( &(self->fftw_sem)); //----
}
//free_LT_infos(lt_merged_infos);
......@@ -6899,9 +6921,11 @@ float FindNoise(CCspace * self,float *SLICE_a, int CALM_ZONE_LEN, int size_pa0,
// }
// // -------------------------------------------------------------------------------------
sem_wait( &(self->fftw_sem));
gn0 = (fcomplex*) fftwf_malloc(sizeof(fftwf_complex) *size_pa0 );
gn1 = (fcomplex*) fftwf_malloc(sizeof(fftwf_complex) *size_pa1 );
sem_post( &(self->fftw_sem) );
for(i=0; i< size_pa0; i++) {
((float*)gn0)[2*i]=0;
((float*)gn0)[2*i+1]=0;
......@@ -6933,8 +6957,8 @@ float FindNoise(CCspace * self,float *SLICE_a, int CALM_ZONE_LEN, int size_pa0,
sem_post( &(self->fftw_sem) );
}
float * auxbuffer ;
auxbuffer = (float *) fftwf_malloc(sizeof(fftwf_complex) *size_pa0*size_pa1 );
sem_wait( &(self->fftw_sem));
auxbuffer = (float *) fftwf_malloc(sizeof(fftwf_complex) *size_pa0*size_pa1 );
fftwf_plan_with_nthreads(1 );
plan2D_forward = fftwf_plan_dft_2d(size_pa0,size_pa1 ,
(fftwf_complex*)auxbuffer,(fftwf_complex*)auxbuffer,
......@@ -6962,10 +6986,10 @@ float FindNoise(CCspace * self,float *SLICE_a, int CALM_ZONE_LEN, int size_pa0,
}
}
fftwf_execute(plan2D_backward );
sem_wait( &(self->fftw_sem));
fftwf_free(gn0);
fftwf_free(gn1);
sem_wait( &(self->fftw_sem));
fftwf_destroy_plan(plan2D_forward);
fftwf_destroy_plan(plan2D_backward);
sem_post( &(self->fftw_sem));
......@@ -7049,7 +7073,12 @@ void rotational2zero(CCspace *self,float *SLICE,float *SLICEres) {
float *Y = SLICE + dimy * dimx ;
float * auxbuffer;
sem_wait( &(self->fftw_sem));
auxbuffer = (float *) fftwf_malloc(sizeof(fftwf_complex) * dimy * dimx );
sem_post( &(self->fftw_sem) );
memset( auxbuffer, 0, sizeof(fftwf_complex) * dimy * dimx);
{
double sum=0,d;
......@@ -7148,8 +7177,8 @@ void rotational2zero(CCspace *self,float *SLICE,float *SLICEres) {
#undef auxbuffer2
fftwf_free(auxbuffer);
sem_wait( &(self->fftw_sem));
fftwf_free(auxbuffer);
fftwf_destroy_plan(plan2D_forward);
fftwf_destroy_plan(plan2D_backward);
sem_post( &(self->fftw_sem));
......@@ -7342,8 +7371,13 @@ void Paganin(CCspace * self, float * RawptrA,
f0 =(float*) malloc(sizeof(float)*size_pa0);
f1 =(float*) malloc(sizeof(float)*size_pa1);
sem_wait( &(self->fftw_sem));
gn0 = (fcomplex*) fftwf_malloc(sizeof(fftwf_complex) *size_pa0 );
gn1 = (fcomplex*) fftwf_malloc(sizeof(fftwf_complex) *size_pa1 );
sem_post( &(self->fftw_sem) );
fgn0 =(float*) malloc(sizeof(float)*size_pa0);
fgn1 =(float*) malloc(sizeof(float)*size_pa1);
......@@ -7415,9 +7449,9 @@ void Paganin(CCspace * self, float * RawptrA,
sem_post( &(self->fftw_sem) );
}
sem_wait( &(self->fftw_sem));
auxbuffer = (float *) fftwf_malloc(sizeof(fftwf_complex) *size_pa0*size_pa1 );
kernelbuffer = (float *) fftwf_malloc(sizeof(fftwf_complex) *size_pa0*size_pa1 );
sem_wait( &(self->fftw_sem));
fftwf_plan_with_nthreads(ncpus ); // controllare che sia 1 per default dove deve esserlo
plan2D_forward = fftwf_plan_dft_2d(size_pa0,size_pa1 ,
......@@ -7756,12 +7790,12 @@ void Paganin(CCspace * self, float * RawptrA,
free(f1);
free(fgn0);
free(fgn1);
sem_wait( &(self->fftw_sem));
fftwf_free(gn0);
fftwf_free(gn1);
if(auxbuffer) fftwf_free(auxbuffer);
if(kernelbuffer) fftwf_free(kernelbuffer);
sem_wait( &(self->fftw_sem));
fftwf_destroy_plan(plan2D_forward);
fftwf_destroy_plan(plan2D_backward);
sem_post( &(self->fftw_sem));
......
......@@ -86,7 +86,7 @@ DO_LINK = 0
global version
global aumento_versione
aumento_versione="d"
aumento_versione="f"
global version
......
[DEFAULT]
Depends: python-glymur, python-mpi4py,python-h5py,fftw3, openmpi-bin, libqt-dev, libtiff-dev, python-fabio
Depends3: python3-glymur, python3-mpi4py,python3-h5py,fftw3, openmpi-bin, python3-fabio
\ No newline at end of file
Depends: python-glymur, python-mpi4py,python-h5py,fftw3, openmpi-bin, python-fabio
Depends3: python3-glymur, python3-mpi4py,python3-h5py,fftw3, openmpi-bin, python3-fabio
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment