From f0388c82609f74d92b8a0c987a96dc6d90e0685c Mon Sep 17 00:00:00 2001
From: Nicola Vigano <nicola.vigano@esrf.fr>
Date: Mon, 14 Mar 2016 17:04:03 +0100
Subject: [PATCH] gtPlaceSubVolumes: added more control on the number of threads spawned

Signed-off-by: Nicola Vigano <nicola.vigano@esrf.fr>
---
 .../3D_ops/gtCxxPlaceSubVolumeAssign.cpp      | 10 ++++
 .../3D_ops/gtCxxPlaceSubVolumeInterf.cpp      | 10 ++++
 zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp   | 10 ++++
 zUtil_Cxx/include/gtCxxPlaceSubVolOps.h       | 53 ++++++++++++++++++-
 4 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeAssign.cpp b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeAssign.cpp
index 740270ba..58309898 100644
--- a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeAssign.cpp
+++ b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeAssign.cpp
@@ -15,6 +15,16 @@ void mexFunction( int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[] )
     return;
   }
 
+  if (nrhs >= 6)
+  {
+    /* Optional 6th argument: suggested number of threads */
+    initialize_multithreading_from_arg(prhs[5]);
+  }
+  else
+  {
+    initialize_multithreading();
+  }
+
   const mxArray * const mat_input = prhs[1];
   const mxArray * const mat_shifts_op = prhs[2];
   const mxArray * const mat_shifts_ip = prhs[3];
diff --git a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeInterf.cpp b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeInterf.cpp
index 5e27fd04..8cac869a 100644
--- a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeInterf.cpp
+++ b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeInterf.cpp
@@ -15,6 +15,16 @@ void mexFunction( int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[] )
     return;
   }
 
+  if (nrhs >= 6)
+  {
+    /* Optional 6th argument: suggested number of threads */
+    initialize_multithreading_from_arg(prhs[5]);
+  }
+  else
+  {
+    initialize_multithreading();
+  }
+
   const mxArray * const mat_input = prhs[1];
   const mxArray * const mat_shifts_op = prhs[2];
   const mxArray * const mat_shifts_ip = prhs[3];
diff --git a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp
index 77f3b9e7..c91c7ee9 100644
--- a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp
+++ b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp
@@ -15,6 +15,16 @@ void mexFunction( int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[] )
     return;
   }
 
+  if (nrhs >= 6)
+  {
+    /* Optional 6th argument: suggested number of threads */
+    initialize_multithreading_from_arg(prhs[5]);
+  }
+  else
+  {
+    initialize_multithreading();
+  }
+
   const mxArray * const mat_input = prhs[1];
   const mxArray * const mat_shifts_op = prhs[2];
   const mxArray * const mat_shifts_ip = prhs[3];
diff --git a/zUtil_Cxx/include/gtCxxPlaceSubVolOps.h b/zUtil_Cxx/include/gtCxxPlaceSubVolOps.h
index 7a6a15c2..04720b21 100644
--- a/zUtil_Cxx/include/gtCxxPlaceSubVolOps.h
+++ b/zUtil_Cxx/include/gtCxxPlaceSubVolOps.h
@@ -32,6 +32,56 @@ extern "C"
   mxArray *mxUnreference(const mxArray *pr);
 }
 
+/**
+ * Sets the number of OpenMP threads used by subsequent parallel regions.
+ *
+ * A positive suggestion is truncated to an integer and clamped to the range
+ * [1, omp_get_num_procs()]. Any other value (the default 0, negatives, NaN)
+ * selects 3/4 of the available processors, with a minimum of one thread.
+ * When DEBUG is defined the OpenMP settings are left untouched.
+ */
+inline void
+initialize_multithreading(const double suggested_num_threads = 0)
+{
+#ifndef DEBUG
+  const int num_procs = omp_get_num_procs();
+  if (suggested_num_threads > 0)
+  {
+    /* Clamp before converting: casting a double that does not fit in an int
+     * (e.g. Inf or 1e12) is undefined behaviour */
+    const double capped = std::min(suggested_num_threads, static_cast<double>(num_procs));
+    omp_set_num_threads(std::max(static_cast<int>(capped), 1));
+  }
+  else
+  {
+    omp_set_num_threads(std::max(num_procs / 4 * 3, 1));
+  }
+#endif
+}
+
+/**
+ * Initializes multithreading from an optional MEX input argument.
+ *
+ * An empty array selects the default number of threads. Anything else has to
+ * be a real numeric scalar: a MATLAB error is raised otherwise, instead of
+ * reading through mxGetPr(), which is only meaningful for double arrays.
+ */
+inline void
+initialize_multithreading_from_arg(const mxArray * const num_threads_arg)
+{
+  if (mxIsEmpty(num_threads_arg))
+  {
+    initialize_multithreading();
+    return;
+  }
+  if (!mxIsNumeric(num_threads_arg) || mxIsComplex(num_threads_arg)
+      || mxGetNumberOfElements(num_threads_arg) != 1)
+  {
+    mexErrMsgTxt("The number of threads must be a real numeric scalar.");
+  }
+  initialize_multithreading(mxGetScalar(num_threads_arg));
+}
+
 # define ROUND_DOWN(x, s) ((x) & ~((s)-1))
 
 namespace GT3D {
@@ -257,12 +307,11 @@ namespace GT3D {
     const double * const chunk_dims_d = (double *) mxGetData(mat_chunk_dims);
     const mwSize chunk_dims[3] = {chunk_dims_d[0], chunk_dims_d[1], chunk_dims_d[2]};
 
-    const mwSize num_threads = std::max( omp_get_max_threads()/4*3 , 1);
     const mwSize line_length_unroll = ROUND_DOWN(chunk_dims[0], 4);
 
     /* These loops extensively use pointer arithmetics to determine the chuck
      * of the matrix to be computed */
-#pragma omp parallel for num_threads(num_threads)
+#pragma omp parallel for
     for(mwIndex counter3 = 0; counter3 < chunk_dims[2]; counter3++)
     {
       /* Base vectors, which save computation of the 3rd dimension */
-- 
GitLab