From f0388c82609f74d92b8a0c987a96dc6d90e0685c Mon Sep 17 00:00:00 2001
From: Nicola Vigano <nicola.vigano@esrf.fr>
Date: Mon, 14 Mar 2016 17:04:03 +0100
Subject: [PATCH] gtPlaceSubVolumes: added more control on the number of
 threads spawned

Signed-off-by: Nicola Vigano <nicola.vigano@esrf.fr>
---
 .../3D_ops/gtCxxPlaceSubVolumeAssign.cpp      | 10 +++++++++
 .../3D_ops/gtCxxPlaceSubVolumeInterf.cpp      | 10 +++++++++
 zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp   | 10 +++++++++
 zUtil_Cxx/include/gtCxxPlaceSubVolOps.h       | 21 +++++++++++++++++--
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeAssign.cpp b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeAssign.cpp
index 740270ba..58309898 100644
--- a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeAssign.cpp
+++ b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeAssign.cpp
@@ -15,6 +15,16 @@ void mexFunction( int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[] )
     return;
   }
 
+  if (nrhs >= 6)
+  {
+    const mxArray * const num_threads = prhs[5];
+    initialize_multithreading(*mxGetPr(num_threads));
+  }
+  else
+  {
+    initialize_multithreading();
+  }
+
   const mxArray * const mat_input = prhs[1];
   const mxArray * const mat_shifts_op = prhs[2];
   const mxArray * const mat_shifts_ip = prhs[3];
diff --git a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeInterf.cpp b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeInterf.cpp
index 5e27fd04..8cac869a 100644
--- a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeInterf.cpp
+++ b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeInterf.cpp
@@ -15,6 +15,16 @@ void mexFunction( int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[] )
     return;
   }
 
+  if (nrhs >= 6)
+  {
+    const mxArray * const num_threads = prhs[5];
+    initialize_multithreading(*mxGetPr(num_threads));
+  }
+  else
+  {
+    initialize_multithreading();
+  }
+
   const mxArray * const mat_input = prhs[1];
   const mxArray * const mat_shifts_op = prhs[2];
   const mxArray * const mat_shifts_ip = prhs[3];
diff --git a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp
index 77f3b9e7..c91c7ee9 100644
--- a/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp
+++ b/zUtil_Cxx/3D_ops/gtCxxPlaceSubVolumeSum.cpp
@@ -15,6 +15,16 @@ void mexFunction( int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[] )
     return;
   }
 
+  if (nrhs >= 6)
+  {
+    const mxArray * const num_threads = prhs[5];
+    initialize_multithreading(*mxGetPr(num_threads));
+  }
+  else
+  {
+    initialize_multithreading();
+  }
+
   const mxArray * const mat_input = prhs[1];
   const mxArray * const mat_shifts_op = prhs[2];
   const mxArray * const mat_shifts_ip = prhs[3];
diff --git a/zUtil_Cxx/include/gtCxxPlaceSubVolOps.h b/zUtil_Cxx/include/gtCxxPlaceSubVolOps.h
index 7a6a15c2..04720b21 100644
--- a/zUtil_Cxx/include/gtCxxPlaceSubVolOps.h
+++ b/zUtil_Cxx/include/gtCxxPlaceSubVolOps.h
@@ -32,6 +32,24 @@ extern "C"
   mxArray *mxUnreference(const mxArray *pr);
 }
 
+/* Sets the OpenMP thread count (no-op if DEBUG is defined): a positive hint is
+ * clamped to [1, omp_get_num_procs()] before the int conversion (out-of-range
+ * double -> int is undefined); otherwise 3/4 of the processors are used. */
+inline void
+initialize_multithreading(const double & suggested_num_threads = 0)
+{
+#ifndef DEBUG
+  const int num_procs = omp_get_num_procs();
+  int num_threads = std::max(num_procs/4*3, 1);
+  if (suggested_num_threads > 0)
+  {
+    const double capped = std::min<double>(suggested_num_threads, num_procs);
+    num_threads = std::max(static_cast<int>(capped), 1);
+  }
+  omp_set_num_threads(num_threads);
+#endif
+}
+
 # define ROUND_DOWN(x, s) ((x) & ~((s)-1))
 
 namespace GT3D {
@@ -257,12 +275,11 @@ namespace GT3D {
       const double * const chunk_dims_d = (double *) mxGetData(mat_chunk_dims);
       const mwSize chunk_dims[3] = {chunk_dims_d[0], chunk_dims_d[1], chunk_dims_d[2]};
 
-      const mwSize num_threads = std::max( omp_get_max_threads()/4*3 , 1);
       const mwSize line_length_unroll = ROUND_DOWN(chunk_dims[0], 4);
 
       /* These loops extensively use pointer arithmetics to determine the chuck
        * of the matrix to be computed */
-#pragma omp parallel for num_threads(num_threads)
+#pragma omp parallel for
       for(mwIndex counter3 = 0; counter3 < chunk_dims[2]; counter3++)
       {
         /* Base vectors, which save computation of the 3rd dimension */
-- 
GitLab