From 6af8d3d7c41c2740620c077d7dec530aa440e31c Mon Sep 17 00:00:00 2001 From: Nicola Vigano <nicola.vigano@esrf.fr> Date: Wed, 8 Apr 2015 17:27:23 +0200 Subject: [PATCH] 6D-C++: fixed performance issues in blobs<->sinos transformations Signed-off-by: Nicola Vigano <nicola.vigano@esrf.fr> --- .../include/gt6DBlobsSinogramsTransforms.h | 101 +++++++++++------- 1 file changed, 61 insertions(+), 40 deletions(-) diff --git a/zUtil_Cxx/include/gt6DBlobsSinogramsTransforms.h b/zUtil_Cxx/include/gt6DBlobsSinogramsTransforms.h index 09e67777..76b1ba35 100644 --- a/zUtil_Cxx/include/gt6DBlobsSinogramsTransforms.h +++ b/zUtil_Cxx/include/gt6DBlobsSinogramsTransforms.h @@ -11,6 +11,7 @@ #include "internal_cell_defs.h" #include <algorithm> +#include <vector> namespace GT6D { @@ -364,38 +365,48 @@ namespace GT6D { const mwSize & line_length = dims_sino[0]; const mwSize & lines_num = dims_sino[2]; const mwSize line_length_unroll = ROUND_DOWN(line_length, 4); + Type * const sino_data = ((Type *) mxGetData(sino)); const mwSize sino_skip = dims_sino[0] * dims_sino[1]; - #pragma omp parallel for - for (mwIndex line_num = 0; line_num < lines_num; line_num++) + const mwSize & num_projs = mxGetNumberOfElements(proj_offsets_a); + + std::vector<const Type *> blob_data_bases(num_projs); + std::vector<Type *> sino_data_bases(num_projs); + std::vector<mwSize> blob_skips(num_projs); + std::vector<Type> coeffs_t(num_projs); + for (mwIndex m = 0; m < num_projs; m++) { - const mwSize line_sino_skip = line_num * sino_skip; + const mwIndex p = (const mwIndex) proj_offsets[m]-1; + const mwIndex b = (const mwIndex) blob_offsets[m]-1; + const mwIndex s = (const mwIndex) sino_offsets[m]-1; - const mwSize num_projs = mxGetNumberOfElements(proj_offsets_a); - for (mwIndex m = 0; m < num_projs; m++) - { - const mwIndex p = (const mwIndex) proj_offsets[m]-1; - const mwIndex b = (const mwIndex) blob_offsets[m]-1; - const mwIndex s = (const mwIndex) sino_offsets[m]-1; + coeffs_t[m] = (const Type) proj_coeffs[m]; - const Type & c = (const Type) proj_coeffs[m]; + const mwSize sino_initial_offset = line_length * s; - inner_sum_scale<Type> op(c); + const mxArray * const cell_blobs = mxGetCell(blobs, b); + sino_data_bases[m] = sino_data + sino_initial_offset; - const mwSize sino_initial_offset = line_length * s; + const mwSize blob_initial_offset = line_length * p; + blob_data_bases[m] = ((const Type *) mxGetData(cell_blobs)) + blob_initial_offset; - const mxArray * const cell_blobs = mxGetCell(blobs, b); + const mwSize * const dims_blob = mxGetDimensions(cell_blobs); + blob_skips[m] = dims_blob[0] * dims_blob[1]; + } - const mwSize * const dims_blob = mxGetDimensions(cell_blobs); - const mwSize blob_initial_offset = line_length * p; - const mwSize blob_skip = dims_blob[0] * dims_blob[1]; + #pragma omp parallel for + for (mwIndex line_num = 0; line_num < lines_num; line_num++) + { + const mwSize line_sino_skip = line_num * sino_skip; - const Type * const blob_data_base = ((const Type *) mxGetData(cell_blobs)) + blob_initial_offset; - const Type * const blob_data_line = blob_data_base + line_num * blob_skip; + for (mwIndex m = 0; m < num_projs; m++) + { + const Type & c = coeffs_t[m]; + inner_sum_scale<Type> op(c); - Type * const sino_data_base = ((Type *) mxGetData(sino)) + sino_initial_offset; - Type * const sino_data_line = sino_data_base + line_sino_skip; + const Type * const blob_data_line = blob_data_bases[m] + line_num * blob_skips[m]; + Type * const sino_data_line = sino_data_bases[m] + line_sino_skip; for (mwIndex elem = 0; elem < line_length_unroll; elem += 4) { @@ -431,38 +442,48 @@ namespace GT6D { const mwSize & line_length = dims_sino[0]; const mwSize & lines_num = dims_sino[2]; const mwSize line_length_unroll = ROUND_DOWN(line_length, 4); + const Type * const sino_data = ((const Type *) mxGetData(sino)); const mwSize sino_skip = dims_sino[0] * dims_sino[1]; - #pragma omp parallel for - for (mwIndex line_num = 0; line_num < lines_num; line_num++) + const mwSize & num_projs = mxGetNumberOfElements(proj_offsets_a); + + std::vector<Type *> blob_data_bases(num_projs); + std::vector<const Type *> sino_data_bases(num_projs); + std::vector<mwSize> blob_skips(num_projs); + std::vector<Type> coeffs_t(num_projs); + for (mwIndex m = 0; m < num_projs; m++) { - const mwSize line_sino_skip = line_num * sino_skip; + const mwIndex p = (const mwIndex) proj_offsets[m]-1; + const mwIndex b = (const mwIndex) blob_offsets[m]-1; + const mwIndex s = (const mwIndex) sino_offsets[m]-1; - const mwSize num_projs = mxGetNumberOfElements(proj_offsets_a); - for (mwIndex m = 0; m < num_projs; m++) - { - const mwIndex p = (const mwIndex) proj_offsets[m]-1; - const mwIndex b = (const mwIndex) blob_offsets[m]-1; - const mwIndex s = (const mwIndex) sino_offsets[m]-1; + coeffs_t[m] = (const Type) proj_coeffs[m]; - const Type & c = (const Type) proj_coeffs[m]; + const mwSize sino_initial_offset = line_length * s; - inner_sum_scale<Type> op(c); + const mxArray * const cell_blobs = mxGetCell(blobs, b); + sino_data_bases[m] = sino_data + sino_initial_offset; - const mwSize sino_initial_offset = line_length * s; + const mwSize blob_initial_offset = line_length * p; + blob_data_bases[m] = ((Type *) mxGetData(cell_blobs)) + blob_initial_offset; - mxArray * const cell_blobs = mxGetCell(blobs, b); + const mwSize * const dims_blob = mxGetDimensions(cell_blobs); + blob_skips[m] = dims_blob[0] * dims_blob[1]; + } - const mwSize * const dims_blob = mxGetDimensions(cell_blobs); - const mwSize blob_initial_offset = line_length * p; - const mwSize blob_skip = dims_blob[0] * dims_blob[1]; + #pragma omp parallel for + for (mwIndex line_num = 0; line_num < lines_num; line_num++) + { + const mwSize line_sino_skip = line_num * sino_skip; - Type * const blob_data_base = ((Type *) mxGetData(cell_blobs)) + blob_initial_offset; - Type * const blob_data_line = blob_data_base + line_num * blob_skip; + for (mwIndex m = 0; m < num_projs; m++) + { + const Type & c = coeffs_t[m]; + inner_sum_scale<Type> op(c); - const Type * const sino_data_base = ((const Type *) mxGetData(sino)) + sino_initial_offset; - const Type * const sino_data_line = sino_data_base + line_sino_skip; + Type * const blob_data_line = blob_data_bases[m] + line_num * blob_skips[m]; + const Type * const sino_data_line = sino_data_bases[m] + line_sino_skip; for (mwIndex elem = 0; elem < line_length_unroll; elem += 4) { -- GitLab