Skip to content
Snippets Groups Projects
Commit 6af8d3d7 authored by Nicola Vigano's avatar Nicola Vigano
Browse files

6D-C++: fixed performance issues in blobs<->sinos transformations

parent 4d995a0a
No related branches found
No related tags found
No related merge requests found
......@@ -11,6 +11,7 @@
#include "internal_cell_defs.h"
#include <algorithm>
#include <vector>
namespace GT6D {
......@@ -364,38 +365,48 @@ namespace GT6D {
const mwSize & line_length = dims_sino[0];
const mwSize & lines_num = dims_sino[2];
const mwSize line_length_unroll = ROUND_DOWN(line_length, 4);
Type * const sino_data = ((Type *) mxGetData(sino));
const mwSize sino_skip = dims_sino[0] * dims_sino[1];
#pragma omp parallel for
for (mwIndex line_num = 0; line_num < lines_num; line_num++)
const mwSize & num_projs = mxGetNumberOfElements(proj_offsets_a);
std::vector<const Type *> blob_data_bases(num_projs);
std::vector<Type *> sino_data_bases(num_projs);
std::vector<mwSize> blob_skips(num_projs);
std::vector<Type> coeffs_t(num_projs);
for (mwIndex m = 0; m < num_projs; m++)
{
const mwSize line_sino_skip = line_num * sino_skip;
const mwIndex p = (const mwIndex) proj_offsets[m]-1;
const mwIndex b = (const mwIndex) blob_offsets[m]-1;
const mwIndex s = (const mwIndex) sino_offsets[m]-1;
const mwSize num_projs = mxGetNumberOfElements(proj_offsets_a);
for (mwIndex m = 0; m < num_projs; m++)
{
const mwIndex p = (const mwIndex) proj_offsets[m]-1;
const mwIndex b = (const mwIndex) blob_offsets[m]-1;
const mwIndex s = (const mwIndex) sino_offsets[m]-1;
coeffs_t[m] = (const Type) proj_coeffs[m];
const Type & c = (const Type) proj_coeffs[m];
const mwSize sino_initial_offset = line_length * s;
inner_sum_scale<Type> op(c);
const mxArray * const cell_blobs = mxGetCell(blobs, b);
sino_data_bases[m] = sino_data + sino_initial_offset;
const mwSize sino_initial_offset = line_length * s;
const mwSize blob_initial_offset = line_length * p;
blob_data_bases[m] = ((const Type *) mxGetData(cell_blobs)) + blob_initial_offset;
const mxArray * const cell_blobs = mxGetCell(blobs, b);
const mwSize * const dims_blob = mxGetDimensions(cell_blobs);
blob_skips[m] = dims_blob[0] * dims_blob[1];
}
const mwSize * const dims_blob = mxGetDimensions(cell_blobs);
const mwSize blob_initial_offset = line_length * p;
const mwSize blob_skip = dims_blob[0] * dims_blob[1];
#pragma omp parallel for
for (mwIndex line_num = 0; line_num < lines_num; line_num++)
{
const mwSize line_sino_skip = line_num * sino_skip;
const Type * const blob_data_base = ((const Type *) mxGetData(cell_blobs)) + blob_initial_offset;
const Type * const blob_data_line = blob_data_base + line_num * blob_skip;
for (mwIndex m = 0; m < num_projs; m++)
{
const Type & c = coeffs_t[m];
inner_sum_scale<Type> op(c);
Type * const sino_data_base = ((Type *) mxGetData(sino)) + sino_initial_offset;
Type * const sino_data_line = sino_data_base + line_sino_skip;
const Type * const blob_data_line = blob_data_bases[m] + line_num * blob_skips[m];
Type * const sino_data_line = sino_data_bases[m] + line_sino_skip;
for (mwIndex elem = 0; elem < line_length_unroll; elem += 4)
{
......@@ -431,38 +442,48 @@ namespace GT6D {
const mwSize & line_length = dims_sino[0];
const mwSize & lines_num = dims_sino[2];
const mwSize line_length_unroll = ROUND_DOWN(line_length, 4);
const Type * const sino_data = ((const Type *) mxGetData(sino));
const mwSize sino_skip = dims_sino[0] * dims_sino[1];
#pragma omp parallel for
for (mwIndex line_num = 0; line_num < lines_num; line_num++)
const mwSize & num_projs = mxGetNumberOfElements(proj_offsets_a);
std::vector<Type *> blob_data_bases(num_projs);
std::vector<const Type *> sino_data_bases(num_projs);
std::vector<mwSize> blob_skips(num_projs);
std::vector<Type> coeffs_t(num_projs);
for (mwIndex m = 0; m < num_projs; m++)
{
const mwSize line_sino_skip = line_num * sino_skip;
const mwIndex p = (const mwIndex) proj_offsets[m]-1;
const mwIndex b = (const mwIndex) blob_offsets[m]-1;
const mwIndex s = (const mwIndex) sino_offsets[m]-1;
const mwSize num_projs = mxGetNumberOfElements(proj_offsets_a);
for (mwIndex m = 0; m < num_projs; m++)
{
const mwIndex p = (const mwIndex) proj_offsets[m]-1;
const mwIndex b = (const mwIndex) blob_offsets[m]-1;
const mwIndex s = (const mwIndex) sino_offsets[m]-1;
coeffs_t[m] = (const Type) proj_coeffs[m];
const Type & c = (const Type) proj_coeffs[m];
const mwSize sino_initial_offset = line_length * s;
inner_sum_scale<Type> op(c);
const mxArray * const cell_blobs = mxGetCell(blobs, b);
sino_data_bases[m] = sino_data + sino_initial_offset;
const mwSize sino_initial_offset = line_length * s;
const mwSize blob_initial_offset = line_length * p;
blob_data_bases[m] = ((Type *) mxGetData(cell_blobs)) + blob_initial_offset;
mxArray * const cell_blobs = mxGetCell(blobs, b);
const mwSize * const dims_blob = mxGetDimensions(cell_blobs);
blob_skips[m] = dims_blob[0] * dims_blob[1];
}
const mwSize * const dims_blob = mxGetDimensions(cell_blobs);
const mwSize blob_initial_offset = line_length * p;
const mwSize blob_skip = dims_blob[0] * dims_blob[1];
#pragma omp parallel for
for (mwIndex line_num = 0; line_num < lines_num; line_num++)
{
const mwSize line_sino_skip = line_num * sino_skip;
Type * const blob_data_base = ((Type *) mxGetData(cell_blobs)) + blob_initial_offset;
Type * const blob_data_line = blob_data_base + line_num * blob_skip;
for (mwIndex m = 0; m < num_projs; m++)
{
const Type & c = coeffs_t[m];
inner_sum_scale<Type> op(c);
const Type * const sino_data_base = ((const Type *) mxGetData(sino)) + sino_initial_offset;
const Type * const sino_data_line = sino_data_base + line_sino_skip;
Type * const blob_data_line = blob_data_bases[m] + line_num * blob_skips[m];
const Type * const sino_data_line = sino_data_bases[m] + line_sino_skip;
for (mwIndex elem = 0; elem < line_length_unroll; elem += 4)
{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment