Skip to content
Snippets Groups Projects
Commit 5c886f4f authored by Nicola Vigano's avatar Nicola Vigano
Browse files

gtPlaceSubVolumes: added support from the C++ side

parent 51a67ffb
No related branches found
No related tags found
No related merge requests found
......@@ -205,19 +205,28 @@ namespace GT3D {
switch (op)
{
case ASSIGN:
this->loopOver3D<Type, assign_voxel>();
this->loopOverVols<Type, assign_voxel>();
break;
case SUM:
this->loopOver3D<Type, sum_voxel>();
this->loopOverVols<Type, sum_voxel>();
break;
case INTERF:
this->loopOver3D<Type, interf_voxel>();
this->loopOverVols<Type, interf_voxel>();
break;
default:
break;
}
}
template<typename Type, template<typename Type> class action>
void loopOverVols()
{
if (mxIsCell(sub_vol)) {
this->loopOverCell3D<Type, action>();
} else {
this->loopOver3D<Type, action>();
}
}
template<typename Type, template<typename Type> class action>
void loopOver3D()
......@@ -281,6 +290,99 @@ namespace GT3D {
}
}
}
template<typename Type, template<typename Type> class action>
void loopOverCell3D()
{
action<Type> act;
const mwSize num_big_vol_dims = mxGetNumberOfDimensions(big_vol);
const mwSize * big_vol_dims = mxGetDimensions(big_vol);
const mwSize num_sub_vols = mxGetNumberOfElements(sub_vol);
const double * data_shift_big_vol = (double *)mxGetData(shift_big_vol);
const double * data_shift_sub_vol = (double *)mxGetData(shift_sub_vol);
double big_vol_shifts[num_big_vol_dims];
const mwSize big_vol_skip_dim_3 = big_vol_dims[0] * big_vol_dims[1];
const mwSize big_vol_skip_dim_2 = big_vol_dims[0];
const double * const chunk_dims_d = (double *) mxGetData(mat_chunk_dims);
//#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))
//# pragma omp parallel for simd
//#else
//# pragma omp parallel for
//#endif
#pragma omp parallel for
for (mwIndex vol_num = 0; vol_num < num_sub_vols; vol_num++)
{
const mxArray * const the_sub_vol = mxGetCell(sub_vol, vol_num);
const mwSize num_sub_vol_dims = mxGetNumberOfDimensions(the_sub_vol);
const mwSize * sub_vol_dims = mxGetDimensions(the_sub_vol);
for (mwSize shift_ind = 0; shift_ind < num_big_vol_dims; shift_ind++)
{
big_vol_shifts[shift_ind] = data_shift_big_vol[vol_num + shift_ind * num_sub_vols];
}
const mwIndex initial_big_vol_shift = this->compute_shift(
num_big_vol_dims, big_vol_dims, big_vol_shifts);
double sub_vol_shifts[num_sub_vol_dims];
for (mwSize shift_ind = 0; shift_ind < num_sub_vol_dims; shift_ind++)
{
sub_vol_shifts[shift_ind] = data_shift_sub_vol[vol_num + shift_ind * num_sub_vols];
}
const mwIndex initial_sub_vol_shift = this->compute_shift(
num_sub_vol_dims, sub_vol_dims, sub_vol_shifts);
Type * const big_vol_data = ((Type *) mxGetData(big_vol)) + initial_big_vol_shift;
const Type * const sub_vol_data = ((const Type *) mxGetData(the_sub_vol)) + initial_sub_vol_shift;
const mwSize sub_vol_skip_dim_3 = sub_vol_dims[0] * sub_vol_dims[1];
const mwSize sub_vol_skip_dim_2 = sub_vol_dims[0];
const mwSize chunk_dims[3] = {
chunk_dims_d[0 * num_sub_vols + vol_num],
chunk_dims_d[1 * num_sub_vols + vol_num],
chunk_dims_d[2 * num_sub_vols + vol_num]};
const mwSize line_length_unroll = ROUND_DOWN(chunk_dims[0], 4);
/* These loops extensively use pointer arithmetics to determine the chuck
* of the matrix to be computed */
for(mwIndex counter3 = 0; counter3 < chunk_dims[2]; counter3++)
{
/* Base vectors, which save computation of the 3rd dimension */
Type * const base_big_vol_3 = big_vol_data + counter3 * big_vol_skip_dim_3;
const Type * const base_sub_vol_3 = sub_vol_data + counter3 * sub_vol_skip_dim_3;
for(mwIndex counter2 = 0; counter2 < chunk_dims[1]; counter2++)
{
/* Base vectors, which save computation of the 2nd dimension */
Type * const base_big_vol_2 = base_big_vol_3 + counter2 * big_vol_skip_dim_2;
const Type * const base_sub_vol_2 = base_sub_vol_3 + counter2 * sub_vol_skip_dim_2;
/* Inner loop on the 1st dimension: the contiguous one in memory */
for(mwIndex counter1 = 0; counter1 < line_length_unroll; counter1 += 4)
{
act(base_big_vol_2[counter1 + 0], base_sub_vol_2[counter1 + 0]);
act(base_big_vol_2[counter1 + 1], base_sub_vol_2[counter1 + 1]);
act(base_big_vol_2[counter1 + 2], base_sub_vol_2[counter1 + 2]);
act(base_big_vol_2[counter1 + 3], base_sub_vol_2[counter1 + 3]);
}
for(mwIndex counter1 = line_length_unroll; counter1 < chunk_dims[0]; counter1++)
{
act(base_big_vol_2[counter1], base_sub_vol_2[counter1]);
}
}
}
}
}
};
} // namespace GT3D
......
......@@ -94,15 +94,15 @@ function output = gtPlaceSubVolumes(output, input, shift, assign_op, use_c_funct
switch (assign_op)
case 'sum'
for ii = 1:num_input
output = gtCxxPlaceSubVolumeSum(output, input{ii}, shifts_op(ii, :), shifts_ip(ii, :), dims(ii, :));
end
% output = gtCxxPlaceSubVolumeSum(output, input, shifts_op, shifts_ip, dims);
% for ii = 1:num_input
% output = gtCxxPlaceSubVolumeSum(output, input{ii}, shifts_op(ii, :), shifts_ip(ii, :), dims(ii, :));
% end
output = gtCxxPlaceSubVolumeSum(output, input, shifts_op, shifts_ip, dims);
case 'assign'
for ii = 1:num_input
output = gtCxxPlaceSubVolumeAssign(output, input{ii}, shifts_op(ii, :), shifts_ip(ii, :), dims(ii, :));
end
% output = gtCxxPlaceSubVolumeAssign(output, input, shifts_op, shifts_ip, dims);
% for ii = 1:num_input
% output = gtCxxPlaceSubVolumeAssign(output, input{ii}, shifts_op(ii, :), shifts_ip(ii, :), dims(ii, :));
% end
output = gtCxxPlaceSubVolumeAssign(output, input, shifts_op, shifts_ip, dims);
otherwise
error('PLACE:wrong_argument', 'No option for "%s"', assign_op);
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment