Skip to content
Snippets Groups Projects
Commit 566ac883 authored by Nicola Vigano's avatar Nicola Vigano
Browse files

Cells/C function: added functions to compute square norm and dot product of cell arrays


Signed-off-by: default avatarNicola Vigano <nicola.vigano@esrf.fr>

git-svn-id: https://svn.code.sf.net/p/dct/code/trunk@668 4c865b51-4357-4376-afb4-474e03ccb993
parent 30dd5406
No related branches found
No related tags found
No related merge requests found
/*
* internal_cell_dot_product.cpp
*
* Created on: Jul 13, 2012
* Author: ben
*/
#include "internal_cell_defs.h"
void mexFunction( int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[] )
{
if (nrhs != 2) {
mexErrMsgIdAndTxt("C_FUN:cell_dot_product:wrong_argument",
"Not enough arguments! you need to provide two cell arrays");
}
if (!(mxIsCell(prhs[0]) && mxIsCell(prhs[1]))) {
mexErrMsgIdAndTxt("C_FUN:cell_dot_product:wrong_argument",
"The arguments need to be Cell arrays");
}
plhs[0] = mxCreateDoubleScalar(cell_dot_product_sse(prhs[0], prhs[1]));
}
/*
* internal_cell_square_norm.cpp
*
* Created on: Jul 13, 2012
* Author: ben
*/
#include "internal_cell_defs.h"
void mexFunction( int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[] )
{
if (nrhs != 1) {
mexErrMsgIdAndTxt("C_FUN:cell_square_norm:wrong_argument",
"Not enough arguments! you need to provide a cell array");
}
if (!mxIsCell(prhs[0])) {
mexErrMsgIdAndTxt("C_FUN:cell_square_norm:wrong_argument",
"The argument needs to be a Cell array");
}
plhs[0] = mxCreateDoubleScalar(cell_square_norm_sse(prhs[0]));
}
......@@ -270,6 +270,110 @@ cell_inner_cycle_sse(Type * const __restrict outData,
outData[elemIdx] = func2(inData1[elemIdx], inData2[elemIdx]);
}
}
inline double
cell_square_norm_sse(const mxArray * const inArray)
{
double temp_sum = 0;
v2df accumul = {0, 0};
const mwSize numCells = mxGetNumberOfElements(inArray);
#pragma omp parallel
{
for(mwIndex cellIdx = 0; cellIdx < numCells; cellIdx++) {
const mxArray * const inCell = mxGetCell(inArray, cellIdx);
const mwSize numElems = mxGetNumberOfElements(inCell);
const double * const inData = mxGetPr(inCell);
const mwSize num32Elems = ROUND_DOWN(numElems, 32);
#pragma omp for reduction(+:accumul) nowait
for(mwIndex count = 0; count < num32Elems; count += 32) {
const v2df in00 = *((v2df *)&inData[count+0]);
const v2df in02 = *((v2df *)&inData[count+2]);
const v2df in04 = *((v2df *)&inData[count+4]);
const v2df in06 = *((v2df *)&inData[count+6]);
const v2df in08 = *((v2df *)&inData[count+8]);
const v2df in10 = *((v2df *)&inData[count+10]);
const v2df in12 = *((v2df *)&inData[count+12]);
const v2df in14 = *((v2df *)&inData[count+14]);
const v2df in16 = *((v2df *)&inData[count+16]);
const v2df in18 = *((v2df *)&inData[count+18]);
const v2df in20 = *((v2df *)&inData[count+20]);
const v2df in22 = *((v2df *)&inData[count+22]);
const v2df in24 = *((v2df *)&inData[count+24]);
const v2df in26 = *((v2df *)&inData[count+26]);
const v2df in28 = *((v2df *)&inData[count+28]);
const v2df in30 = *((v2df *)&inData[count+30]);
accumul += ( in00 * in00 + in02 * in02
+ in04 * in04 + in06 * in06
+ in08 * in08 + in10 * in10
+ in12 * in12 + in14 * in14
+ in16 * in16 + in18 * in18
+ in20 * in20 + in22 * in22
+ in24 * in24 + in26 * in26
+ in28 * in28 + in30 * in30 );
}
#pragma omp for reduction(+:temp_sum) nowait
for(mwIndex count = num32Elems; count < numElems; count++) {
temp_sum += inData[count] * inData[count];
}
}
}
const double * const __restrict accumuld = (double *)&accumul;
return (temp_sum + accumuld[0] + accumuld[1]);
}
inline double
cell_dot_product_sse(const mxArray * const inArray1, const mxArray * const inArray2)
{
double temp_sum = 0;
v2df accumul = {0, 0};
const mwSize numCells = mxGetNumberOfElements(inArray1);
#pragma omp parallel
{
for(mwIndex cellIdx = 0; cellIdx < numCells; cellIdx++) {
const mxArray * const inCell1 = mxGetCell(inArray1, cellIdx);
const mxArray * const inCell2 = mxGetCell(inArray2, cellIdx);
const mwSize numElems = mxGetNumberOfElements(inCell1);
const double * const inData1 = mxGetPr(inCell1);
const double * const inData2 = mxGetPr(inCell2);
const mwSize num32Elems = ROUND_DOWN(numElems, 32);
#pragma omp for reduction(+:accumul) nowait
for(mwIndex count = 0; count < num32Elems; count += 32) {
const v2df in00 = *((v2df *)&inData1[count+0]) * *((v2df *)&inData2[count+0]);
const v2df in02 = *((v2df *)&inData1[count+2]) * *((v2df *)&inData2[count+2]);
const v2df in04 = *((v2df *)&inData1[count+4]) * *((v2df *)&inData2[count+4]);
const v2df in06 = *((v2df *)&inData1[count+6]) * *((v2df *)&inData2[count+6]);
const v2df in08 = *((v2df *)&inData1[count+8]) * *((v2df *)&inData2[count+8]);
const v2df in10 = *((v2df *)&inData1[count+10]) * *((v2df *)&inData2[count+10]);
const v2df in12 = *((v2df *)&inData1[count+12]) * *((v2df *)&inData2[count+12]);
const v2df in14 = *((v2df *)&inData1[count+14]) * *((v2df *)&inData2[count+14]);
const v2df in16 = *((v2df *)&inData1[count+16]) * *((v2df *)&inData2[count+16]);
const v2df in18 = *((v2df *)&inData1[count+18]) * *((v2df *)&inData2[count+18]);
const v2df in20 = *((v2df *)&inData1[count+20]) * *((v2df *)&inData2[count+20]);
const v2df in22 = *((v2df *)&inData1[count+22]) * *((v2df *)&inData2[count+22]);
const v2df in24 = *((v2df *)&inData1[count+24]) * *((v2df *)&inData2[count+24]);
const v2df in26 = *((v2df *)&inData1[count+26]) * *((v2df *)&inData2[count+26]);
const v2df in28 = *((v2df *)&inData1[count+28]) * *((v2df *)&inData2[count+28]);
const v2df in30 = *((v2df *)&inData1[count+30]) * *((v2df *)&inData2[count+30]);
accumul += ( in00 + in02 + in04 + in06 + in08 + in10 + in12 + in14
+ in16 + in18 + in20 + in22 + in24 + in26 + in28 + in30 );
}
#pragma omp for reduction(+:temp_sum) nowait
for(mwIndex count = num32Elems; count < numElems; count++) {
temp_sum += inData1[count] * inData2[count];
}
}
}
const double * const __restrict accumuld = (double *)&accumul;
return (temp_sum + accumuld[0] + accumuld[1]);
}
#endif
template<const bool allocate, typename Function>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment