Commit 3e75896a authored by Sebastien Petitdemange
Browse files

video: some optimization.

parent 6a9b0495
Pipeline #50335 failed with stages
in 23 minutes and 3 seconds
#include <limits>
#ifdef __AVX2__
#include <immintrin.h>
#endif
#include "lima/Exceptions.h"
#include "lima/VideoUtils.h"
......@@ -196,13 +202,67 @@ void lima::data2Image(Data &aData,VideoImage &anImage)
}
/** @brief Scalar element-wise accumulation with saturation.
 *
 *  Adds each of the nb_items elements of the source buffer to the matching
 *  element of the destination buffer, in place. The sum is computed in a
 *  long long and clamped to [numeric_limits<TYPE>::min(),
 *  numeric_limits<TYPE>::max()] before being stored, so it saturates
 *  instead of wrapping around.
 *
 *  @param src_ptr  buffer of nb_items TYPE values to add (only read)
 *  @param dst_ptr  buffer of nb_items TYPE values, updated in place
 *  @param nb_items number of elements to process; nothing is done when <= 0
 *
 *  NOTE: the long long intermediate only gives a real saturation for integer
 *  types narrower than long long.
 */
template<class TYPE>
inline void _accumulate_raw(void *src_ptr,void *dst_ptr,int nb_items)
{
  const TYPE *src = static_cast<const TYPE*>(src_ptr);
  TYPE *dst = static_cast<TYPE*>(dst_ptr);
  // Bounds kept as TYPE so the comparisons below promote exactly as if
  // numeric_limits was called in place.
  const TYPE max_value = std::numeric_limits<TYPE>::max();
  const TYPE min_value = std::numeric_limits<TYPE>::min();

  for(int i = 0;i < nb_items;++i,++dst,++src)
    {
      long long sum = *src;
      sum += *dst;
      if(sum > max_value)
	*dst = max_value;
      else if(sum < min_value)
	*dst = min_value;
      else
	*dst = TYPE(sum);
    }
}
/** @brief Generic accumulation entry point.
 *
 *  Primary template: simply forwards to the scalar _accumulate_raw<TYPE>
 *  loop. When __AVX2__ is defined, explicit specializations for
 *  unsigned short and unsigned char replace this version.
 *
 *  @param src_ptr  buffer of nb_items TYPE values to add
 *  @param dst_ptr  buffer of nb_items TYPE values, updated in place
 *  @param nb_items number of elements in each buffer
 */
template<class TYPE>
void _accumulate(void *src_ptr,void *dst_ptr,int nb_items)
{
_accumulate_raw<TYPE>(src_ptr,dst_ptr,nb_items);
}
#ifdef __AVX2__
/** @brief AVX2 accumulation of 16-bit unsigned pixels.
 *
 *  Processes 16 elements (32 bytes) per iteration with an unsigned
 *  saturating add (_mm256_adds_epu16), then hands the remaining
 *  (< 16) elements to the scalar _accumulate_raw loop.
 *
 *  Unaligned load/store intrinsics are used on purpose: neither buffer is
 *  guaranteed to be 32-byte aligned, and the aligned variants fault on a
 *  misaligned address.
 *
 *  @param srcp     buffer of nb_items unsigned short values to add
 *  @param dstp     buffer of nb_items unsigned short values, updated in place
 *  @param nb_items number of elements in each buffer
 */
template<>
void _accumulate<unsigned short>(void *srcp,void *dstp,int nb_items)
{
  char *src_ptr = static_cast<char*>(srcp);
  char *dst_ptr = static_cast<char*>(dstp);

  for(;nb_items >= 16;nb_items -= 16,src_ptr += 32,dst_ptr += 32)
    {
      const __m256i src = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src_ptr));
      const __m256i dst = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(dst_ptr));
      const __m256i sum = _mm256_adds_epu16(src,dst);
      _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst_ptr),sum);
    }
  // Scalar tail: whatever the vector loop did not consume.
  _accumulate_raw<unsigned short>(src_ptr,dst_ptr,nb_items);
}
/** @brief AVX2 accumulation of 8-bit unsigned pixels.
 *
 *  Processes 32 elements (32 bytes) per iteration with an unsigned
 *  saturating add (_mm256_adds_epu8), then hands the remaining
 *  (< 32) elements to the scalar _accumulate_raw loop.
 *
 *  Unaligned load/store intrinsics are used on purpose: neither buffer is
 *  guaranteed to be 32-byte aligned, and the aligned variants fault on a
 *  misaligned address.
 *
 *  @param srcp     buffer of nb_items unsigned char values to add
 *  @param dstp     buffer of nb_items unsigned char values, updated in place
 *  @param nb_items number of elements in each buffer
 */
template<>
void _accumulate<unsigned char>(void *srcp,void *dstp,int nb_items)
{
  char *src_ptr = static_cast<char*>(srcp);
  char *dst_ptr = static_cast<char*>(dstp);

  for(;nb_items >= 32;nb_items -= 32,src_ptr += 32,dst_ptr += 32)
    {
      const __m256i src = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src_ptr));
      const __m256i dst = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(dst_ptr));
      const __m256i sum = _mm256_adds_epu8(src,dst);
      _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst_ptr),sum);
    }
  // Scalar tail: whatever the vector loop did not consume.
  _accumulate_raw<unsigned char>(src_ptr,dst_ptr,nb_items);
}
#endif
void lima::data_accumulate_to_image(Data &aData,VideoImage &anImage)
{
if(aData.empty())
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment