Commit 91a39292 authored by Samuel Debionne

Merge branch 'buffer_numa_ctl' into 'master'

Add buffer NUMA control

See merge request !116
parents 3ce322c1 ac789bec
Pipeline #8172 passed with stages
in 19 minutes and 54 seconds
......@@ -60,6 +60,11 @@ endif()
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})
include(LimaTools)
if(UNIX AND LIMA_ENABLE_NUMA)
# Numa is needed for advanced buffer management
find_package(Numa REQUIRED)
endif()
# Import pthread
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
find_package(Threads REQUIRED)
......@@ -253,6 +258,11 @@ if(LIMA_BUILD_SUBMODULES)
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third-party/Processlib/tasks/include>")
endif()
if(UNIX AND LIMA_ENABLE_NUMA)
    # LIMA_USE_NUMA guards declarations (methods and data members) in the
    # limacore headers, so it is attached to the target as PUBLIC: limacore
    # itself and every target linking against it get the same definition.
    target_compile_definitions(limacore PUBLIC -DLIMA_USE_NUMA)
    target_include_directories(limacore PRIVATE "${NUMA_INCLUDE_DIR}")
endif()
if(LIMA_ENABLE_SPS_IMAGE)
target_compile_definitions(limacore PUBLIC -DWITH_SPS_IMAGE)
target_include_directories(limacore PRIVATE "${CMAKE_SOURCE_DIR}/third-party/Sps/Include")
......@@ -275,6 +285,9 @@ target_link_libraries(limacore PRIVATE ${saving_private_libs})
if(UNIX)
target_compile_definitions(limacore PUBLIC -DHAS_INOTIFY)
target_link_libraries(limacore PUBLIC "rt")
if(LIMA_ENABLE_NUMA)
target_link_libraries(limacore PUBLIC ${NUMA_LIBRARY})
endif()
endif()
if(WIN32)
......
# Module for locating libnuma
#
# Read-only variables:
# NUMA_FOUND
# Indicates that the library has been found.
#
# NUMA_INCLUDE_DIR
# Points to the libnuma include directory.
#
# NUMA_LIBRARY_DIR
# Points to the directory that contains the libraries.
# The content of this variable can be passed to link_directories.
#
# NUMA_LIBRARY
# Points to the libnuma library that can be passed to target_link_libraries.
#
# Copyright (c) 2015 Steve Borho
include(FindPackageHandleStandardArgs)
find_path(NUMA_ROOT_DIR
NAMES include/numa.h
PATHS ENV NUMA_ROOT
DOC "NUMA root directory")
find_path(NUMA_INCLUDE_DIR
NAMES numa.h
HINTS ${NUMA_ROOT_DIR}
PATH_SUFFIXES include
DOC "NUMA include directory")
find_library(NUMA_LIBRARY
NAMES numa
HINTS ${NUMA_ROOT_DIR}
DOC "NUMA library")
if (NUMA_LIBRARY)
get_filename_component(NUMA_LIBRARY_DIR ${NUMA_LIBRARY} PATH)
endif()
mark_as_advanced(NUMA_INCLUDE_DIR NUMA_LIBRARY_DIR NUMA_LIBRARY)
find_package_handle_standard_args(NUMA REQUIRED_VARS NUMA_ROOT_DIR NUMA_INCLUDE_DIR NUMA_LIBRARY)
......@@ -56,6 +56,10 @@ class LIMACORE_API MemBuffer
MemBuffer(MemBuffer&&) = default;
MemBuffer& operator=(MemBuffer&&) = default;
#ifdef LIMA_USE_NUMA
void setCPUAffinityMask(unsigned long cpu_mask);
#endif
void alloc(int size);
void deepCopy(const MemBuffer& buffer);
void release();
......@@ -66,12 +70,71 @@ class LIMACORE_API MemBuffer
void clear();
operator void*();
operator const void*() const;
operator void *();
operator const void *() const;
private:
// Default memory allocation strategy used by MemBuffer.
// The class is a polymorphic base: every operation is virtual so that a
// derived allocator can specialise it.
class Allocator
{
public:
    // Virtual destructor, as required for a polymorphic base class
    virtual ~Allocator() {}

    // Allocate a buffer of a given size and store the resulting pointer
    // and size in buffer. size is an in/out parameter: the allocator may
    // round it up (page alignment when mmap is used)
    virtual void alloc(MemBuffer& buffer, int& size);
    // Write zeros into the buffer so that its pages get touched (hot page)
    virtual void init(MemBuffer& buffer);
    // Copy a buffer from src to dst
    virtual void copy(MemBuffer& dst, const MemBuffer& src);
    // Fill buffer with zeros
    virtual void clear(MemBuffer& buffer);
    // Free the buffer memory and reset the buffer pointer and size
    virtual void release(MemBuffer& buffer);

    // Returns a Singleton
    static Allocator *getAllocator();

    // Returns the size of a page aligned buffer (multiple of page size)
    static int getPageAlignedSize(int size);
#ifdef __unix
    // Returns true if mmap is available
    static bool useMmap(int size);
    // Allocate a buffer with mmap (virtual address mapping)
    static void *allocMmap(int& size);
#endif

protected:
    // Instances are only created through getAllocator() or by subclasses
    Allocator() {}
};
friend class Allocator;
void init();
void allocMemory(int& size);
void initMemory();
int m_size;
void *m_ptr;
Allocator *m_allocator;
#ifdef LIMA_USE_NUMA
class NumaAllocator : public Allocator
{
public:
virtual void alloc(MemBuffer& buffer, int& size);
virtual void init(MemBuffer& buffer);
virtual void copy(MemBuffer& dst, const MemBuffer& src);
virtual void clear(MemBuffer& buffer);
virtual void release(MemBuffer& buffer);
// Returns a Singleton
static NumaAllocator *getAllocator();
// Given a cpu_mask, returns the memory node mask
// used by alloc to bind memory with the proper socket
void getNUMANodeMask(unsigned long cpu_mask,
unsigned long& node_mask,
int& max_node);
};
friend class NumaAllocator;
unsigned long m_cpu_mask; //<! if NUMA is used, keep the cpu_mask for later use
#endif
};
inline int MemBuffer::getSize() const
......@@ -99,8 +162,6 @@ inline MemBuffer::operator const void *() const
return getConstPtr();
}
} // namespace lima
......
......@@ -26,6 +26,11 @@
#include <sstream>
#ifdef __unix
#include <sys/sysinfo.h>
#ifdef LIMA_USE_NUMA
#include <numa.h>
#include <numaif.h>
#endif
#include <sys/mman.h>
#ifdef __SSE2__
#include <emmintrin.h>
#endif
......@@ -102,50 +107,144 @@ void lima::ClearBuffer(void *ptr, int nb_concat_frames,
memset(ptr, 0, nb_concat_frames * frame_dim.getMemSize());
}
// Returns the address of the single, function-local Allocator instance.
MemBuffer::Allocator *MemBuffer::Allocator::getAllocator()
{
    static Allocator the_instance;
    Allocator *singleton = &the_instance;
    return singleton;
}
MemBuffer::MemBuffer()
: m_size(0), m_ptr(NULL)
// Allocates size bytes and stores the resulting pointer/size in buffer.
// On unix, requests for which useMmap() is true go through allocMmap()
// (which may update size); the others use posix_memalign. Elsewhere
// _aligned_malloc is used. Throws a LIMA exception on allocation failure.
void MemBuffer::Allocator::alloc(MemBuffer& buffer, int& size)
{
    void *mem = NULL;
#ifdef __unix
    if (!useMmap(size)) {
        const int err = posix_memalign(&mem, Alignment, size);
        if (err != 0)
            throw LIMA_COM_EXC(Error, "Error in posix_memalign: ")
                << strerror(err);
    } else {
        mem = allocMmap(size);
    }
#else
    mem = _aligned_malloc(size, Alignment);
    if (mem == NULL)
        throw LIMA_COM_EXC(Error, "Error in _aligned_malloc: ")
            << "NULL pointer return";
#endif
    // Publish the result only once the allocation has succeeded
    buffer.m_ptr = mem;
    buffer.m_size = size;
}
MemBuffer::MemBuffer(int size)
: m_size(0), m_ptr(NULL)
void MemBuffer::Allocator::init(MemBuffer& buffer)
{
alloc(size);
char* ptr = (char*)m_ptr;
char *ptr = (char *) buffer.getPtr();
int size = buffer.getSize();
#ifdef __unix
long page_size = sysconf(_SC_PAGESIZE);
int page_size;
GetPageSize(page_size);
#ifdef __SSE2__
if(!((long)ptr & 15)) // aligned to 128 bits
{
__m128i zero = _mm_setzero_si128();
for(long i = 0;i < size;i += page_size,ptr+=page_size)
{
if(size_t(size - i) >= sizeof(__m128i))
_mm_store_si128((__m128i*)ptr,zero);
else
*ptr = 0;
if (!((long) ptr & 15)) { // aligned to 128 bits
__m128i zero = _mm_setzero_si128();
for (long i = 0; i < size; i += page_size, ptr += page_size) {
if (size_t(size - i) >= sizeof(__m128i))
_mm_store_si128((__m128i *) ptr, zero);
else
*ptr = 0;
}
_mm_empty();
}
else
{
_mm_empty();
} else {
#endif
for(long i = 0;i < size;i += page_size,ptr+=page_size)
*ptr = 0;
for (long i = 0; i < size; i += page_size, ptr += page_size)
*ptr = 0;
#ifdef __SSE2__
}
}
#endif
#else
memset(getPtr(), 0, size);
memset(ptr, 0, size);
#endif
}
// Copies src.getSize() bytes from src into buffer with memcpy.
// No allocation or size check is done here.
void MemBuffer::Allocator::copy(MemBuffer& buffer, const MemBuffer& src)
{
    const void *from = src.getConstPtr();
    void *to = buffer.getPtr();
    memcpy(to, from, src.getSize());
}
// Clears the buffer through ClearBuffer(), describing it as a single
// frame of getSize() x 1 pixels of type Bpp8.
void MemBuffer::Allocator::clear(MemBuffer& buffer)
{
    FrameDim byte_dim(buffer.getSize(), 1, Bpp8);
    ClearBuffer(buffer.getPtr(), 1, byte_dim);
}
// Frees the memory held by buffer and resets its pointer and size.
// On unix the same useMmap() criterion as in alloc() selects between
// munmap and free; elsewhere _aligned_free is used.
void MemBuffer::Allocator::release(MemBuffer& buffer)
{
    void * const mem = buffer.getPtr();
#ifdef __unix
    const int nb_bytes = buffer.getSize();
    if (!useMmap(nb_bytes))
        free(mem);
    else
        munmap(mem, nb_bytes);
#else
    _aligned_free(mem);
#endif
    buffer.m_size = 0;
    buffer.m_ptr = NULL;
}
// Rounds size up to the next multiple of the page size reported by
// GetPageSize(). The bit-mask arithmetic relies on the page size being
// a power of two.
int MemBuffer::Allocator::getPageAlignedSize(int size)
{
    int page_size;
    GetPageSize(page_size);
    const int remainder = size & (page_size - 1);
    return remainder ? size + (page_size - remainder) : size;
}
#ifdef __unix
// Tells whether a buffer of the given size is handled with mmap/munmap:
// true for sizes of 128 KiB and above.
bool MemBuffer::Allocator::useMmap(int size)
{
    const int mmap_threshold = 128 * 1024;
    return !(size < mmap_threshold);
}
// Allocates an anonymous, private, read/write mapping.
// size is rounded up to a multiple of the page size before the call and
// the rounded value is returned to the caller through the reference.
// Throws a LIMA exception carrying strerror(errno) if mmap fails.
void *MemBuffer::Allocator::allocMmap(int& size)
{
    size = getPageAlignedSize(size);
    void *ptr = mmap(0, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // mmap signals failure with MAP_FAILED, never with a NULL pointer
    if (ptr == MAP_FAILED)
        throw LIMA_COM_EXC(Error, "Error in mmap: ")
            << strerror(errno);
    return ptr;
}
#endif
// Puts every data member in its "empty buffer" state: no allocator,
// no memory, zero size (and no CPU affinity when NUMA is enabled).
inline void MemBuffer::init()
{
#ifdef LIMA_USE_NUMA
    m_cpu_mask = 0;
#endif
    m_allocator = NULL;
    m_ptr = NULL;
    m_size = 0;
}
// Default constructor: creates an empty buffer (see init()).
MemBuffer::MemBuffer()
{
    this->init();
}
// Creates a buffer and immediately allocates size bytes for it.
MemBuffer::MemBuffer(int size)
{
    this->init();
    this->alloc(size);
}
MemBuffer::MemBuffer(const MemBuffer& buffer)
: m_size(0), m_ptr(NULL)
{
init();
deepCopy(buffer);
}
......@@ -155,30 +254,34 @@ MemBuffer::~MemBuffer()
}
void MemBuffer::alloc(int size)
{
allocMemory(size);
initMemory();
}
void MemBuffer::allocMemory(int& size)
{
if (m_size == size)
return;
release();
#ifdef __unix
int ret = posix_memalign(&m_ptr, Alignment, size);
if (ret != 0)
throw LIMA_COM_EXC(Error, "Error in posix_memalign: ")
<< strerror(ret);
#else
m_ptr = _aligned_malloc(size,Alignment);
if(!m_ptr)
throw LIMA_COM_EXC(Error, "Error in _aligned_malloc: NULL pointer return");
#endif
m_size = size;
if (!m_allocator)
m_allocator = Allocator::getAllocator();
m_allocator->alloc(*this, size);
}
void MemBuffer::initMemory()
{
m_allocator->init(*this);
}
void MemBuffer::deepCopy(const MemBuffer& buffer)
{
int size = buffer.getSize();
alloc(size);
memcpy(getPtr(), buffer.getConstPtr(), size);
allocMemory(size);
m_allocator->copy(*this, buffer);
}
MemBuffer& MemBuffer::operator =(const MemBuffer& buffer)
......@@ -189,19 +292,77 @@ MemBuffer& MemBuffer::operator =(const MemBuffer& buffer)
void MemBuffer::release()
{
if (!m_size)
if (m_size)
m_allocator->release(*this);
}
// Clears the buffer content through the allocator; does nothing for an
// empty buffer.
void MemBuffer::clear()
{
    if (!m_size)
        return;
    m_allocator->clear(*this);
}
#ifdef LIMA_USE_NUMA
// Stores the CPU affinity mask and, when it is non-zero, switches this
// buffer to the NUMA allocator. A zero mask leaves the current allocator
// untouched.
void MemBuffer::setCPUAffinityMask(unsigned long cpu_mask)
{
    m_cpu_mask = cpu_mask;
    if (cpu_mask == 0)
        return;
    m_allocator = NumaAllocator::getAllocator();
}
void MemBuffer::NumaAllocator::alloc(MemBuffer& buffer, int& size)
{
Allocator::alloc(buffer, size);
if (!useMmap(size) || !buffer.m_cpu_mask)
return;
#ifdef __unix
free(m_ptr);
#else
_aligned_free(m_ptr);
#endif
m_ptr = NULL;
m_size = 0;
void *ptr = buffer.getPtr();
unsigned long node_mask;
int max_node;
getNUMANodeMask(buffer.m_cpu_mask, node_mask, max_node);
mbind(ptr, size, MPOL_BIND, &node_mask, max_node, 0);
}
void MemBuffer::clear()
// No NUMA specific work: forwards to the base class implementation.
void MemBuffer::NumaAllocator::init(MemBuffer& buffer)
{
    this->Allocator::init(buffer);
}
// No NUMA specific work: forwards to the base class implementation.
void MemBuffer::NumaAllocator::copy(MemBuffer& buffer, const MemBuffer& src)
{
    this->Allocator::copy(buffer, src);
}
void MemBuffer::NumaAllocator::clear(MemBuffer& buffer)
{
ClearBuffer(getPtr(), 1, FrameDim(getSize(), 1, Bpp8));
Allocator::clear(buffer);
}
// No NUMA specific work: forwards to the base class implementation.
void MemBuffer::NumaAllocator::release(MemBuffer& buffer)
{
    this->Allocator::release(buffer);
}
// Returns the address of the single, function-local NumaAllocator instance.
MemBuffer::NumaAllocator *MemBuffer::NumaAllocator::getAllocator()
{
    static NumaAllocator the_instance;
    NumaAllocator *singleton = &the_instance;
    return singleton;
}
// Converts a CPU bit mask into the bit mask of the NUMA nodes those CPUs
// belong to.
//   cpu_mask:  input, bit i set means CPU i is selected
//   node_mask: output, bit n set means node n hosts a selected CPU
//   max_node:  output, numa_max_node() + 2
//              NOTE(review): presumably the extra +1 accounts for the way
//              mbind interprets its maxnode argument - confirm
// CPUs for which numa_node_of_cpu() fails (negative result), or whose node
// does not fit in node_mask, are ignored.
void MemBuffer::NumaAllocator::getNUMANodeMask(unsigned long cpu_mask,
                                               unsigned long& node_mask,
                                               int& max_node)
{
    const int nb_mask_bits = sizeof(unsigned long) * 8;
    int nb_nodes = numa_max_node() + 1;
    max_node = nb_nodes + 1;
    node_mask = 0;
    for (int cpu = 0; cpu < nb_mask_bits; ++cpu) {
        if (!((cpu_mask >> cpu) & 1))
            continue;
        int node = numa_node_of_cpu(cpu);
        // A negative value reports an error; shifting by it (or by a
        // value >= the mask width) would be undefined behaviour
        if ((node < 0) || (node >= nb_mask_bits))
            continue;
        node_mask |= 1UL << node;
    }
}
#endif
#!/bin/bash
#./install.sh --install-prefix=$PREFIX --install-python-prefix=$SP_DIR --find-root-path=$PREFIX hdf5 hdf5-bs edfgz edflz4 cbf tiff python pytango-server basler pilatus v4l2 espia maxipix frelon andor andor3 prosilica
#./install.sh --install-prefix=$PREFIX --install-python-prefix=$SP_DIR --find-root-path=$PREFIX hdf5 hdf5-bs edfgz edflz4 cbf tiff numa python pytango-server basler pilatus v4l2 espia maxipix frelon andor andor3 prosilica
cmake -Bbuild -H. -DLIMA_BUILD_SUBMODULES=0 -DLIMA_ENABLE_PYTHON=1 -DLIMA_ENABLE_TESTS=1 -DLIMA_ENABLE_CONFIG=1 -DLIMA_ENABLE_TIFF=1 -DLIMA_ENABLE_HDF5=1 -DLIMA_ENABLE_HDF5_BS=1 -DLIMA_ENABLE_EDFGZ=1 -DLIMA_ENABLE_EDFLZ4=1 -DCMAKE_INSTALL_PREFIX=$PREFIX -DPYTHON_SITE_PACKAGES_DIR=$SP_DIR -DCMAKE_FIND_ROOT_PATH=$PREFIX -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY
cmake -Bbuild -H. -DLIMA_BUILD_SUBMODULES=0 -DLIMA_ENABLE_PYTHON=1 -DLIMA_ENABLE_TESTS=1 -DLIMA_ENABLE_CONFIG=1 -DLIMA_ENABLE_TIFF=1 -DLIMA_ENABLE_HDF5=1 -DLIMA_ENABLE_HDF5_BS=1 -DLIMA_ENABLE_EDFGZ=1 -DLIMA_ENABLE_EDFLZ4=1 -DLIMA_ENABLE_NUMA=1 -DCMAKE_INSTALL_PREFIX=$PREFIX -DPYTHON_SITE_PACKAGES_DIR=$SP_DIR -DCMAKE_FIND_ROOT_PATH=$PREFIX -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY
cmake --build build --target install
......@@ -24,7 +24,7 @@ requirements:
- hdf5 1.10*
- libtiff
- cbflib # [linux]
#- openssl # [linux]
- libnuma # [linux]
build:
- git
- cmake
......@@ -41,6 +41,7 @@ requirements:
- hdf5 1.10*
- libtiff
- cbflib # [linux]
- libnuma # [linux]
test:
requires:
......
......@@ -89,6 +89,11 @@ class LIMACORE_API SoftBufferAllocMgr : public BufferAllocMgr
virtual void releaseBuffers();
virtual void *getBufferPtr(int buffer_nb);
#ifdef LIMA_USE_NUMA
void setCPUAffinityMask(unsigned long cpu_mask);
void getCPUAffinityMask(unsigned long& cpu_mask);
#endif
private:
typedef std::vector<MemBuffer> BufferList;
......@@ -96,6 +101,10 @@ class LIMACORE_API SoftBufferAllocMgr : public BufferAllocMgr
FrameDim m_frame_dim;
BufferList m_buffer_list;
#ifdef LIMA_USE_NUMA
unsigned long m_cpu_mask;
#endif
};
......@@ -305,6 +314,11 @@ class LIMACORE_API SoftBufferCtrlObj : public HwBufferCtrlObj
virtual void registerFrameCallback(HwFrameCallback& frame_cb);
virtual void unregisterFrameCallback(HwFrameCallback& frame_cb);
#ifdef LIMA_USE_NUMA
void setCPUAffinityMask(unsigned long cpu_mask);
void getCPUAffinityMask(unsigned long& cpu_mask);
#endif
StdBufferCbMgr& getBuffer();
int getNbAcquiredFrames();
......
......@@ -42,7 +42,7 @@ using namespace lima;
virtual void clearAllBuffers();
private:
BufferAllocMgr(const BufferAllocMgr&);
BufferAllocMgr(const BufferAllocMgr& o);
};
class SoftBufferAllocMgr : BufferAllocMgr
......@@ -182,3 +182,41 @@ using namespace lima;
BufferCbMgr& getAcqBufferMgr();
AcqMode getAcqMode();
};
// SIP binding declaration of lima::SoftBufferCtrlObj, derived from
// HwBufferCtrlObj. Reference parameters annotated with /Out/ are output
// values.
// NOTE(review): setCPUAffinityMask/getCPUAffinityMask are not declared in
// this binding.
class SoftBufferCtrlObj : public HwBufferCtrlObj
{
%TypeHeaderCode
#include "lima/HwBufferMgr.h"
using namespace lima;
%End
public:
SoftBufferCtrlObj();
virtual void setFrameDim(const FrameDim& frame_dim);
virtual void getFrameDim(FrameDim& frame_dim /Out/);
virtual void setNbBuffers(int nb_buffers);
virtual void getNbBuffers(int& nb_buffers /Out/);
virtual void setNbConcatFrames(int nb_concat_frames);
virtual void getNbConcatFrames(int& nb_concat_frames /Out/);
virtual void getMaxNbBuffers(int& max_nb_buffers /Out/);
virtual void *getBufferPtr(int buffer_nb, int concat_frame_nb = 0);
virtual void *getFramePtr(int acq_frame_nb);
virtual void getStartTimestamp(Timestamp& start_ts /Out/);
virtual void getFrameInfo(int acq_frame_nb, HwFrameInfoType& info /Out/);
virtual void registerFrameCallback(HwFrameCallback& frame_cb);
virtual void unregisterFrameCallback(HwFrameCallback& frame_cb);
StdBufferCbMgr& getBuffer();
int getNbAcquiredFrames();
private:
// Copy constructor declared private
SoftBufferCtrlObj(const SoftBufferCtrlObj& o);
};
......@@ -62,6 +62,10 @@ void BufferAllocMgr::clearAllBuffers()
// Creates the manager; with NUMA support the CPU affinity mask starts at 0
// (no affinity requested).
SoftBufferAllocMgr::SoftBufferAllocMgr()
#ifdef LIMA_USE_NUMA
    : m_cpu_mask(0)
#endif
{
    DEB_CONSTRUCTOR();
}
SoftBufferAllocMgr::~SoftBufferAllocMgr()
......@@ -75,6 +79,22 @@ int SoftBufferAllocMgr::getMaxNbBuffers(const FrameDim& frame_dim)
return GetDefMaxNbBuffers(frame_dim);
}
#ifdef LIMA_USE_NUMA
void SoftBufferAllocMgr::setCPUAffinityMask(unsigned long cpu_mask)
{
DEB_MEMBER_FUNCT();
DEB_PARAM() << DEB_VAR1(DEB_HEX(cpu_mask));
m_cpu_mask = cpu_mask;
}
void SoftBufferAllocMgr::getCPUAffinityMask(unsigned long& cpu_mask)
{
DEB_MEMBER_FUNCT();
cpu_mask = m_cpu_mask;
DEB_RETURN() << DEB_VAR1(DEB_HEX(cpu_mask));
}
#endif
void SoftBufferAllocMgr::allocBuffers(int nb_buffers,
const FrameDim& frame_dim)
{
......@@ -107,8 +127,13 @@ void SoftBufferAllocMgr::allocBuffers(int nb_buffers,
if (to_alloc > 0) {
bl.resize(nb_buffers);
DEB_TRACE() << "Allocating " << to_alloc << " buffers";
for (int i = 0; i < nb_buffers; i++)
for (int i = 0; i < nb_buffers; i++) {
#ifdef LIMA_USE_NUMA
if (m_cpu_mask)
bl[i].setCPUAffinityMask(m_cpu_mask);
#endif
bl[i].alloc(frame_size);
}
} else {
DEB_TRACE() << "Releasing " << -to_alloc << " buffers";
}
......@@ -748,6 +773,18 @@ void SoftBufferCtrlObj::unregisterFrameCallback(HwFrameCallback& frame_cb)
m_mgr.unregisterFrameCallback(frame_cb);
}
#ifdef LIMA_USE_NUMA
void SoftBufferCtrlObj::setCPUAffinityMask(unsigned long cpu_mask)
{
m_buffer_alloc_mgr.setCPUAffinityMask(cpu_mask);
}
void SoftBufferCtrlObj::getCPUAffinityMask(unsigned long& cpu_mask)
{
m_buffer_alloc_mgr.getCPUAffinityMask(cpu_mask);
}
#endif
StdBufferCbMgr& SoftBufferCtrlObj::getBuffer()
{
return m_buffer_cb_mgr;
......
......@@ -433,6 +433,7 @@ class GitHelper:
not_submodules = (
'python', 'tests', 'cbf', 'lz4', 'fits', 'gz', 'tiff', 'hdf5',
'numa',
)
submodule_map = {
......
......@@ -68,6 +68,7 @@ LIMA_ENABLE_HDF5_BS=0
LIMA_ENABLE_SPS_IMAGE=0
LIMA_ENABLE_CONFIG=0
LIMA_ENABLE_GLDISPLAY=0
LIMA_ENABLE_NUMA=0
LIMA_ENABLE_PYTHON=0
LIMA_ENABLE_PYTANGO_SERVER=0
LIMA_ENABLE_TESTS=0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment