Add Transparent Huge Pages (THP) support
Lima buffer allocators would benefit from Transparent Huge Pages, at least for large detectors (e.g. Ximea, 6144x6144).
For instance, with the Ximea, a 1000-frame acquisition means 70GB allocated:
| Lima | With THP and mlock |
|---|---|
| `$ sudo cat /proc/meminfo` | `$ sudo cat /proc/meminfo` |
Look at `AnonHugePages` and `Mlocked`, for instance.
By default, on Ubuntu, the THP policy is set to `madvise`:
$ cat /sys/kernel/mm/transparent_hugepage/enabled
always [madvise] never
The following code shows how memory can be allocated with Huge Pages (without the heavy artillery of `hugetlbfs`), the key being a call to `madvise(..., MADV_HUGEPAGE)`. Allocated memory needs to be aligned to the size of the huge pages (typically 2MB) to help the kernel trigger the use of Huge Pages.
$ cat test_alloc.cpp
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cerrno>
#include <cstring>
#include <cstdint>
#include <sys/mman.h> // madvise, mlock
// Size in bytes of one huge page as assumed by this test: 2 MiB (2^21).
constexpr int huge_page_size = 1 << 21;
/**
 * Round `val` up to the nearest multiple of `alignment`.
 *
 * @param val       Value (e.g. an address held in an integer type) to round up.
 * @param alignment Alignment in the same units as `val`; must be a non-zero
 *                  power of two (checked with assert).
 * @return The smallest multiple of `alignment` that is >= `val`; `val` itself
 *         when it is already aligned.
 */
template <typename T, typename U>
static inline T align_up(T val, U alignment) {
    assert(alignment > 0 && (alignment & (alignment - 1)) == 0);
    // Build the mask in T: computing ~(alignment - 1) in a narrower unsigned U
    // would zero-extend it and wipe the high bits of val.
    const T mask = static_cast<T>(alignment) - 1;
    return (val + mask) & ~mask;
}
void* map_hgpages(size_t nb_pages)
{
// Initial mmapped area is large enough to contain the aligned huge pages
size_t alloc_size = nb_pages * huge_page_size;
void* ptr = mmap(
nullptr,
alloc_size + huge_page_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0);
if (ptr == MAP_FAILED) {
printf("PTR %p - ERRNO %d - %s\n", ptr, errno, strerror(errno));
return 0;
}
uintptr_t first_page = align_up((uintptr_t)ptr, huge_page_size);
// Unmap left-over 4k pages
munmap(ptr, first_page - (uintptr_t)ptr);
munmap(
(void*)(first_page + alloc_size),
huge_page_size - (first_page - (uintptr_t)ptr));
// Tell the kernel to please give us huge pages for this range
int ret = madvise((void*)first_page, huge_page_size * nb_pages, MADV_HUGEPAGE);
if (ret)
printf("PTR %ld - ERRNO %d - %s\n", first_page, errno, strerror(errno));
ret = mlock((void*)first_page, huge_page_size * nb_pages);
if (ret)
printf("PTR %p - ERRNO %d - %s\n", ptr, errno, strerror(errno));
return (void*)first_page;
}
/**
 * Test driver: maps 1000 frame-sized buffers through map_hgpages() and prints
 * one '#' per buffer that was mapped successfully.
 *
 * @return EXIT_SUCCESS when every buffer was mapped, EXIT_FAILURE otherwise.
 */
int main(int argc, char* argv[])
{
    int res = EXIT_SUCCESS;

    // 6144 x 6144 is the detector size quoted in the text above; the factor 2
    // is presumably the number of bytes per pixel.
    const size_t frame_size = 6144 * 6144 * 2;

    // Round up so a frame that is not a whole number of huge pages still fits
    const size_t pages_per_frame = (frame_size + huge_page_size - 1) / huge_page_size;

    void* ptrs[1000];
    for (void*& ptr : ptrs)
    {
        ptr = map_hgpages(pages_per_frame);
        if (ptr)
            printf("#");
        else
            res = EXIT_FAILURE;
    }

    return res;
}
`mlock` requires the `CAP_IPC_LOCK` capability:
sudo setcap cap_ipc_lock=+ep ./test_alloc
Checking for THP usage for a specific process:
sudo cat /proc/$(pidof test_alloc)/smaps | grep Huge