Commit 1b89e472 authored by Alessandro Mirone, committed by test

Adapted to POWER9. The Paganin filter now runs concurrently on both GPUs and CPUs.

OK
parent 230a3195
This diff is collapsed.
......@@ -1122,6 +1122,10 @@ struct Gpu_pag_Context_struct {
// set by init
void * d_fftwork, * d_kernelbuffer;
void * FFTplan_ptr;
int gpu_pagCtx_initialised ;
int NBunches_todo;
} ;
......@@ -1270,12 +1274,14 @@ void CCspace_set_nchunks( CCspace * self ,int nchunks ) ;
#define IN_ACQUISITION 1
#define ACQUIRED 2
void Paganin(CCspace * self, float *buffer, float * Rawptr,
void Paganin(CCspace * self, float * Rawptr,
int Pos0, int Pos1, int Size0, int Size1,
int pos0, int pos1, int size0, int size1,
Cparameters *P , int ncpus, sem_t* fftw_sem,
int pstart, int pend, int poffset,
int p_num_offset) ;
int p_num_offset,
int mystart,
int npbunches) ;
/* for ccd_filter */
#define CCD_FILTER_NONE_ID 0
......
......@@ -34,7 +34,9 @@
#include <stdio.h>
#include <stdlib.h>
#include<math.h>
#ifdef __SSE__
#include<emmintrin.h>
#endif
#include<semaphore.h>
#include"cpu_main.h"
......@@ -45,8 +47,13 @@
//#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#ifdef __SSE__
#define FLOAT_TO_INT(in,out) \
out=_mm_cvtss_si32(_mm_load_ss(&(in)));
out=_mm_cvtss_si32(_mm_load_ss(&(in)));
#else
#define FLOAT_TO_INT(in,out) \
out = (int) in ;
#endif
......
......@@ -19,7 +19,7 @@
#include<tiffio.h>
#include <emmintrin.h> //SSE header.
// #include <emmintrin.h> //SSE header.
......@@ -899,8 +899,8 @@ void writetiff( char * outname, int islice , float *results , int width, int hei
buffer = new unsigned short [ width*height ] ;
for(int i=0; i<width*height; i++) {
if( results[i] > 0xffff ) results[i] = 0xffff ;
if(sd2int(results[i]) >0) {
buffer[i] = (unsigned short) sd2int(results[i]) ;
if((results[i]) >0) {
buffer[i] = (unsigned short) (results[i]) ;
} else {
buffer[i] =0;
}
......
......@@ -7529,16 +7529,23 @@ void gpu_pagCtxCreate (Gpu_pag_Context * self) {
}
/*
 * Destroy the CUDA driver context referenced by a Paganin GPU context.
 *
 * self->gpuctx is treated as a pointer to a CUcontext. That context is
 * first made current and then destroyed. The return codes of both driver
 * calls are ignored.
 */
void gpu_pagCtxDestroy(Gpu_pag_Context * self) {
    /* Resolve the opaque handle once instead of casting it at each call. */
    CUcontext *ctx = (CUcontext *) self->gpuctx;

    cuCtxSetCurrent(*ctx);
    cuCtxDestroy(*ctx);
}
void gpu_pag( Gpu_pag_Context * self , float * auxbuffer ) {
char messaggio[1000];
int dimbuff = self->size_pa0*self->size_pa1;
cuCtxSetCurrent ( *((CUcontext *) self->gpuctx )) ;
CUDA_SAFE_CALL(cudaMemcpy(self->d_fftwork,auxbuffer,
sizeof(cufftComplex)*dimbuff,
cudaMemcpyHostToDevice));
CUFFT_SAFE_CALL( cufftExecC2C(*((cufftHandle*)self->FFTplan_ptr),
(cufftComplex *) self->d_fftwork,
(cufftComplex *) self->d_fftwork,
......@@ -7598,6 +7605,9 @@ void FreePinned(Gpu_Context * self,void *ptr) {
void gpu_pagFree( Gpu_pag_Context * self ) {
char messaggio[1000];
cuCtxSetCurrent ( *((CUcontext *) self->gpuctx )) ;
CUDA_SAFE_CALL(cudaFree(self->d_fftwork) );
CUDA_SAFE_CALL(cudaFree(self->d_kernelbuffer) );
CUFFT_SAFE_CALL (cufftDestroy(*((cufftHandle*)self->FFTplan_ptr)), "doing cufftDestroy ");
......
......@@ -187,7 +187,7 @@ machinefile_name="machinefile"
try:
args=["oarprint", "host", "-P", "host,cpu", "-F", "'% %'" ]
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p = sub.Popen(args=args ,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
elif (sys.version_info > (3, 0)):
p = sub.Popen(args=args, stdout=sub.PIPE,stderr=sub.PIPE, universal_newlines=True)
......@@ -262,7 +262,7 @@ else:
try:
args=["oarprint", "host", "-P", "host,gpu_num", "-F", "'% %'" ]
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p = sub.Popen(args=args ,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
elif (sys.version_info > (3, 0)):
......@@ -355,7 +355,7 @@ if(sys.argv[0][-12:]!="sphinx-build"):
mpi_extra=" --byslot "
comando = 'mpirun -V'
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p1 = sub.Popen(args= comando.split( " ") ,stdin=sub.PIPE,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
......@@ -425,7 +425,7 @@ if(sys.argv[0][-12:]!="sphinx-build"):
if P.MULTI_PAGANIN_PARS is None:
callpyhst( inputfile )
else:
import tempfile
from . import tempfile
steps = [1,1,1,1,1,1,1]
files_to_cancel = []
......
......@@ -47,15 +47,18 @@ from . import string_six
def getCpuSetRange(cpuset_string):
def getCpuSetRange(cpuset_string, first=True):
pos1 = cpuset_string.find( ":")
pos2 = max(cpuset_string.rfind( "-"), cpuset_string.rfind( ","))
if pos1 == -1 :
raise Exception( "ERROR : wrong output in cpuset_string " )
if pos2 == -1 :
print( " **********************************!!!!!!!!!!!!!!!!!!!!!!!!!!\n"*100)
print( " ERROR : you must request several OAR cores. PyHst cannot run on one core only ")
raise Exception( "ERROR : you must request several OAR cores. PyHst cannot run on one core only " )
if first:
print( " **********************************!!!!!!!!!!!!!!!!!!!!!!!!!!\n"*10)
print( " ERROR : you must request several OAR cores. PyHst cannot run on one core only , cpuset_string was "+cpuset_string)
raise Exception( "ERROR : you must request several OAR cores. PyHst cannot run on one core only , cpuset_string was "+cpuset_string )
else:
return []
result = []
msg = cpuset_string[pos1+1:]
msgs = msg.split(",")
......@@ -72,7 +75,7 @@ def getCoresOrdered():
comando = 'lscpu'
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p1 = sub.Popen(args=comando.split( " ") ,stdin=sub.PIPE,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
elif (sys.version_info > (3, 0)):
p1 = sub.Popen(args=comando.split( " ") ,stdin=sub.PIPE,stdout=sub.PIPE,stderr=sub.PIPE, universal_newlines=True)
......@@ -105,11 +108,13 @@ def getCoresOrdered_fromstring(msg):
result = []
first= True
for t in msgs[count:]:
# if "CPU" in t:
if "NUMA" in t:
res = getCpuSetRange(t)
res = getCpuSetRange(t, first = first)
first=False
print ( " RES " , res)
if(len(res)):
result = result + res
ncores4cpu = len(res)
......
......@@ -64,7 +64,7 @@ def setCpuSet(maxnargs=3):
import subprocess as sub
comando = 'taskset -cp %d'%(os.getpid())
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p = sub.Popen(args=comando.split( " ") ,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
elif (sys.version_info > (3, 0)):
......@@ -82,7 +82,7 @@ def setCpuSet(maxnargs=3):
comando = 'grep#physical id#/proc/cpuinfo'
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p1 = sub.Popen(args=comando.split( "#") ,stdout=sub.PIPE,stderr=sub.PIPE, text=True )
elif (sys.version_info > (3, 0)):
......@@ -94,7 +94,7 @@ def setCpuSet(maxnargs=3):
p1 = sub.Popen(args=comando.split( "#") ,stdout=sub.PIPE,stderr=sub.PIPE)
comando = 'sort -u'
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p2 = sub.Popen(args=comando.split( " ") ,stdin=p1.stdout,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
......@@ -108,7 +108,7 @@ def setCpuSet(maxnargs=3):
p2 = sub.Popen(args=comando.split( " ") ,stdin=p1.stdout,stdout=sub.PIPE,stderr=sub.PIPE)
comando = 'wc -l'
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p3 = sub.Popen(args=comando.split( " ") ,stdin=p2.stdout,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
......@@ -123,7 +123,7 @@ def setCpuSet(maxnargs=3):
nofprocessors=string.atoi(nofprocessors)
comando = "grep MemTotal /proc/meminfo"
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p1 = sub.Popen(args=comando.split( " ") ,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
elif (sys.version_info > (3, 0)):
......@@ -219,7 +219,7 @@ def setCpuSet(maxnargs=3):
comando = 'taskset -pc %s %d'%( s, os.getpid())
if (sys.version_info > (3, 6)):
if (sys.version_info >= (3, 7)):
p = sub.Popen(args=comando.split( " ") ,stdout=sub.PIPE,stderr=sub.PIPE, text=True)
elif (sys.version_info > (3, 0)):
......
......@@ -181,7 +181,12 @@ def do_pyhst():
jn = os.sep.join
mpi_dirs = ["/usr/include/openmpi/"]
mpilibs_dirs = []
# mpi_dirs = ["/home/test/packages/include"]
# mpilibs_dirs = ["/home/test/packages/lib"]
if 1:
hdf5_dirs = ["/usr/include/hdf5/serial/"]
......@@ -371,7 +376,7 @@ def do_pyhst():
e.extra_link_args = [ trans[a][1] if a in trans else a
for a in e.extra_link_args]
if e.libraries !=[ "fftw3f"] and e.libraries !=[ "mpi_cxx"]:
if e.libraries !=[ "fftw3f"] and e.libraries !=[ "mpi"]:
e.libraries = filter(None, [ trans[a] if a in trans else None
for a in e.libraries])
......@@ -433,6 +438,7 @@ def do_pyhst():
module = Extension(name='PyHST2_'+version+'.Cspace',
sources=c_sorgenti ,
depends=depends,
library_dirs= mpilibs_dirs,
libraries=["fftw3f_threads", "fftw3f",hdf5_lib, "mpi"],
#extra_compile_args=['-fopenmp'] ,
#extra_link_args=['-fopenmp'] ,
......@@ -558,7 +564,8 @@ def do_pyhst():
'PyHST2_'+version+'.segment_cy',
sources=c_sorgenti,
depends=depends,
libraries=[ "mpi_cxx"],
library_dirs = mpilibs_dirs,
libraries=[ "mpi"],
include_dirs=get_numpy_include_dirs()+ mpi_dirs+[],
language="c++", # generate C++ code
extra_compile_args={'gcc':['-fopenmp',"-fPIC", "-O3"] },
......@@ -570,7 +577,8 @@ def do_pyhst():
'PyHST2_'+version+'.segment_cy',
sources=c_sorgenti,
depends=depends,
libraries=[ "mpi_cxx"],
library_dirs = mpilibs_dirs,
libraries=[ "mpi"],
include_dirs=get_numpy_include_dirs()+ mpi_dirs+[],
language="c++", # generate C++ code
extra_compile_args={'gcc':['-fopenmp',"-fPIC", "-O3"] },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment