Commit ff225be4 authored by Alessandro Mirone's avatar Alessandro Mirone Committed by Alessandro Mirone

adattato a slurm

problems with gpus on slurm?

corretto per slurm

done doc

OK
parent 83b9afaf
......@@ -2316,7 +2316,9 @@ void CCspace_precalculations( CCspace * self, int ncpus ) {
self->gpu_context = ( Gpu_Context*) malloc(sizeof(Gpu_Context));
self->gpu_context->void_ccspace_ptr = (void *) self;
self->gpu_context->inuse=0;
self->gpu_context->gpuctx = NULL ;
self->gpu_context->tv_denoising_fistagpu = fn_denois ;
self->gpu_context->tv_denoising_patches_L1 = fn_denois_patches_L1 ;
self->gpu_context->tv_denoising_patches_OMP = fn_denois_patches_OMP ;
......@@ -7267,6 +7269,7 @@ void Paganin(CCspace * self, float * RawptrA,
self->gpu_is_apriori_usable=0;
} else {
selfP->gpu_pag_context = ( Gpu_pag_Context*) malloc(sizeof(Gpu_pag_Context));
selfP->gpu_pag_context->gpuctx = NULL;
selfP->gpu_pag_context->inuse=0;
selfP->gpu_pag_context->gpu_pagCtxCreate = fncreate ;
selfP->gpu_pag_context->gpu_pagCtxDestroy = fndestroy ;
......@@ -7278,7 +7281,6 @@ void Paganin(CCspace * self, float * RawptrA,
/* selfP->gpu_pag_context->size_pa0 = size_pa0 ; */
/* selfP->gpu_pag_context->size_pa1 = size_pa1 ; */
selfP->gpu_pag_context->MYGPU = self->params.MYGPU ;
selfP->gpu_pag_context->gpu_pagCtxCreate( selfP->gpu_pag_context ) ;
}
......
......@@ -600,6 +600,7 @@ typedef struct {
cufftHandle fft2d_plan; //!< Complex plan for forward 2D fourier transformations
} DFP_params;
void * getLibNameHandle( const char *dirname, const char *prefix) ;
typedef struct Gpu_Context_struct Gpu_Context;
......@@ -737,7 +738,6 @@ typedef int (*gpu_project_Symbol )(
float fan_factor,
float source_x
);
//PP.add :
typedef int (*gpu_backproject_Symbol )(
Gpu_Context * self,
......
......@@ -47,7 +47,7 @@
#include"cpyutils.h"
#include<mpi.h>
#include <unistd.h>
#include <dlfcn.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define PY_ARRAY_UNIQUE_SYMBOL chst_ARRAY_API
......
......@@ -1713,7 +1713,8 @@ __global__ void kern_compute_discrete_ramp(int length, cufftReal* oArray) {
else if (gid == length/2) oArray[gid] = val;
else {
oArray[gid] = val;
oArray[length-gid] = val;
if(gid)
oArray[length-gid] = val;
}
}
}
......@@ -2356,49 +2357,75 @@ __global__ void dfi_cuda_crop_roi(cufftReal *input, int x, int y, int roi_x, int
output[idy * roi_x + idx] = input[(idy + y) * raster_size + (idx + x)] * scale;
}
}
int gpu_mainInit(Gpu_Context * self, float *filter) {
// printf(" HELLO ZUBAIR! from gpu_mainInit\n");
int icount=0;
self->gpuctx = (void*) malloc(sizeof(CUcontext)) ;
// cudaSetDeviceFlags( cudaDeviceMapHost ) ;
cudaSetDevice(self->MYGPU);
cuCtxCreate( (CUcontext *) self->gpuctx ,
// CU_CTX_SCHED_YIELD || CU_CTX_MAP_HOST ,
CU_CTX_SCHED_SPIN ,
self->MYGPU
) ;
if( self->gpuctx == NULL ) {
cuInit(0);
self->gpuctx = (void*) malloc(sizeof(CUcontext)) ;
// cudaSetDeviceFlags( cudaDeviceMapHost ) ;
printf(" SETTING %d \n", self->MYGPU) ;
cudaSetDevice(self->MYGPU);
cuCtxCreate( (CUcontext *) self->gpuctx ,
// CU_CTX_SCHED_YIELD || CU_CTX_MAP_HOST ,
CU_CTX_SCHED_SPIN ,
self->MYGPU
) ;
}
cuCtxSetCurrent ( *((CUcontext *) self->gpuctx )) ;
//PP.add : allocate gpu memory and create plan for precondition ramp filter
if (self->DO_PRECONDITION) {
int num_bins = self->num_bins;
{
cudaError_t last = cudaGetLastError();
if(last!=cudaSuccess) {
printf("ERRORaaa: %s \n", cudaGetErrorString( last));
exit(1);
}
}
CUDA_SAFE_CALL(cudaMalloc(&self->precond_params_dl.d_r_sino_error, fftbunch*nextpow2_padded(num_bins)*sizeof(cufftReal)));
CUDA_SAFE_CALL(cudaMalloc(&self->precond_params_dl.d_i_sino_error, fftbunch*nextpow2_padded(num_bins)*sizeof(cufftComplex)));
CUDA_SAFE_FFT(cufftPlan1d((cufftHandle *) &self->precond_params_dl.planRamp_forward,(num_bins),CUFFT_R2C,fftbunch));
CUDA_SAFE_FFT(cufftPlan1d((cufftHandle *) &self->precond_params_dl.planRamp_backward,(num_bins),CUFFT_C2R,fftbunch));
cufftComplex* d_i_discrete_ramp = compute_discretized_ramp_filter((num_bins), self->precond_params_dl.d_r_sino_error, self->precond_params_dl.d_i_sino_error, self->precond_params_dl.planRamp_forward);
self->precond_params_dl.filter_coeffs = d_i_discrete_ramp; //size : nextpow2_padded(num_bins)/2+1
}
if (self->USE_DFP) {
//TODO : these as guru-user-parameters
self->DFP_KERNEL_SIZE = 7;
self->DFP_NOFVALUES = 2047;
self->DFP_OVERSAMPLING_RATE = 2;
puts("--------------------------------------------------------");
puts("--------- Initializing Direct Fourier Projection -------");
puts("--------------------------------------------------------");
float angle_step = M_PI / (float)self->nprojs_span;
//common params
self->dfp_params.rho_len = self->num_bins;
self->dfp_params.rho_len2 = self->num_bins/2;
self->dfp_params.theta_len = self->nprojs_span;
self->dfp_params.rho_ext_len = pow(2, ceil(log2f(self->num_bins))) * self->DFP_OVERSAMPLING_RATE;
self->dfp_params.slice_size_x = self->num_bins;
self->dfp_params.slice_size_y = self->num_bins;
self->dfp_params.L = (float)self->DFP_KERNEL_SIZE;
self->dfp_params.L2 = self->dfp_params.L/2.0f;
self->dfp_params.ktbl_len = self->DFP_NOFVALUES;
self->dfp_params.ktbl_len2 = (self->dfp_params.ktbl_len - 1)/2;
......@@ -2613,23 +2640,15 @@ int gpu_mainInit(Gpu_Context * self, float *filter) {
// cufftSetCompatibilityMode(self->dfi_params.fft1d_plan, CUFFT_COMPATIBILITY_NATIVE);
} //end of DFI initialization
//PP.add : allocate gpu memory and create plan for precondition ramp filter
if (self->DO_PRECONDITION) {
int num_bins = self->num_bins;
CUDA_SAFE_CALL(cudaMalloc(&self->precond_params_dl.d_r_sino_error, fftbunch*nextpow2_padded(num_bins)*sizeof(cufftReal)));
CUDA_SAFE_CALL(cudaMalloc(&self->precond_params_dl.d_i_sino_error, fftbunch*nextpow2_padded(num_bins)*sizeof(cufftComplex)));
CUDA_SAFE_FFT(cufftPlan1d((cufftHandle *) &self->precond_params_dl.planRamp_forward,(num_bins),CUFFT_R2C,fftbunch));
CUDA_SAFE_FFT(cufftPlan1d((cufftHandle *) &self->precond_params_dl.planRamp_backward,(num_bins),CUFFT_C2R,fftbunch));
cufftComplex* d_i_discrete_ramp = compute_discretized_ramp_filter((num_bins), self->precond_params_dl.d_r_sino_error, self->precond_params_dl.d_i_sino_error, self->precond_params_dl.planRamp_forward);
self->precond_params_dl.filter_coeffs = d_i_discrete_ramp; //size : nextpow2_padded(num_bins)/2+1
}
// creare qui gli stream usando i type cast come al solito da void
// creare qui gli stream usando i type cast come al solito da void
if(filter[0] > 1.0e6) { // this should never happen in normal cases
......@@ -8430,15 +8449,20 @@ int gpu_main_2by2(Gpu_Context * self, float *WORK , float * SLICE, int do_precon
void gpu_pagCtxCreate (Gpu_pag_Context * self) {
self->gpuctx = (void*) malloc(sizeof(CUcontext)) ;
// cudaSetDeviceFlags( cudaDeviceMapHost ) ;
cudaSetDevice(self->MYGPU);
if( self->gpuctx == NULL ) {
cuCtxCreate( (CUcontext *) self->gpuctx ,
// CU_CTX_SCHED_YIELD*0,
CU_CTX_SCHED_SPIN,
self->MYGPU
);
cuInit(0);
self->gpuctx = (void*) malloc(sizeof(CUcontext)) ;
// cudaSetDeviceFlags( cudaDeviceMapHost ) ;
cudaSetDevice(self->MYGPU);
cuCtxCreate( (CUcontext *) self->gpuctx ,
// CU_CTX_SCHED_YIELD*0,
CU_CTX_SCHED_SPIN,
self->MYGPU
);
}
}
void gpu_pagCtxDestroy(Gpu_pag_Context * self) {
......
......@@ -33,6 +33,7 @@ from __future__ import division
from __future__ import print_function
import string
import sys
from . import string_six
......@@ -434,8 +435,6 @@ def get_proj_reading(preanalisi=0):
coeffa = 1.0-coeffb
print(" FILE PREFIX " , P.FILE_PREFIX )
if P.FILE_PREFIX[-3:] == ".h5" or P.FILE_PREFIX[-4:] == ".nxs":
fftype="h5"
if isinstance( P.FF_PREFIX, tuple) or isinstance( P.FF_PREFIX, list):
......
This diff is collapsed.
......@@ -30,5 +30,5 @@
# is a problem for you.
#############################################################################*/
version = "2019anatomix"
version = "2019b"
......@@ -41,9 +41,10 @@ mailserver = None
# mailserver="tuodomain.country"
# LAUNCHING_INSTRUCTION = "echo 'localhost\n' > machinefile ; time PyHST2_2017c input.par gpu2-1304,0"
LAUNCHING_INSTRUCTION = "time PyHST2_2018b input.par scisoft13,0 | tee output"
# LAUNCHING_INSTRUCTION = "time PyHST2_2018b input.par scisoft13,0 | tee output"
LAUNCHING_INSTRUCTION = "PyHST2_2019b input.par | tee output"
outputprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/scisoft13_2018bb_corrected/"
outputprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/slurm_2019b/"
# outputprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2017c/2gpu_bis/tests"
# outputprefix="/data/scisofttmp/paleo/TEST_PYHST/DATASETS_RED/OUTPUTS"
......@@ -66,9 +67,11 @@ casi = [ "ID11_SNOW" ]
##################################################
PREFIX="/home/mirone/WORKS/TEST_PYHST/DATASETS"
PREFIX="/scisoft/users/mirone/WORKS/TEST_PYHST/DATASETS"
casi=["CRAYON"]
LAUNCHING_INSTRUCTION = "time PyHST2_2018b input.par nslino,0 | tee output"
outputprefix="/home/mirone/TEST_PYHST/RESULTS/nslino_2018b/"
LAUNCHING_INSTRUCTION = "PyHST2_2019b input.par "
outputprefix="/home/mirone/TEST_PYHST/RESULTS/slurm_2019b/"
outputprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/slurm_2019b/"
###############################################################
......
......@@ -9,12 +9,23 @@ Installation
git clone https://gitlab.esrf.fr/mirone/pyhst2
* If you compile on a Debian system, for a local installation you can use ::
* If you compile on a Debian system, for a local installation you can use, as an example ::
export TD=${PWD}/dummy/
python setup.py install --prefix ${TD}
python setup.py install --prefix ${TD}
then to run the code you must do beforehand ::
If you need clang ::
export USECLANG4NVCC=YES
before compiling the code. You can possibly edit setup.py to change the extra parameters
that are passed to nvcc in this case.
PyHST2 has been tested with python2 and python3, on intel and powerpc machines, and different CUDA versions.
In case of a very recent CUDA version, which has dropped compute capability 2.0, you might have to remove
the mentions of compute_20 from setup.py, and possibly add new ones.
* Then to run the code you must do beforehand ::
export PYTHONPATH=${PWD}/dummy/lib/python2.7/site-packages
export PATH=${PWD}/dummy/bin/:$PATH
......
......@@ -302,7 +302,7 @@ def do_pyhst():
postargs = extra_postargs['nvcc']
if "USECLANG4NVCC" in os.environ and os.environ ["USECLANG4NVCC"]=="YES":
postargs = postargs + ["-ccbin clang-3.8"]
postargs = postargs + [" -ccbin clang-3.8"]
else:
self.set_executable('compiler_so', "mpicc")
postargs = extra_postargs['gcc']
......@@ -404,9 +404,9 @@ def do_pyhst():
if DOCUDA:
CUDA={'include':[]}
CUDA .update(locate_cuda())
CUDA["arch"] = [ # "-gencode", "arch=compute_20,code=compute_20",
"-gencode", "arch=compute_30,code=compute_30",
"-gencode", "arch=compute_50,code=compute_50"
CUDA["arch"] = [ "-gencode", "arch=compute_20,code=compute_20",
"-gencode", "arch=compute_30,code=compute_30" ,
"-gencode", "arch=compute_50,code=compute_50"
]
# print( CUDA)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment