Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
pyhst2
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
3
Issues
3
List
Boards
Labels
Service Desk
Milestones
Jira
Jira
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
myron
pyhst2
Commits
ff225be4
Commit
ff225be4
authored
Sep 02, 2019
by
Alessandro Mirone
Committed by
Alessandro Mirone
Sep 04, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
adattato a slurm
problems with gpus on slurm? corretto per slurm done doc OK
parent
83b9afaf
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
450 additions
and
247 deletions
+450
-247
PyHST/Cspace/CCspace.c
PyHST/Cspace/CCspace.c
+4
-2
PyHST/Cspace/CCspace.h
PyHST/Cspace/CCspace.h
+1
-1
PyHST/Cspace/Cspace.c
PyHST/Cspace/Cspace.c
+1
-1
PyHST/Cspace/gputomo.cu
PyHST/Cspace/gputomo.cu
+67
-43
PyHST/Parameters_module.py
PyHST/Parameters_module.py
+1
-2
PyHST/PyHST.py
PyHST/PyHST.py
+350
-186
PyHST/__init__.py
PyHST/__init__.py
+1
-1
TEST_PYHST/nonregression.py
TEST_PYHST/nonregression.py
+7
-4
doc/installation.rst
doc/installation.rst
+14
-3
setup.py
setup.py
+4
-4
No files found.
PyHST/Cspace/CCspace.c
View file @
ff225be4
...
...
@@ -2316,7 +2316,9 @@ void CCspace_precalculations( CCspace * self, int ncpus ) {
self
->
gpu_context
=
(
Gpu_Context
*
)
malloc
(
sizeof
(
Gpu_Context
));
self
->
gpu_context
->
void_ccspace_ptr
=
(
void
*
)
self
;
self
->
gpu_context
->
inuse
=
0
;
self
->
gpu_context
->
gpuctx
=
NULL
;
self
->
gpu_context
->
tv_denoising_fistagpu
=
fn_denois
;
self
->
gpu_context
->
tv_denoising_patches_L1
=
fn_denois_patches_L1
;
self
->
gpu_context
->
tv_denoising_patches_OMP
=
fn_denois_patches_OMP
;
...
...
@@ -7267,6 +7269,7 @@ void Paganin(CCspace * self, float * RawptrA,
self
->
gpu_is_apriori_usable
=
0
;
}
else
{
selfP
->
gpu_pag_context
=
(
Gpu_pag_Context
*
)
malloc
(
sizeof
(
Gpu_pag_Context
));
selfP
->
gpu_pag_context
->
gpuctx
=
NULL
;
selfP
->
gpu_pag_context
->
inuse
=
0
;
selfP
->
gpu_pag_context
->
gpu_pagCtxCreate
=
fncreate
;
selfP
->
gpu_pag_context
->
gpu_pagCtxDestroy
=
fndestroy
;
...
...
@@ -7278,7 +7281,6 @@ void Paganin(CCspace * self, float * RawptrA,
/* selfP->gpu_pag_context->size_pa0 = size_pa0 ; */
/* selfP->gpu_pag_context->size_pa1 = size_pa1 ; */
selfP
->
gpu_pag_context
->
MYGPU
=
self
->
params
.
MYGPU
;
selfP
->
gpu_pag_context
->
gpu_pagCtxCreate
(
selfP
->
gpu_pag_context
)
;
}
...
...
PyHST/Cspace/CCspace.h
View file @
ff225be4
...
...
@@ -600,6 +600,7 @@ typedef struct {
cufftHandle
fft2d_plan
;
//!< Complex plan for forward 2D fourier transformations
}
DFP_params
;
void
*
getLibNameHandle
(
const
char
*
dirname
,
const
char
*
prefix
)
;
typedef
struct
Gpu_Context_struct
Gpu_Context
;
...
...
@@ -737,7 +738,6 @@ typedef int (*gpu_project_Symbol )(
float
fan_factor
,
float
source_x
);
//PP.add :
typedef
int
(
*
gpu_backproject_Symbol
)(
Gpu_Context
*
self
,
...
...
PyHST/Cspace/Cspace.c
View file @
ff225be4
...
...
@@ -47,7 +47,7 @@
#include"cpyutils.h"
#include<mpi.h>
#include <unistd.h>
#include <dlfcn.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define PY_ARRAY_UNIQUE_SYMBOL chst_ARRAY_API
...
...
PyHST/Cspace/gputomo.cu
View file @
ff225be4
...
...
@@ -1713,7 +1713,8 @@ __global__ void kern_compute_discrete_ramp(int length, cufftReal* oArray) {
else
if
(
gid
==
length
/
2
)
oArray
[
gid
]
=
val
;
else
{
oArray
[
gid
]
=
val
;
oArray
[
length
-
gid
]
=
val
;
if
(
gid
)
oArray
[
length
-
gid
]
=
val
;
}
}
}
...
...
@@ -2356,49 +2357,75 @@ __global__ void dfi_cuda_crop_roi(cufftReal *input, int x, int y, int roi_x, int
output
[
idy
*
roi_x
+
idx
]
=
input
[(
idy
+
y
)
*
raster_size
+
(
idx
+
x
)]
*
scale
;
}
}
int
gpu_mainInit
(
Gpu_Context
*
self
,
float
*
filter
)
{
// printf(" HELLO ZUBAIR! from gpu_mainInit\n");
int
icount
=
0
;
self
->
gpuctx
=
(
void
*
)
malloc
(
sizeof
(
CUcontext
))
;
// cudaSetDeviceFlags( cudaDeviceMapHost ) ;
cudaSetDevice
(
self
->
MYGPU
);
cuCtxCreate
(
(
CUcontext
*
)
self
->
gpuctx
,
// CU_CTX_SCHED_YIELD || CU_CTX_MAP_HOST ,
CU_CTX_SCHED_SPIN
,
self
->
MYGPU
)
;
if
(
self
->
gpuctx
==
NULL
)
{
cuInit
(
0
);
self
->
gpuctx
=
(
void
*
)
malloc
(
sizeof
(
CUcontext
))
;
// cudaSetDeviceFlags( cudaDeviceMapHost ) ;
printf
(
" SETTING %d
\n
"
,
self
->
MYGPU
)
;
cudaSetDevice
(
self
->
MYGPU
);
cuCtxCreate
(
(
CUcontext
*
)
self
->
gpuctx
,
// CU_CTX_SCHED_YIELD || CU_CTX_MAP_HOST ,
CU_CTX_SCHED_SPIN
,
self
->
MYGPU
)
;
}
cuCtxSetCurrent
(
*
((
CUcontext
*
)
self
->
gpuctx
))
;
//PP.add : allocate gpu memory and create plan for precondition ramp filter
if
(
self
->
DO_PRECONDITION
)
{
int
num_bins
=
self
->
num_bins
;
{
cudaError_t
last
=
cudaGetLastError
();
if
(
last
!=
cudaSuccess
)
{
printf
(
"ERRORaaa: %s
\n
"
,
cudaGetErrorString
(
last
));
exit
(
1
);
}
}
CUDA_SAFE_CALL
(
cudaMalloc
(
&
self
->
precond_params_dl
.
d_r_sino_error
,
fftbunch
*
nextpow2_padded
(
num_bins
)
*
sizeof
(
cufftReal
)));
CUDA_SAFE_CALL
(
cudaMalloc
(
&
self
->
precond_params_dl
.
d_i_sino_error
,
fftbunch
*
nextpow2_padded
(
num_bins
)
*
sizeof
(
cufftComplex
)));
CUDA_SAFE_FFT
(
cufftPlan1d
((
cufftHandle
*
)
&
self
->
precond_params_dl
.
planRamp_forward
,(
num_bins
),
CUFFT_R2C
,
fftbunch
));
CUDA_SAFE_FFT
(
cufftPlan1d
((
cufftHandle
*
)
&
self
->
precond_params_dl
.
planRamp_backward
,(
num_bins
),
CUFFT_C2R
,
fftbunch
));
cufftComplex
*
d_i_discrete_ramp
=
compute_discretized_ramp_filter
((
num_bins
),
self
->
precond_params_dl
.
d_r_sino_error
,
self
->
precond_params_dl
.
d_i_sino_error
,
self
->
precond_params_dl
.
planRamp_forward
);
self
->
precond_params_dl
.
filter_coeffs
=
d_i_discrete_ramp
;
//size : nextpow2_padded(num_bins)/2+1
}
if
(
self
->
USE_DFP
)
{
//TODO : these as guru-user-parameters
self
->
DFP_KERNEL_SIZE
=
7
;
self
->
DFP_NOFVALUES
=
2047
;
self
->
DFP_OVERSAMPLING_RATE
=
2
;
puts
(
"--------------------------------------------------------"
);
puts
(
"--------- Initializing Direct Fourier Projection -------"
);
puts
(
"--------------------------------------------------------"
);
float
angle_step
=
M_PI
/
(
float
)
self
->
nprojs_span
;
//common params
self
->
dfp_params
.
rho_len
=
self
->
num_bins
;
self
->
dfp_params
.
rho_len2
=
self
->
num_bins
/
2
;
self
->
dfp_params
.
theta_len
=
self
->
nprojs_span
;
self
->
dfp_params
.
rho_ext_len
=
pow
(
2
,
ceil
(
log2f
(
self
->
num_bins
)))
*
self
->
DFP_OVERSAMPLING_RATE
;
self
->
dfp_params
.
slice_size_x
=
self
->
num_bins
;
self
->
dfp_params
.
slice_size_y
=
self
->
num_bins
;
self
->
dfp_params
.
L
=
(
float
)
self
->
DFP_KERNEL_SIZE
;
self
->
dfp_params
.
L2
=
self
->
dfp_params
.
L
/
2.0
f
;
self
->
dfp_params
.
ktbl_len
=
self
->
DFP_NOFVALUES
;
self
->
dfp_params
.
ktbl_len2
=
(
self
->
dfp_params
.
ktbl_len
-
1
)
/
2
;
...
...
@@ -2613,23 +2640,15 @@ int gpu_mainInit(Gpu_Context * self, float *filter) {
// cufftSetCompatibilityMode(self->dfi_params.fft1d_plan, CUFFT_COMPATIBILITY_NATIVE);
}
//end of DFI initialization
//PP.add : allocate gpu memory and create plan for precondition ramp filter
if
(
self
->
DO_PRECONDITION
)
{
int
num_bins
=
self
->
num_bins
;
CUDA_SAFE_CALL
(
cudaMalloc
(
&
self
->
precond_params_dl
.
d_r_sino_error
,
fftbunch
*
nextpow2_padded
(
num_bins
)
*
sizeof
(
cufftReal
)));
CUDA_SAFE_CALL
(
cudaMalloc
(
&
self
->
precond_params_dl
.
d_i_sino_error
,
fftbunch
*
nextpow2_padded
(
num_bins
)
*
sizeof
(
cufftComplex
)));
CUDA_SAFE_FFT
(
cufftPlan1d
((
cufftHandle
*
)
&
self
->
precond_params_dl
.
planRamp_forward
,(
num_bins
),
CUFFT_R2C
,
fftbunch
));
CUDA_SAFE_FFT
(
cufftPlan1d
((
cufftHandle
*
)
&
self
->
precond_params_dl
.
planRamp_backward
,(
num_bins
),
CUFFT_C2R
,
fftbunch
));
cufftComplex
*
d_i_discrete_ramp
=
compute_discretized_ramp_filter
((
num_bins
),
self
->
precond_params_dl
.
d_r_sino_error
,
self
->
precond_params_dl
.
d_i_sino_error
,
self
->
precond_params_dl
.
planRamp_forward
);
self
->
precond_params_dl
.
filter_coeffs
=
d_i_discrete_ramp
;
//size : nextpow2_padded(num_bins)/2+1
}
// creare qui gli stream usando i type cast come al solito da void
// creare qui gli stream usando i type cast come al solito da void
if
(
filter
[
0
]
>
1.0e6
)
{
// this should never happen in normal cases
...
...
@@ -8430,15 +8449,20 @@ int gpu_main_2by2(Gpu_Context * self, float *WORK , float * SLICE, int do_precon
void
gpu_pagCtxCreate
(
Gpu_pag_Context
*
self
)
{
self
->
gpuctx
=
(
void
*
)
malloc
(
sizeof
(
CUcontext
))
;
// cudaSetDeviceFlags( cudaDeviceMapHost ) ;
cudaSetDevice
(
self
->
MYGPU
);
if
(
self
->
gpuctx
==
NULL
)
{
cuCtxCreate
(
(
CUcontext
*
)
self
->
gpuctx
,
// CU_CTX_SCHED_YIELD*0,
CU_CTX_SCHED_SPIN
,
self
->
MYGPU
);
cuInit
(
0
);
self
->
gpuctx
=
(
void
*
)
malloc
(
sizeof
(
CUcontext
))
;
// cudaSetDeviceFlags( cudaDeviceMapHost ) ;
cudaSetDevice
(
self
->
MYGPU
);
cuCtxCreate
(
(
CUcontext
*
)
self
->
gpuctx
,
// CU_CTX_SCHED_YIELD*0,
CU_CTX_SCHED_SPIN
,
self
->
MYGPU
);
}
}
void
gpu_pagCtxDestroy
(
Gpu_pag_Context
*
self
)
{
...
...
PyHST/Parameters_module.py
View file @
ff225be4
...
...
@@ -33,6 +33,7 @@ from __future__ import division
from
__future__
import
print_function
import
string
import
sys
from
.
import
string_six
...
...
@@ -434,8 +435,6 @@ def get_proj_reading(preanalisi=0):
coeffa
=
1.0
-
coeffb
print
(
" FILE PREFIX "
,
P
.
FILE_PREFIX
)
if
P
.
FILE_PREFIX
[
-
3
:]
==
".h5"
or
P
.
FILE_PREFIX
[
-
4
:]
==
".nxs"
:
fftype
=
"h5"
if
isinstance
(
P
.
FF_PREFIX
,
tuple
)
or
isinstance
(
P
.
FF_PREFIX
,
list
):
...
...
PyHST/PyHST.py
View file @
ff225be4
This diff is collapsed.
Click to expand it.
PyHST/__init__.py
View file @
ff225be4
...
...
@@ -30,5 +30,5 @@
# is a problem for you.
#############################################################################*/
version
=
"2019
anatomix
"
version
=
"2019
b
"
TEST_PYHST/nonregression.py
View file @
ff225be4
...
...
@@ -41,9 +41,10 @@ mailserver = None
# mailserver="tuodomain.country"
# LAUNCHING_INSTRUCTION = "echo 'localhost\n' > machinefile ; time PyHST2_2017c input.par gpu2-1304,0"
LAUNCHING_INSTRUCTION
=
"time PyHST2_2018b input.par scisoft13,0 | tee output"
# LAUNCHING_INSTRUCTION = "time PyHST2_2018b input.par scisoft13,0 | tee output"
LAUNCHING_INSTRUCTION
=
"PyHST2_2019b input.par | tee output"
outputprefix
=
"/data/scisofttmp/mirone/TEST_PYHST/RESULTS/s
cisoft13_2018bb_corrected
/"
outputprefix
=
"/data/scisofttmp/mirone/TEST_PYHST/RESULTS/s
lurm_2019b
/"
# outputprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2017c/2gpu_bis/tests"
# outputprefix="/data/scisofttmp/paleo/TEST_PYHST/DATASETS_RED/OUTPUTS"
...
...
@@ -66,9 +67,11 @@ casi = [ "ID11_SNOW" ]
##################################################
PREFIX
=
"/home/mirone/WORKS/TEST_PYHST/DATASETS"
PREFIX
=
"/scisoft/users/mirone/WORKS/TEST_PYHST/DATASETS"
casi
=
[
"CRAYON"
]
LAUNCHING_INSTRUCTION
=
"time PyHST2_2018b input.par nslino,0 | tee output"
outputprefix
=
"/home/mirone/TEST_PYHST/RESULTS/nslino_2018b/"
LAUNCHING_INSTRUCTION
=
"PyHST2_2019b input.par "
outputprefix
=
"/home/mirone/TEST_PYHST/RESULTS/slurm_2019b/"
outputprefix
=
"/data/scisofttmp/mirone/TEST_PYHST/RESULTS/slurm_2019b/"
###############################################################
...
...
doc/installation.rst
View file @
ff225be4
...
...
@@ -9,12 +9,23 @@ Installation
git clone https://gitlab.esrf.fr/mirone/pyhst2
* If you compile on a Debian system, for a local installation you can use ::
* If you compile on a Debian system, for a local installation you can use
, as an example
::
export TD=${PWD}/dummy/
python setup.py install --prefix ${TD}
python setup.py install --prefix ${TD}
then to run the code you must do beforehand ::
If your nvcc build needs clang as the host compiler ::
export USECLANG4NVCC=YES
before compiling the code. You can possibly edit setup.py to change the extra parameters
that are passed to nvcc in this case.
PyHST2 has been tested with python2 and python3, on Intel and PowerPC machines, and with different CUDA versions.
In case of a very recent CUDA version, which has dropped support for compute capability 2.0, you might have to remove
the mentions of compute_20 from setup.py, and possibly add new ones.
* Then to run the code you must do beforehand ::
export PYTHONPATH=${PWD}/dummy/lib/python2.7/site-packages
export PATH=${PWD}/dummy/bin/:$PATH
...
...
setup.py
View file @
ff225be4
...
...
@@ -302,7 +302,7 @@ def do_pyhst():
postargs
=
extra_postargs
[
'nvcc'
]
if
"USECLANG4NVCC"
in
os
.
environ
and
os
.
environ
[
"USECLANG4NVCC"
]
==
"YES"
:
postargs
=
postargs
+
[
"-ccbin clang-3.8"
]
postargs
=
postargs
+
[
"
-ccbin clang-3.8"
]
else
:
self
.
set_executable
(
'compiler_so'
,
"mpicc"
)
postargs
=
extra_postargs
[
'gcc'
]
...
...
@@ -404,9 +404,9 @@ def do_pyhst():
if
DOCUDA
:
CUDA
=
{
'include'
:[]}
CUDA
.
update
(
locate_cuda
())
CUDA
[
"arch"
]
=
[
#
"-gencode", "arch=compute_20,code=compute_20",
"-gencode"
,
"arch=compute_30,code=compute_30"
,
"-gencode"
,
"arch=compute_50,code=compute_50"
CUDA
[
"arch"
]
=
[
"-gencode"
,
"arch=compute_20,code=compute_20"
,
"-gencode"
,
"arch=compute_30,code=compute_30"
,
"-gencode"
,
"arch=compute_50,code=compute_50"
]
# print( CUDA)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment