Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
pyhst2
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
3
Issues
3
List
Boards
Labels
Service Desk
Milestones
Jira
Jira
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
myron
pyhst2
Commits
08500a27
Commit
08500a27
authored
Jan 29, 2020
by
Alessandro Mirone
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
https://gitlab.esrf.fr/mirone/pyhst2
parents
f3b358b3
32385d0e
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
41 additions
and
18 deletions
+41
-18
PyHST/Cspace/gputomo.cu
PyHST/Cspace/gputomo.cu
+32
-16
PyHST/setCpuSet.py
PyHST/setCpuSet.py
+8
-1
setup.py
setup.py
+1
-1
No files found.
PyHST/Cspace/gputomo.cu
View file @
08500a27
...
...
@@ -7232,8 +7232,19 @@ __global__ static void multi_pro_gputomo_conicity_kernel(float *d_SINO, // da
{
for
(
imult
=
0
;
imult
<
PROCONO_GPU_MULT
*
PROCONO_GPU_MULT
;
imult
++
)
res
[
imult
]
=
0.0
f
;
}
float
Area_fz
=
1.0
f
;
{
float
L
=
(
source_distance
+
detector_distance
);
float
H
=
(
ix
-
SOURCE_X
)
*
v_size
/
v2x
;
float
V
=
(
iz
-
SOURCE_Z
)
*
v_size
/
v2x
;
Area_fz
=
sqrt
(
(
L
*
L
+
H
*
H
+
V
*
V
)
/
(
L
*
L
+
H
*
H
)
)
;
}
if
(
fabs
(
sin_angle
)
>
fabs
(
cos_angle
))
{
Area
=
1.0
/
fabs
(
sin_angle
);
Area
=
1.0
/
fabs
(
sin_angle
)
*
Area_fz
;
for
(
ivx
=
0
;
ivx
<
num_x
;
ivx
++
)
{
Ddeno
=
Ddeno0
+
ivx
*
sin_angle
*
v_size
*
1.0e-6
;
d_F_x
=
cos_angle
*
v_size
*
1.0e-6
/
Dnumex
;
...
...
@@ -7250,14 +7261,15 @@ __global__ static void multi_pro_gputomo_conicity_kernel(float *d_SINO, // da
fvyb
=
fvy
+
jmult
*
d_fvy_x
;
for
(
imult
=
0
;
imult
<
PROCONO_GPU_MULT
;
imult
++
)
{
fvzb
=
fvz
+
imult
*
d_z_z
+
jmult
*
d_z_x
;
if
(
fvyb
>
0
&&
fvyb
<
num_y
)
{
res
[
imult
*
PROCONO_GPU_MULT
+
jmult
]
+=
tex3D
(
texfor3D
,
ivx
+
0.5
f
,
fvyb
+
0.5
f
,
fvzb
+
1.5
f
);
// if( fvyb>-0.01 && fvyb< num_y+0.01 )
{
res
[
imult
*
PROCONO_GPU_MULT
+
jmult
]
+=
tex3D
(
texfor3D
,
ivx
+
0.5
f
,
fvyb
+
0.5
f
,
fvzb
+
1.5
f
);
// 0.5+1 , 1 is for the margin
}
}
}
}
}
else
{
Area
=
1.0
/
fabs
(
cos_angle
);
Area
=
1.0
/
fabs
(
cos_angle
)
*
Area_fz
;
for
(
ivy
=
0
;
ivy
<
num_y
;
ivy
++
)
{
Ddeno
=
Ddeno0
+
ivy
*
cos_angle
*
v_size
*
1.0e-6
;
d_F_x
=
-
sin_angle
*
v_size
*
1.0e-6
/
Dnumex
;
...
...
@@ -7274,7 +7286,8 @@ __global__ static void multi_pro_gputomo_conicity_kernel(float *d_SINO, // da
fvxb
=
fvx
+
jmult
*
d_fvx_x
;
for
(
imult
=
0
;
imult
<
PROCONO_GPU_MULT
;
imult
++
)
{
fvzb
=
fvz
+
imult
*
d_z_z
+
jmult
*
d_z_x
;
if
(
fvxb
>
0
&&
fvxb
<
num_x
)
{
// if( fvxb>-0.01 && fvxb< num_x+0.01 ) // il passaggio a zero deve essere assicurato dalle slice zeros che fanno sandwich e che ho aggiunto
{
res
[
imult
*
PROCONO_GPU_MULT
+
jmult
]
+=
tex3D
(
texfor3D
,
fvxb
+
0.5
f
,
ivy
+
0.5
f
,
fvzb
+
1.5
f
);
}
}
...
...
@@ -7399,11 +7412,11 @@ __global__ static void gputomo_conicity_kernel(float *d_SLICE, // da allocare
for
(
int
ix
=
0
;
ix
<
CONO_GPU_MULT
;
ix
++
)
{
Z
=
pz
+
ix
*
d_pz_ix
+
iy
*
d_pz_iy
;
if
(
Z
<
0
)
{
if
(
Z
<
-
0.5
f
)
{
continue
;
// Z=0;
}
if
(
Z
>
nslices_data
-
1
)
{
if
(
Z
>
nslices_data
-
0.5
f
)
{
continue
;
// Z = nslices_data-1;
}
...
...
@@ -7416,11 +7429,11 @@ __global__ static void gputomo_conicity_kernel(float *d_SLICE, // da allocare
}
}
else
{
Z
=
pz
;
if
(
Z
<
0
)
{
if
(
Z
<
-
0.5
f
)
{
continue
;
// Z=0;
}
if
(
Z
>
nslices_data
-
1
)
{
if
(
Z
>
nslices_data
-
0.5
f
)
{
continue
;
// Z = nslices_data-1;
}
...
...
@@ -7483,10 +7496,13 @@ int gpu_main_conicity(Gpu_Context * self, float * SLICE, float *WORK_perp
mynproj
*
self
->
num_bins
*
sizeof
(
float
)
*
nslices_data
,
cudaMemcpyHostToDevice
)
);
CUDA_SAFE_CALL
(
cudaBindTextureToArray
(
texProjes
,
a_Proje_voidptr
)
);
texProjes
.
filterMode
=
cudaFilterModeLinear
;
texProjes
.
addressMode
[
0
]
=
cudaAddressModeClamp
;
texProjes
.
addressMode
[
1
]
=
cudaAddressModeClamp
;
texProjes
.
addressMode
[
0
]
=
cudaAddressModeBorder
;
texProjes
.
normalized
=
false
;
CUDA_SAFE_CALL
(
cudaBindTextureToArray
(
texProjes
,
a_Proje_voidptr
)
);
{
...
...
@@ -7577,8 +7593,8 @@ int pro_gpu_main_conicity(Gpu_Context * self, float * SLICE, float *SINO
}
CUDA_SAFE_CALL
(
cudaBindTextureToArray
(
texfor3D
,
a_SLICE_voidptr
)
);
texfor3D
.
filterMode
=
cudaFilterModeLinear
;
texfor3D
.
addressMode
[
0
]
=
cudaAddressMode
Clamp
;
texfor3D
.
addressMode
[
1
]
=
cudaAddressMode
Clamp
;
texfor3D
.
addressMode
[
0
]
=
cudaAddressMode
Border
;
texfor3D
.
addressMode
[
1
]
=
cudaAddressMode
Border
;
texfor3D
.
addressMode
[
2
]
=
cudaAddressModeClamp
;
texfor3D
.
normalized
=
false
;
...
...
@@ -7969,8 +7985,10 @@ int gpu_main(Gpu_Context * self, float *WORK , float * SLICE, int do_preconditio
CUDA_SAFE_CALL
(
cudaMemcpyToArray
((
cudaArray
*
)
self
->
a_Proje_voidptr
,
0
,
0
,
self
->
dev_Work_perproje
,
self
->
nprojs_span
*
self
->
num_bins
*
sizeof
(
float
)
,
cudaMemcpyDeviceToDevice
)
);
CUDA_SAFE_CALL
(
cudaBindTextureToArray
(
texProjes
,(
cudaArray
*
)
self
->
a_Proje_voidptr
)
);
// !! unbind ??
texProjes
.
addressMode
[
0
]
=
cudaAddressModeBorder
;
texProjes
.
addressMode
[
1
]
=
cudaAddressModeClamp
;
texProjes
.
filterMode
=
cudaFilterModeLinear
;
CUDA_SAFE_CALL
(
cudaBindTextureToArray
(
texProjes
,(
cudaArray
*
)
self
->
a_Proje_voidptr
)
);
// !! unbind ??
int
inizio
=
0
,
fine
=
0
;
...
...
@@ -7983,8 +8001,6 @@ int gpu_main(Gpu_Context * self, float *WORK , float * SLICE, int do_preconditio
int
endpro_angle
=-
1
;
int
npis
=
0
;
int
projection
;
for
(
projection
=
0
;
projection
<
self
->
nprojs_span
;
projection
++
)
{
...
...
PyHST/setCpuSet.py
View file @
08500a27
...
...
@@ -76,8 +76,15 @@ def setCpuSet(maxnargs=3):
cpuset_string
,
errors
=
p
.
communicate
()
##########################################################################################
## 25/01/2020 : this should fix the distribution on some recent architecture
myavailable_cores_on_host
=
getCpuSetRange
.
getCpuSetRange
(
str
(
cpuset_string
).
strip
())
tmp_ncores4cpu
,
myavailable_cores_on_host
=
getCpuSetRange
.
getCoresOrdered
()
# previously the following line applied
# myavailable_cores_on_host = getCpuSetRange.getCpuSetRange(str(cpuset_string).strip())
#
##########################################################################################
Ntotal_cores_on_host
=
multiprocessing
.
cpu_count
()
comando
=
'grep#physical id#/proc/cpuinfo'
...
...
setup.py
View file @
08500a27
...
...
@@ -404,7 +404,7 @@ def do_pyhst():
if
DOCUDA
:
CUDA
=
{
'include'
:[]}
CUDA
.
update
(
locate_cuda
())
CUDA
[
"arch"
]
=
[
"-gencode"
,
"arch=compute_20,code=compute_20"
,
CUDA
[
"arch"
]
=
[
#
"-gencode", "arch=compute_20,code=compute_20",
"-gencode"
,
"arch=compute_30,code=compute_30"
,
"-gencode"
,
"arch=compute_50,code=compute_50"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment