Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tomotools
Nabu
Commits
2a6341d4
Commit
2a6341d4
authored
Nov 18, 2019
by
Pierre Paleo
Browse files
Merge branch 'fbp' into 'master'
FBP on stack of sinograms See merge request paleo/nabu!6
parents
fbf9b946
bc21aca9
Changes
5
Hide whitespace changes
Inline
Side-by-side
nabu/cuda/src/backproj.cu
View file @
2a6341d4
#define SHARED_SIZE 256
texture
<
float
,
2
,
cudaReadModeElementType
>
tex_projections
;
//~ texture<float, 2, cudaReadModeElementType> tex_msin_lut;
//~ texture<float, 2, cudaReadModeElementType> tex_cos_lut;
/**
...
...
@@ -25,7 +20,7 @@ threads (32, 32). However, it turns out that performances are best with (16, 16)
blocks.
**/
// Backproject one s
lice
// Backproject one s
inogram
// One thread handles up to 4 pixels in the output slice
// the case num_projs > 1024 has to be included.
__global__
void
backproj
(
...
...
@@ -63,7 +58,7 @@ __global__ void backproj(
for
(
int
proj
=
0
;
proj
<
num_projs
;
proj
++
)
{
if
(
proj
==
next_fetch
)
{
// Fetch
"blockDim.x"
values to shared memory
// Fetch
SHARED_SIZE
values to shared memory
__syncthreads
();
if
(
next_fetch
+
tid
<
num_projs
)
{
s_cos
[
tid
]
=
d_cos
[
next_fetch
+
tid
];
...
...
@@ -91,13 +86,12 @@ __global__ void backproj(
}
d_slice
[
y
*
(
n_x
)
+
x
]
=
sum1
*
scale_factor
;
if
(
Gx
+
x
<
n_x
)
d_slice
[
y
*
(
n_x
)
+
Gx
+
x
]
=
sum2
*
scale_factor
;
if
(
Gy
+
y
<
n_y
)
{
d_slice
[(
y
+
Gy
)
*
(
n_x
)
+
x
]
=
sum3
*
scale_factor
;
if
(
Gx
+
x
<
n_x
)
d_slice
[(
y
+
Gy
)
*
(
n_x
)
+
Gx
+
x
]
=
sum4
*
scale_factor
;
if
(
Gx
+
x
<
n_x
)
d_slice
[(
y
+
Gy
)
*
(
n_x
)
+
Gx
+
x
]
=
sum4
*
scale_factor
;
}
}
...
...
@@ -105,75 +99,15 @@ __global__ void backproj(
#ifdef BACKPROJ3D
texture
<
float
,
3
,
cudaReadModeElementType
>
tex_projections3D
;
/**
*
* Old stuff / tinkering
*
*
**/
/*
#define BLOCK_SIZE 512
#define BLOCK_SIZ2 BLOCK_SIZE*2
__global__ void backproj_cust(
float* d_slice,
int num_projs,
int num_bins,
float axis_position,
int n_x,
int n_y,
float scale_factor
)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if ((x >= n_x) || (y >= n_y)) return;
int xr = (x - axis_position), yr = (y - axis_position);
float sum = 0.0f;
for (int proj = 0; proj < num_projs; proj++) {
// h = axis_pos + x*cos - y*sin
// = (cos, -sin) .* (x, y) + axis_pos
#if USE_F2 == 1
float2 cos_msin = tex2D(tex_sincos_lut, proj + 0.5f, 0.5f);
float h = cos_msin.x * xr + cos_msin.y * yr + axis_position; // TODO dot() ?
//~ float h = __cosf(3.141592f/500*proj) * xr - __sinf(3.141592f/500*proj) * yr + axis_position; // fastest (3x with __intrinsics)
//~ float h = xr*0.1f - yr*0.2f + axis_position;
#else
//~ float pcos = tex2D(tex_cos_lut, proj + 0.5f, 0.5f);
//~ float pmsin = tex2D(tex_msin_lut, proj + 0.5f, 0.5f);
//~ float h = pcos * xr + pmsin * yr + axis_position; // TODO dot() ?
float h = cosf(3.141592f/500*proj) * xr - sinf(3.141592f/500*proj) * yr + axis_position;
#endif
// if(h >= 0 && h < num_bins)
sum += tex2D(tex_projections, h + 0.5f, proj + 0.5f);
}
d_slice[y*num_bins + x] = sum * scale_factor;
}
__global__ void backproj_cust2(
float* d_slice,
// Backproject multiple sinograms
__global__
void
backproj_multi
(
float
*
d_slices
,
int
num_projs
,
int
num_bins
,
int
num_slices
,
float
axis_position
,
int
n_x
,
int
n_y
,
...
...
@@ -184,59 +118,63 @@ __global__ void backproj_cust2(
{
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
if ((x >= n_x) || (y >= n_y)) return;
float xr = (x - axis_position), yr = (y - axis_position);
volatile __shared__ float shared_mem[BLOCK_SIZ2];
volatile float* s_cos = shared_mem;
volatile float* s_msin = shared_mem + BLOCK_SIZE;
int
z
=
blockDim
.
z
*
blockIdx
.
z
+
threadIdx
.
z
;
int
Gx
=
blockDim
.
x
*
gridDim
.
x
;
int
Gy
=
blockDim
.
y
*
gridDim
.
y
;
// Fetch "blockDim.x" values to shared memory
int tid = threadIdx.x;
s_cos[tid] = d_cos[tid];
s_msin[tid] = d_msin[tid];
//
//
int next_fetch = BLOCK_SIZE;
__syncthreads();
// ------------
float sum = 0.0f;
int proj = 0, proj_loc = 0;
// (xr, yr) (xrp, yr)
// (xr, yrp) (xrp, yrp)
float
xr
=
x
-
axis_position
,
yr
=
y
-
axis_position
;
float
xrp
=
xr
+
Gx
,
yrp
=
yr
+
Gy
;
for (proj = 0, proj_loc = 0; proj < num_projs; proj++, proj_loc++) {
//~ if (proj == next_fetch) {
//~ if (tid + next_fetch < num_projs) {
//~ s_cos[tid] = d_cos[next_fetch + tid];
//~ s_msin[tid] = d_msin[next_fetch + tid];
//~ __syncthreads();
//~ }
//~ next_fetch += BLOCK_SIZE;
//~ proj_loc = 0;
//~ }
/*volatile*/
__shared__
float
s_cos
[
SHARED_SIZE
];
/*volatile*/
__shared__
float
s_msin
[
SHARED_SIZE
];
int
next_fetch
=
0
;
int
tid
=
threadIdx
.
y
*
blockDim
.
x
+
threadIdx
.
x
;
// h = axis_pos + x*cos - y*sin
// = (cos, -sin) .* (x, y) + axis_pos
float
costheta
,
msintheta
;
float
h1
,
h2
,
h3
,
h4
;
float
sum1
=
0.0
f
,
sum2
=
0.0
f
,
sum3
=
0.0
f
,
sum4
=
0.0
f
;
//~ float h = s_cos[proj_loc] * xr + s_msin[proj_loc] * yr + axis_position; // TODO dot() ?
float h =
fmaf(s_cos[proj_loc], xr, axis_position) +
fmaf(s_msin[proj_loc], yr, 0.0f);
for
(
int
proj
=
0
;
proj
<
num_projs
;
proj
++
)
{
if
(
proj
==
next_fetch
)
{
// Fetch SHARED_SIZE values to shared memory
__syncthreads
();
if
(
next_fetch
+
tid
<
num_projs
)
{
s_cos
[
tid
]
=
d_cos
[
next_fetch
+
tid
];
s_msin
[
tid
]
=
d_msin
[
next_fetch
+
tid
];
}
next_fetch
+=
SHARED_SIZE
;
__syncthreads
();
}
costheta
=
s_cos
[
proj
-
(
next_fetch
-
SHARED_SIZE
)];
msintheta
=
s_msin
[
proj
-
(
next_fetch
-
SHARED_SIZE
)];
float
c1
=
fmaf
(
costheta
,
xr
,
axis_position
);
// cos(theta)*xr + axis_pos
float
c2
=
fmaf
(
costheta
,
xrp
,
axis_position
);
// cos(theta)*(xr + Gx) + axis_pos
float
s1
=
fmaf
(
msintheta
,
yr
,
0.0
f
);
// -sin(theta)*yr
float
s2
=
fmaf
(
msintheta
,
yrp
,
0.0
f
);
// -sin(theta)*(yr + Gy)
h1
=
c1
+
s1
;
h2
=
c2
+
s1
;
h3
=
c1
+
s2
;
h4
=
c2
+
s2
;
if
(
h1
>=
0
&&
h1
<
num_bins
)
sum1
+=
tex3D
(
tex_projections3D
,
h1
+
0.5
f
,
proj
+
0.5
f
,
z
+
0.5
f
);
if
(
h2
>=
0
&&
h2
<
num_bins
)
sum2
+=
tex3D
(
tex_projections3D
,
h2
+
0.5
f
,
proj
+
0.5
f
,
z
+
0.5
f
);
if
(
h3
>=
0
&&
h3
<
num_bins
)
sum3
+=
tex3D
(
tex_projections3D
,
h3
+
0.5
f
,
proj
+
0.5
f
,
z
+
0.5
f
);
if
(
h4
>=
0
&&
h4
<
num_bins
)
sum4
+=
tex3D
(
tex_projections3D
,
h4
+
0.5
f
,
proj
+
0.5
f
,
z
+
0.5
f
);
}
//~ if (h >= 0 && h < num_bins)
sum += tex2D(tex_projections, h + 0.5f, proj + 0.5f);
d_slices
[(
z
*
n_y
+
y
)
*
(
n_x
)
+
x
]
=
sum1
*
scale_factor
;
if
(
Gx
+
x
<
n_x
)
d_slices
[(
z
*
n_y
+
y
)
*
(
n_x
)
+
Gx
+
x
]
=
sum2
*
scale_factor
;
if
(
Gy
+
y
<
n_y
)
{
d_slices
[(
z
*
n_y
+
y
+
Gy
)
*
(
n_x
)
+
x
]
=
sum3
*
scale_factor
;
if
(
Gx
+
x
<
n_x
)
d_slices
[(
z
*
n_y
+
y
+
Gy
)
*
(
n_x
)
+
Gx
+
x
]
=
sum4
*
scale_factor
;
}
d_slice[y*n_x + x] = sum * scale_factor;
//~ d_slice[y*n_x + x] = d_cos[min(x, 500)];
}
// les threads font trop d'i/O par rapport au calcul
// hst_backproj fait travailler les threads "4x plus" => meilleure occupancy
// chaque thread ne traite pas un seul pixel (x, y) de l'image, il traite (x +- 1, y +- 1)
*/
#endif
nabu/cuda/utils.py
View file @
2a6341d4
...
...
@@ -89,7 +89,7 @@ def get_shape_dtype(arr):
elif
isinstance
(
arr
,
cuda
.
Array
):
return
cuarray_shape_dtype
(
arr
)
else
:
raise
ValueError
(
"Unknown array type
"
)
raise
ValueError
(
"Unknown array type
%s"
%
str
(
type
(
arr
))
)
def
copy_array
(
dst
,
src
,
check
=
False
,
src_dtype
=
None
):
...
...
@@ -111,8 +111,6 @@ def copy_array(dst, src, check=False, src_dtype=None):
shape_src
,
dtype_src
=
get_shape_dtype
(
src
)
shape_dst
,
dtype_dst
=
get_shape_dtype
(
dst
)
dtype_src
=
src_dtype
or
dtype_src
#~ print("src: %s, %s" % (str(shape_src), str(dtype_src))) # DEBUG
#~ print("dst: %s, %s" % (str(shape_dst), str(dtype_dst))) # DEBUG
if
check
:
if
shape_src
!=
shape_dst
:
raise
ValueError
(
"shape_src != shape_dst : have %s and %s"
%
(
str
(
shape_src
),
str
(
shape_dst
)))
...
...
@@ -144,6 +142,11 @@ def copy_array(dst, src, check=False, src_dtype=None):
copy
.
width_in_bytes
=
copy
.
dst_pitch
=
w
*
np
.
dtype
(
dtype_src
).
itemsize
copy
.
dst_height
=
copy
.
height
=
h
copy
(
True
)
# ??
if
len
(
shape_src
)
==
2
:
copy
(
True
)
else
:
copy
()
###
nabu/reconstruction/fbp.py
View file @
2a6341d4
...
...
@@ -13,31 +13,61 @@ from .filtering import SinoFilter
class
Backprojector
(
CudaProcessing
):
"""
TODO docstring
Cuda Backprojector.
"""
def
__init__
(
self
,
slice_shape
,
angles
,
dwidth_x
=
None
,
dwidth_z
=
None
,
sino_shape
,
slice_shape
=
None
,
angles
=
None
,
rot_center
=
None
,
filter_name
=
None
,
extra_options
=
{},
cuda_options
=
{},
):
"""
Initialize a Cuda Backprojector.
Parameters
-----------
sino_shape: tuple
Shape of the sinogram, in the form `(n_angles, detector_width)`
(for backprojecting one sinogram) or `(n_sinos, n_angles, detector_width)`.
slice_shape: int or tuple, optional
Shape of the slice. By default, the slice shape is (n_x, n_x) where
`n_x = detector_width`
angles: array-like, optional
Rotation anles in radians.
By default, angles are equispaced between [0, pi[.
rot_center: float, optional
Rotation axis position. Default is `(detector_width - 1)/2.0`
filter_name: str, optional
Name of the filter for filtered-backprojection.
extra_options: dict, optional
Advanced extra options.
See the "Extra options" section for more information.
cuda_options: dict, optional
Cuda options.
Extra options
--------------
The parameter `extra_options` is a dictionary with the following defaults:
- "padding_mode": "zeros"
- "axis_correction": None
"""
super
().
__init__
(
**
cuda_options
)
self
.
configure_extra_options
(
extra_options
=
extra_options
)
self
.
init_geometry
(
slice_shape
,
angles
,
dwidth_x
,
dwidth_z
,
rot_center
)
self
.
allocate_memory
()
self
.
compute_angles
()
self
.
compile_kernels
()
self
.
bind_texture
()
self
.
_configure_extra_options
(
extra_options
=
extra_options
)
self
.
_init_geometry
(
sino_shape
,
slice_shape
,
angles
,
rot_center
)
self
.
_init_filter
(
filter_name
)
self
.
_allocate_memory
()
self
.
_compute_angles
()
self
.
_compile_kernels
()
self
.
_bind_textures
()
def
configure_extra_options
(
self
,
extra_options
=
{}):
def
_
configure_extra_options
(
self
,
extra_options
=
{}):
self
.
_axis_array
=
None
self
.
extra_options
=
{
"filter_name"
:
"ramlak"
,
"padding_mode"
:
"zeros"
,
"axis_correction"
:
None
,
}
...
...
@@ -45,53 +75,86 @@ class Backprojector(CudaProcessing):
self
.
_axis_array
=
self
.
extra_options
[
"axis_correction"
]
def
init_geometry
(
self
,
slice_shape
,
angles
,
dwidth_x
,
dwidth_z
,
rot_center
):
if
np
.
isscalar
(
slice_shape
):
slice_shape
=
(
slice_shape
,
slice_shape
)
self
.
slice_shape
=
slice_shape
n_y
,
n_x
=
slice_shape
def
_init_geometry
(
self
,
sino_shape
,
slice_shape
,
angles
,
rot_center
):
self
.
sino_shape
=
sino_shape
if
len
(
sino_shape
)
==
2
:
n_angles
,
dwidth
=
sino_shape
n_slices
=
1
elif
len
(
sino_shape
)
==
3
:
n_slices
,
n_angles
,
dwidth
=
sino_shape
else
:
raise
ValueError
(
"Expected 2D or 3D sinogram"
)
n_sinos
=
n_slices
n_y
=
dwidth
n_x
=
dwidth
if
slice_shape
is
not
None
:
if
np
.
isscalar
(
slice_shape
):
slice_shape
=
(
slice_shape
,
slice_shape
)
n_y
,
n_x
=
slice_shape
self
.
n_x
=
n_x
self
.
n_y
=
n_y
self
.
dwidth_x
=
dwidth_x
or
max
(
n_x
,
n_y
)
self
.
slice_shape
=
(
n_y
,
n_x
)
if
n_slices
>
1
:
self
.
slice_shape
=
(
n_slices
,)
+
self
.
slice_shape
self
.
n_slices
=
n_slices
self
.
n_sinos
=
n_sinos
self
.
n_angles
=
n_angles
self
.
dwidth
=
dwidth
if
angles
is
None
:
angles
=
n_angles
if
np
.
isscalar
(
angles
):
angles
=
np
.
linspace
(
0
,
np
.
pi
,
angles
,
False
)
self
.
angles
=
angles
else
:
assert
len
(
angles
)
==
self
.
n_angles
self
.
angles
=
angles
self
.
n_angles
=
len
(
self
.
angles
)
self
.
sino_shape
=
(
self
.
n_angles
,
self
.
dwidth_x
)
self
.
rot_center
=
rot_center
or
(
self
.
dwidth_x
-
1
)
/
2.
self
.
rot_center
=
rot_center
or
(
self
.
dwidth
-
1
)
/
2.
self
.
axis_pos
=
self
.
rot_center
def
allocate_memory
(
self
):
self
.
_d_slice
=
garray
.
zeros
(
self
.
slice_shape
,
"f"
)
self
.
_d_sino
=
garray
.
zeros
(
self
.
sino_shape
,
"f"
)
def
_allocate_memory
(
self
):
self
.
_d_sino_cua
=
cuda
.
np_to_array
(
np
.
zeros
(
self
.
sino_shape
,
"f"
),
"C"
)
# 1D textures are not supported in pycuda
self
.
h_msin
=
np
.
zeros
((
1
,
self
.
n_angles
),
"f"
)
self
.
h_cos
=
np
.
zeros
((
1
,
self
.
n_angles
),
"f"
)
self
.
_d_sino
=
garray
.
zeros
(
self
.
sino_shape
,
"f"
)
# ~ self._d_slice = garray.zeros(self.slice_shape, "f")
self
.
_init_arrays_to_none
([
"_d_slice"
])
def
compute_angles
(
self
):
def
_compute_angles
(
self
):
self
.
h_cos
[
0
]
=
np
.
cos
(
self
.
angles
).
astype
(
"f"
)
self
.
h_msin
[
0
]
=
(
-
np
.
sin
(
self
.
angles
)).
astype
(
"f"
)
self
.
_d_msin
=
garray
.
to_gpu
(
self
.
h_msin
[
0
])
###
self
.
_d_cos
=
garray
.
to_gpu
(
self
.
h_cos
[
0
])
###
self
.
_d_msin
=
garray
.
to_gpu
(
self
.
h_msin
[
0
])
self
.
_d_cos
=
garray
.
to_gpu
(
self
.
h_cos
[
0
])
def
compile_kernels
(
self
):
# Configure sinogram
filter
def
_init_filter
(
self
,
filter_name
):
self
.
filter_name
=
filter
_name
self
.
sino_filter
=
SinoFilter
(
self
.
sino_shape
,
filter_name
=
self
.
extra_options
[
"
filter_name
"
]
,
filter_name
=
self
.
filter_name
,
padding_mode
=
self
.
extra_options
[
"padding_mode"
],
ctx
=
self
.
ctx
,
)
def
_compile_kernels
(
self
):
# Configure backprojector
fname
=
get_cuda_srcfile
(
"backproj.cu"
)
self
.
gpu_projector
=
CudaKernel
(
"backproj"
,
filename
=
fname
)
self
.
texref_proj
=
self
.
gpu_projector
.
module
.
get_texref
(
"tex_projections"
)
if
self
.
n_sinos
>
1
:
kernel_name
=
"backproj_multi"
tex_name
=
"tex_projections3D"
kernel_signature
=
"PiiifiiPPf"
sourcemodule_options
=
[
"-DBACKPROJ3D"
]
else
:
kernel_name
=
"backproj"
tex_name
=
"tex_projections"
kernel_signature
=
"PiifiiPPf"
sourcemodule_options
=
None
self
.
gpu_projector
=
CudaKernel
(
kernel_name
,
filename
=
fname
,
options
=
sourcemodule_options
)
self
.
texref_proj
=
self
.
gpu_projector
.
module
.
get_texref
(
tex_name
)
self
.
texref_proj
.
set_filter_mode
(
cuda
.
filter_mode
.
LINEAR
)
self
.
gpu_projector
.
prepare
(
"PiifiiPPf"
,
[
self
.
texref_proj
])
self
.
gpu_projector
.
prepare
(
kernel_signature
,
[
self
.
texref_proj
])
# We use blocks of 16*16 (see why in kernel doc), and one thread
# handles 2 pixels per dimension.
self
.
block
=
(
16
,
16
,
1
)
...
...
@@ -99,14 +162,36 @@ class Backprojector(CudaProcessing):
updiv
(
updiv
(
self
.
n_x
,
self
.
block
[
0
]),
2
),
updiv
(
updiv
(
self
.
n_y
,
self
.
block
[
1
]),
2
)
)
if
self
.
n_sinos
>
1
:
self
.
grid
+=
(
self
.
n_sinos
,)
# Prepare kernel arguments
self
.
kern_proj_args
=
[
None
,
# output d_slice holder
self
.
n_angles
,
self
.
dwidth
,
self
.
axis_pos
,
self
.
n_x
,
self
.
n_y
,
self
.
_d_cos
,
self
.
_d_msin
,
1.0
# scale factor # TODO customize
]
if
self
.
n_slices
>
1
:
self
.
kern_proj_args
.
insert
(
3
,
self
.
n_slices
)
self
.
kern_proj_kwargs
=
{
"grid"
:
self
.
grid
,
"block"
:
self
.
block
,
"shared_size"
:
int
(
np
.
prod
(
self
.
block
))
*
2
*
4
,
}
def
bind_texture
(
self
):
def
_
bind_texture
s
(
self
):
self
.
texref_proj
.
set_array
(
self
.
_d_sino_cua
)
def
set_output
(
self
,
output
,
check
=
False
):
def
_
set_output
(
self
,
output
,
check
=
False
):
if
output
is
None
:
self
.
_allocate_array
(
"_d_slice"
,
self
.
slice_shape
,
dtype
=
np
.
float32
)
return
self
.
_d_slice
if
check
:
assert
output
.
dtype
==
np
.
float32
...
...
@@ -117,24 +202,13 @@ class Backprojector(CudaProcessing):
return
output
# TODO support output=<numpy array>
def
backproj
(
self
,
sino
,
output
=
None
,
do_checks
=
True
):
copy_array
(
self
.
_d_sino_cua
,
sino
,
check
=
do_checks
)
d_slice
=
self
.
set_output
(
output
,
check
=
do_checks
)
d_slice
=
self
.
_
set_output
(
output
,
check
=
do_checks
)
self
.
kern_proj_args
[
0
]
=
d_slice
self
.
gpu_projector
(
d_slice
,
self
.
n_angles
,
self
.
dwidth_x
,
self
.
axis_pos
,
self
.
n_x
,
self
.
n_y
,
self
.
_d_cos
,
self
.
_d_msin
,
1.0
,
grid
=
self
.
grid
,
block
=
self
.
block
,
shared_size
=
int
(
np
.
prod
(
self
.
block
))
*
2
*
4
*
self
.
kern_proj_args
,
**
self
.
kern_proj_kwargs
)
if
output
is
not
None
:
return
output
...
...
nabu/reconstruction/filtering.py
View file @
2a6341d4
...
...
@@ -3,7 +3,7 @@ from bisect import bisect
from
itertools
import
product
import
numpy
as
np
import
pycuda.gpuarray
as
garray
from
silx.math.fft
import
FFT
from
silx.math.fft
.cufft
import
CU
FFT
from
silx.image.tomography
import
compute_fourier_filter
,
get_next_power
from
..cuda.kernel
import
CudaKernel
from
..cuda.processing
import
CudaProcessing
...
...
@@ -51,9 +51,9 @@ class SinoFilter(CudaProcessing):
self
.
ndim
=
len
(
sino_shape
)
if
self
.
ndim
==
2
:
n_angles
,
dwidth
=
sino_shape
dwidth_z
=
1
n_sinos
=
1
elif
self
.
ndim
==
3
:
dwidth_z
,
n_angles
,
dwidth
=
sino_shape
n_sinos
,
n_angles
,
dwidth
=
sino_shape
else
:
raise
ValueError
(
"Invalid sinogram number of dimensions"
)
self
.
sino_shape
=
sino_shape
...
...
@@ -64,7 +64,7 @@ class SinoFilter(CudaProcessing):
self
.
dwidth_padded
=
int
(
get_next_power
(
2
*
self
.
dwidth
))
self
.
sino_padded_shape
=
(
n_angles
,
self
.
dwidth_padded
)
if
self
.
ndim
==
3
:
self
.
sino_padded_shape
=
(
dwidth_z
,
)
+
self
.
sino_padded_shape
self
.
sino_padded_shape
=
(
n_sinos
,
)
+
self
.
sino_padded_shape
sino_f_shape
=
list
(
self
.
sino_padded_shape
)
sino_f_shape
[
-
1
]
=
sino_f_shape
[
-
1
]
//
2
+
1
self
.
sino_f_shape
=
tuple
(
sino_f_shape
)
...
...
@@ -74,11 +74,10 @@ class SinoFilter(CudaProcessing):
def
_init_fft
(
self
):
self
.
fft
=
FFT
(
self
.
fft
=
CU
FFT
(
self
.
sino_padded_shape
,
dtype
=
np
.
float32
,
axes
=
(
-
1
,),
backend
=
"cuda"
)
...
...
@@ -172,7 +171,6 @@ class SinoFilter(CudaProcessing):
grid
=
self
.
_pad_grid
,
block
=
self
.
_pad_block
)
else
:
# zeros
self
.
d_sino_padded
.
fill
(
0
)
if
self
.
ndim
==
2
:
...
...
nabu/reconstruction/tests/test_fbp.py
View file @
2a6341d4
...
...
@@ -33,7 +33,7 @@ class TestFBP(ParametrizedTestCase):
"""
Simple test of a FBP on a 512x512 slice
"""
B
=
Backprojector
(
512
,
500
)
B
=
Backprojector
(
(
500
,
512
)
)
res
=
B
.
fbp
(
self
.
sino_512
)
delta_clipped
=
self
.
clip_to_inner_circle
(
res
-
self
.
ref_512
)
...
...
@@ -47,7 +47,7 @@ class TestFBP(ParametrizedTestCase):
"""
Test FBP of a 511x511 slice where the rotation axis is at (512-1)/2.0
"""
B
=
Backprojector
(
511
,
500
,
rot_center
=
255.5
)
B
=
Backprojector
(
(
500
,
511
)
,
rot_center
=
255.5
)
res
=
B
.
fbp
(
self
.
sino_511
)
ref
=
self
.
ref_512
[:
-
1
,
:
-
1
]
...
...
@@ -58,6 +58,23 @@ class TestFBP(ParametrizedTestCase):
def
test_multi_fbp
(
self
):
multi_sino
=
np
.
tile
(
self
.
sino_511
,
(
4
,
1
,
1
))
B
=
Backprojector
(
multi_sino
.
shape
,
rot_center
=
255.5
)
res
=
B
.
fbp
(
multi_sino
)
self
.
assertLess
(
np
.
max
(
np
.
abs
(
np
.
std
(
res
,
axis
=
0
))),
1e-5
,
"Something wrong with multi-FBP"
)
ref
=
self
.
ref_512
[:
-
1
,
:
-
1
]
delta_clipped
=
self
.
clip_to_inner_circle
(
res
[
0
]
-
ref
)
err_max
=
np
.
max
(
np
.
abs
(
delta_clipped
))
self
.
assertLess
(
err_max
,
self
.
tol
,
"Max error is too high"
)
...
...
Write
Preview