Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tomotools
Nabu
Commits
466842d1
Commit
466842d1
authored
Nov 14, 2019
by
Pierre Paleo
Browse files
Some clean-up
parent
fbf9b946
Changes
2
Hide whitespace changes
Inline
Side-by-side
nabu/cuda/src/backproj.cu
View file @
466842d1
...
...
@@ -91,152 +91,17 @@ __global__ void backproj(
}
d_slice
[
y
*
(
n_x
)
+
x
]
=
sum1
*
scale_factor
;
if
(
Gx
+
x
<
n_x
)
d_slice
[
y
*
(
n_x
)
+
Gx
+
x
]
=
sum2
*
scale_factor
;
if
(
Gy
+
y
<
n_y
)
{
d_slice
[(
y
+
Gy
)
*
(
n_x
)
+
x
]
=
sum3
*
scale_factor
;
if
(
Gx
+
x
<
n_x
)
d_slice
[(
y
+
Gy
)
*
(
n_x
)
+
Gx
+
x
]
=
sum4
*
scale_factor
;
}
}
/**
*
* Old stuff / tinkering
*
*
**/
/*
#define BLOCK_SIZE 512
#define BLOCK_SIZ2 BLOCK_SIZE*2
__global__ void backproj_cust(
float* d_slice,
int num_projs,
int num_bins,
float axis_position,
int n_x,
int n_y,
float scale_factor
)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if ((x >= n_x) || (y >= n_y)) return;
int xr = (x - axis_position), yr = (y - axis_position);
float sum = 0.0f;
for (int proj = 0; proj < num_projs; proj++) {
// h = axis_pos + x*cos - y*sin
// = (cos, -sin) .* (x, y) + axis_pos
#if USE_F2 == 1
float2 cos_msin = tex2D(tex_sincos_lut, proj + 0.5f, 0.5f);
float h = cos_msin.x * xr + cos_msin.y * yr + axis_position; // TODO dot() ?
//~ float h = __cosf(3.141592f/500*proj) * xr - __sinf(3.141592f/500*proj) * yr + axis_position; // fastest (3x with __intrinsics)
//~ float h = xr*0.1f - yr*0.2f + axis_position;
#else
//~ float pcos = tex2D(tex_cos_lut, proj + 0.5f, 0.5f);
//~ float pmsin = tex2D(tex_msin_lut, proj + 0.5f, 0.5f);
//~ float h = pcos * xr + pmsin * yr + axis_position; // TODO dot() ?
float h = cosf(3.141592f/500*proj) * xr - sinf(3.141592f/500*proj) * yr + axis_position;
#endif
// if(h >= 0 && h < num_bins)
sum += tex2D(tex_projections, h + 0.5f, proj + 0.5f);
if
(
Gx
+
x
<
n_x
)
d_slice
[(
y
+
Gy
)
*
(
n_x
)
+
Gx
+
x
]
=
sum4
*
scale_factor
;
}
d_slice[y*num_bins + x] = sum * scale_factor;
}
__global__ void backproj_cust2(
float* d_slice,
int num_projs,
int num_bins,
float axis_position,
int n_x,
int n_y,
float* d_cos,
float* d_msin,
float scale_factor
)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if ((x >= n_x) || (y >= n_y)) return;
float xr = (x - axis_position), yr = (y - axis_position);
volatile __shared__ float shared_mem[BLOCK_SIZ2];
volatile float* s_cos = shared_mem;
volatile float* s_msin = shared_mem + BLOCK_SIZE;
// Fetch "blockDim.x" values to shared memory
int tid = threadIdx.x;
s_cos[tid] = d_cos[tid];
s_msin[tid] = d_msin[tid];
//
//
int next_fetch = BLOCK_SIZE;
__syncthreads();
// ------------
float sum = 0.0f;
int proj = 0, proj_loc = 0;
for (proj = 0, proj_loc = 0; proj < num_projs; proj++, proj_loc++) {
//~ if (proj == next_fetch) {
//~ if (tid + next_fetch < num_projs) {
//~ s_cos[tid] = d_cos[next_fetch + tid];
//~ s_msin[tid] = d_msin[next_fetch + tid];
//~ __syncthreads();
//~ }
//~ next_fetch += BLOCK_SIZE;
//~ proj_loc = 0;
//~ }
// h = axis_pos + x*cos - y*sin
// = (cos, -sin) .* (x, y) + axis_pos
//~ float h = s_cos[proj_loc] * xr + s_msin[proj_loc] * yr + axis_position; // TODO dot() ?
float h =
fmaf(s_cos[proj_loc], xr, axis_position) +
fmaf(s_msin[proj_loc], yr, 0.0f);
//~ if (h >= 0 && h < num_bins)
sum += tex2D(tex_projections, h + 0.5f, proj + 0.5f);
}
d_slice[y*n_x + x] = sum * scale_factor;
//~ d_slice[y*n_x + x] = d_cos[min(x, 500)];
}
// les threads font trop d'i/O par rapport au calcul
// hst_backproj fait travailler les threads "4x plus" => meilleure occupancy
// chaque thread ne traite pas un seul pixel (x, y) de l'image, il traite (x +- 1, y +- 1)
*/
nabu/reconstruction/fbp.py
View file @
466842d1
...
...
@@ -74,8 +74,8 @@ class Backprojector(CudaProcessing):
def
compute_angles
(
self
):
self
.
h_cos
[
0
]
=
np
.
cos
(
self
.
angles
).
astype
(
"f"
)
self
.
h_msin
[
0
]
=
(
-
np
.
sin
(
self
.
angles
)).
astype
(
"f"
)
self
.
_d_msin
=
garray
.
to_gpu
(
self
.
h_msin
[
0
])
###
self
.
_d_cos
=
garray
.
to_gpu
(
self
.
h_cos
[
0
])
###
self
.
_d_msin
=
garray
.
to_gpu
(
self
.
h_msin
[
0
])
self
.
_d_cos
=
garray
.
to_gpu
(
self
.
h_cos
[
0
])
def
compile_kernels
(
self
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment