Commit 08ea80b9 authored by myron

Small modification to conic routines. Setting the texture address mode to border in gpu_main.

parent ff225be4
......@@ -7232,8 +7232,19 @@ __global__ static void multi_pro_gputomo_conicity_kernel(float *d_SINO, // da
{
for(imult=0; imult<PROCONO_GPU_MULT*PROCONO_GPU_MULT ; imult ++) res[imult]= 0.0f;
}
float Area_fz=1.0f ;
{
float L = (source_distance+detector_distance);
float H = (ix-SOURCE_X) * v_size/v2x;
float V = (iz-SOURCE_Z)* v_size/v2x;
Area_fz = sqrt( (L*L+H*H+V*V)/(L*L+H*H) ) ;
}
if(fabs(sin_angle)>fabs(cos_angle)) {
Area = 1.0/ fabs(sin_angle);
Area = 1.0/ fabs(sin_angle)*Area_fz;
for(ivx=0; ivx<num_x; ivx++) {
Ddeno = Ddeno0 + ivx*sin_angle * v_size *1.0e-6;
d_F_x = cos_angle*v_size*1.0e-6/Dnumex;
......@@ -7250,14 +7261,15 @@ __global__ static void multi_pro_gputomo_conicity_kernel(float *d_SINO, // da
fvyb = fvy + jmult * d_fvy_x ;
for(imult=0; imult < PROCONO_GPU_MULT; imult++) {
fvzb = fvz + imult*d_z_z + jmult * d_z_x ;
if( fvyb>0 && fvyb< num_y ) {
res[ imult*PROCONO_GPU_MULT + jmult ] += tex3D(texfor3D, ivx + 0.5f , fvyb + 0.5f , fvzb+ 1.5f );
// if( fvyb>-0.01 && fvyb< num_y+0.01 )
{
res[ imult*PROCONO_GPU_MULT + jmult ] += tex3D(texfor3D, ivx + 0.5f , fvyb + 0.5f , fvzb+ 1.5f ); // 0.5+1 , 1 is for the margin
}
}
}
}
} else {
Area = 1.0/ fabs(cos_angle);
Area = 1.0/ fabs(cos_angle)*Area_fz;
for(ivy=0; ivy<num_y; ivy++) {
Ddeno = Ddeno0 + ivy*cos_angle * v_size *1.0e-6;
d_F_x = -sin_angle*v_size*1.0e-6/Dnumex;
......@@ -7274,7 +7286,8 @@ __global__ static void multi_pro_gputomo_conicity_kernel(float *d_SINO, // da
fvxb = fvx + jmult * d_fvx_x ;
for(imult=0; imult < PROCONO_GPU_MULT; imult++) {
fvzb = fvz + imult*d_z_z + jmult * d_z_x ;
if( fvxb>0 && fvxb< num_x ) {
// if( fvxb>-0.01 && fvxb< num_x+0.01 ) // il passaggio a zero deve essere assicurato dalle slice zeros che fanno sandwich e che ho aggiunto
{
res[ imult*PROCONO_GPU_MULT + jmult ] += tex3D(texfor3D, fvxb + 0.5f , ivy + 0.5f , fvzb + 1.5f );
}
}
......@@ -7399,11 +7412,11 @@ __global__ static void gputomo_conicity_kernel(float *d_SLICE, // da allocare
for(int ix=0; ix< CONO_GPU_MULT ; ix++) {
Z = pz + ix*d_pz_ix + iy*d_pz_iy ;
if(Z<0) {
if(Z<-0.5f) {
continue;
// Z=0;
}
if(Z>nslices_data-1) {
if(Z>nslices_data-0.5f) {
continue;
// Z = nslices_data-1;
}
......@@ -7416,11 +7429,11 @@ __global__ static void gputomo_conicity_kernel(float *d_SLICE, // da allocare
}
} else {
Z = pz ;
if(Z<0) {
if(Z<-0.5f) {
continue;
// Z=0;
}
if(Z>nslices_data-1) {
if(Z>nslices_data-0.5f) {
continue;
// Z = nslices_data-1;
}
......@@ -7483,10 +7496,13 @@ int gpu_main_conicity(Gpu_Context * self, float * SLICE, float *WORK_perp
mynproj*self->num_bins*sizeof(float) *nslices_data,
cudaMemcpyHostToDevice) );
CUDA_SAFE_CALL( cudaBindTextureToArray(texProjes,a_Proje_voidptr) );
texProjes.filterMode = cudaFilterModeLinear;
texProjes.addressMode[0] = cudaAddressModeClamp;
texProjes.addressMode[1] = cudaAddressModeClamp;
texProjes.addressMode[0] = cudaAddressModeBorder;
texProjes.normalized = false;
CUDA_SAFE_CALL( cudaBindTextureToArray(texProjes,a_Proje_voidptr) );
{
......@@ -7577,8 +7593,8 @@ int pro_gpu_main_conicity(Gpu_Context * self, float * SLICE, float *SINO
}
CUDA_SAFE_CALL( cudaBindTextureToArray(texfor3D,a_SLICE_voidptr) );
texfor3D.filterMode = cudaFilterModeLinear;
texfor3D.addressMode[0] = cudaAddressModeClamp;
texfor3D.addressMode[1] = cudaAddressModeClamp;
texfor3D.addressMode[0] = cudaAddressModeBorder;
texfor3D.addressMode[1] = cudaAddressModeBorder;
texfor3D.addressMode[2] = cudaAddressModeClamp;
texfor3D.normalized = false;
......@@ -7969,8 +7985,10 @@ int gpu_main(Gpu_Context * self, float *WORK , float * SLICE, int do_preconditio
CUDA_SAFE_CALL( cudaMemcpyToArray((cudaArray*) self->a_Proje_voidptr, 0, 0,self->dev_Work_perproje ,
self->nprojs_span*self->num_bins*sizeof(float) , cudaMemcpyDeviceToDevice) );
CUDA_SAFE_CALL( cudaBindTextureToArray(texProjes,(cudaArray*) self->a_Proje_voidptr) ); // !! unbind ??
texProjes.addressMode[0] = cudaAddressModeBorder;
texProjes.addressMode[1] = cudaAddressModeClamp;
texProjes.filterMode = cudaFilterModeLinear;
CUDA_SAFE_CALL( cudaBindTextureToArray(texProjes,(cudaArray*) self->a_Proje_voidptr) ); // !! unbind ??
int inizio=0 , fine=0;
......@@ -7983,8 +8001,6 @@ int gpu_main(Gpu_Context * self, float *WORK , float * SLICE, int do_preconditio
int endpro_angle =-1;
int npis=0;
int projection;
for(projection=0; projection < self->nprojs_span ; projection++) {
......
......@@ -404,7 +404,7 @@ def do_pyhst():
if DOCUDA:
CUDA={'include':[]}
CUDA .update(locate_cuda())
CUDA["arch"] = [ "-gencode", "arch=compute_20,code=compute_20",
CUDA["arch"] = [ # "-gencode", "arch=compute_20,code=compute_20",
"-gencode", "arch=compute_30,code=compute_30" ,
"-gencode", "arch=compute_50,code=compute_50"
]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment