Commit 953c60c1 authored by Alessandro Mirone

all non-regression tests pass

parent f1826c23
......@@ -47,9 +47,10 @@ DOSHARED=1
class Control:
def __init__(self, Parameters, cspaceA, nofCpus , mygpus, npbunches ):
def __init__(self, Parameters, cspaceA, nofCpus , mygpus, npbunches , Aspace):
global P,C
global NPBUNCHES
self.Aspace = Aspace
P=Parameters
C=cspaceA
NPBUNCHES=npbunches
......@@ -157,7 +158,8 @@ class Control:
-1:["MPI_Resources","TransposedDataResources", "TreatedDataResources" ]
}[STEAM_DIRECTION],
pmanager = self.PacketsManager,
STEAM_DIRECTION = STEAM_DIRECTION
STEAM_DIRECTION = STEAM_DIRECTION,
Ctr = self
)
if STEAM_DIRECTION==1:
......@@ -841,11 +843,12 @@ class Processing_InterleavedReadingPreProcessing(ProcessingBaseClass):
class Processing_Transposing(ProcessingBaseClass):
num_of_algorithms=1
def __init__(self, processingLevel=0, choosedAlgorithm=0
, resources={}, resources_free=[], pmanager=None,STEAM_DIRECTION=1):
, resources={}, resources_free=[], pmanager=None,STEAM_DIRECTION=1, Ctr = None):
self.pmanager=pmanager
self.pid_inprocess=-1
self.nperpid =0
self.nperpid_tr =0
self.Ctr = Ctr
ProcessingBaseClass.__init__(self, processingLevel, choosedAlgorithm,
resources, resources_free, pmanager=pmanager, STEAM_DIRECTION = STEAM_DIRECTION )
......@@ -854,6 +857,11 @@ class Processing_Transposing(ProcessingBaseClass):
## ----------------- data for specific requirement. Resources know their name -----------------------------
self.requiredCpusGpus = [ 0 , 0 ]
self.event = threading.Event()
self.event_tr = threading.Event()
self.semaphore = threading.Semaphore()
def processSpecial(self,psn ):
if self.choosedAlgorithm==0:
......@@ -869,19 +877,34 @@ class Processing_Transposing(ProcessingBaseClass):
pid = psn / NPBUNCHES
pid_i = psn % NPBUNCHES
if not self.pid_inprocess== pid:
self.pid_inprocess = pid
self.nperpid =0
self.nperpid += 1
with self.semaphore:
if not self.pid_inprocess== pid:
self.pid_inprocess = pid
self.nperpid =0
self.nperpid_tr = 0
with self.semaphore:
self.nperpid_tr += 1
if self.nperpid_tr == NPBUNCHES :
self.Ctr.Aspace["transposeddatatokens"][0][:]=0
self.event_tr.set()
else:
self.event_tr.clear()
self.event_tr.wait()
if(self.nperpid) == NPBUNCHES:
ntoktreated = self.resources["TreatedDataResources"] .tokenperpacketid[pid]
ntoktransposed = self.resources["TransposedDataResources"].tokenperpacketid[pid]
C.transpose_chunk(psn, ntoktreated, ntoktransposed, NPBUNCHES, self.STEAM_DIRECTION )
self.event.set()
self.event.clear()
else:
self.event.wait()
with self.semaphore:
self.nperpid += 1
if(self.nperpid) == NPBUNCHES:
ntoktreated = self.resources["TreatedDataResources"] .tokenperpacketid[pid]
ntoktransposed = self.resources["TransposedDataResources"].tokenperpacketid[pid]
C.transpose_chunk(psn, ntoktreated, ntoktransposed, NPBUNCHES, self.STEAM_DIRECTION )
self.event.set()
else:
self.event.clear()
self.event.wait()
class Processing_Dispenser(ProcessingBaseClass):
......
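The hunk above serializes the per-pid bookkeeping with a semaphore and adds two events, so that exactly one of the NPBUNCHES bunch threads resets the transposed-data tokens and runs C.transpose_chunk while the remaining threads block until it is done. A minimal sketch of that "last arrival does the work" barrier, with hypothetical names (LastArrivalBarrier, action), not taken from the patch itself:

import threading

class LastArrivalBarrier(object):
    """Sketch of the pattern above: nthreads workers arrive, the last
    one performs a one-shot action (here, the chunk transpose) and
    then releases all the others."""
    def __init__(self, nthreads, action):
        self.nthreads = nthreads            # plays the role of NPBUNCHES
        self.action = action                # e.g. the transpose_chunk call
        self.count = 0
        self.mutex = threading.Semaphore()  # used as a mutex, as in the patch
        self.event = threading.Event()

    def wait(self):
        with self.mutex:
            self.count += 1
            is_last = (self.count == self.nthreads)
        if is_last:
            self.action()        # only the last arrival does the work
            self.event.set()     # wake the threads blocked below
        else:
            self.event.wait()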
......@@ -2559,6 +2559,7 @@ void CCspace_Sino_2_Slice( CCspace * self, float * dataA, int nslices, int ns
OVERSAMPLE = (float*)malloc( 3*(num_bins)*self->params.OVERSAMPLING_FACTOR* sizeof(float )) ;
memset( OVERSAMPLE , 0, 3*(num_bins)*self->params.OVERSAMPLING_FACTOR * sizeof(float ) );
WORKbis = (float*) malloc((num_bins)* sizeof(float ) ) ;
// memset( WORKbis , 0, (num_bins)* sizeof(float ) );
for(projection=0; projection < self->params.nprojs_span ; projection++) {
WORK [projection] = WORK[0 ] + projection*dim_fft ;
}
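The loop above builds a per-projection pointer table into a single contiguous FFT work buffer. A NumPy sketch of the equivalent view-based layout, with hypothetical sizes (nprojs, dim_fft):

import numpy as np

nprojs, dim_fft = 8, 1024   # hypothetical sizes for illustration
work_flat = np.zeros(nprojs * dim_fft, dtype=np.float32)
# One view per projection into the same contiguous buffer, mirroring
#   WORK[projection] = WORK[0] + projection*dim_fft
WORK = [work_flat[p * dim_fft:(p + 1) * dim_fft] for p in range(nprojs)]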
......@@ -2579,6 +2580,7 @@ void CCspace_Sino_2_Slice( CCspace * self, float * dataA, int nslices, int ns
// memset(SLICE,0, self->params.num_x * self->params.num_y *sizeof(float) ); // it will be written piecewise further inside
data = (float*)malloc( nslice_data_granularity * VECTORIALITY* self->params.nprojs_span * num_bins * sizeof(float )) ;
// memset(data, 0, nslice_data_granularity * VECTORIALITY* self->params.nprojs_span * num_bins * sizeof(float ) );
// {
......@@ -2936,6 +2938,10 @@ void CCspace_Sino_2_Slice( CCspace * self, float * dataA, int nslices, int ns
/* printf(" sum %e\n", sum); */
/* } */
// to be extended to the other reconstructions later
// memset( WORK_perproje , 0 ,nslice_workppj_granularity* (self->params.nprojs_span)*(num_bins)*self->params.OVERSAMPLING_FACTOR*sizeof(float ) );
conic_driver( self, data, SLICE, nslices,
nslices_data,
Nfirstslice,
......@@ -4650,8 +4656,12 @@ void CCspace_tranpose_chunk (CCspace * self , int sn, int ntoktreated, int
int cmar = self->params.CONICITY_MARGIN;
transmit_buffer = (float*) malloc( num_my_proj * (size0 +(nprocs)*2*cmar) * size1*sizeof(float) ) ;
receive_buffer = (float*) malloc( (n_myslices +2*cmar ) * self->params.nprojs_span * size1*sizeof(float) ) ;
// memset(transmit_buffer, 0, num_my_proj * (size0 +(nprocs)*2*cmar) * size1*sizeof(float) );
// memset(receive_buffer, 0, (n_myslices +2*cmar ) * self->params.nprojs_span * size1*sizeof(float) );
if (self->params.verbosity>0) printf(" process %d allocates %ld for t and %ld for r \n", self->iproc,
num_my_proj * size0 * size1,
......@@ -4725,6 +4735,8 @@ void CCspace_tranpose_chunk (CCspace * self , int sn, int ntoktreated, int
if(self->params.verbosity>10) printf(" HERE numpjs_jproc %d \n", numpjs_jproc);
for(iproj=0 ; iproj< numpjs_jproc ; iproj++) {
for(ix=0; ix<size1; ix++) {
Tra_ptr[(islice*numprojs + (iproj+numpjs_offset))*size1 + ix ] =
......@@ -5052,9 +5064,10 @@ void CCspace_preprocess_chunk(CCspace * self , int sn, int ntok, int ntokt ,
float current ;
if (self->params.CORRECT_FLATFIELD ) {
int iread = self->reading_infos.tot_proj_num_list[npj+num_previous_projs] + self->reading_infos. proj_num_offset_list[sn_nb] ;
// int iread = self->reading_infos.tot_proj_num_list[npj+num_previous_projs] + self->reading_infos. proj_num_offset_list[sn_nb] ;
int iread = npj ;
assert( iread< self->reading_infos.currents_lenght) ;
current = self->reading_infos.currents[iread];
} else {
current = 1.0;
......@@ -5270,6 +5283,13 @@ void CCspace_preprocess_chunk(CCspace * self , int sn, int ntok, int ntokt ,
Rawptr = self-> rawdatatokens->datatokens[ntok] +npj*Size0*Size1;
Tptr = self-> datatokens->datatokens[ntokt] +npj*size_s[2*sn_nb +0]*size_s[2*sn_nb +1];
// memset( Tptr, 0, size_s[2*sn_nb +0]*size_s[2*sn_nb +1]*sizeof(float) );
if(self->params.TAKE_LOGARITHM ) {
/* // #pragma omp parallel for private(i,j) num_threads (ncpus) */
for(i=0; i<Size0; i++) {
......@@ -5531,7 +5551,6 @@ void CCspace_InterleavedReadingPreProcessing_chunk(CCspace * self ,int sn,int
int iread = npj ;
// printf( " IREAD CLENGHT %d %d iproc %d\n", iread, self->reading_infos.currents_lenght, self->iproc ) ;
assert( iread< self->reading_infos.currents_lenght) ;
current = self->reading_infos.currents[iread];
} else {
current = 1.0;
......@@ -5940,6 +5959,10 @@ void CCspace_InterleavedReadingPreProcessing_chunk(CCspace * self ,int sn,int
Tptr = self-> datatokens->datatokens[ntokt] +npj*size_s[2*sn_nb +0]*size_s[2*sn_nb +1];
// DFptr = self->ffcorr ;
// memset( Tptr, 0, size_s[2*sn_nb +0]*size_s[2*sn_nb +1]*sizeof(float) );
if(self->params.TAKE_LOGARITHM ) {
for(i=0; i<Size0; i++) {
......@@ -6137,10 +6160,18 @@ void CCspace_read_chunk (CCspace * self , int sn, int ntok , int npbunches,
// printf(" NPJ %d %d %d\n", npj,size_s_[2*sn_nb +0],size_s_[2*sn_nb +1]);
float *target;
if(!reduced_case) {
target = (Aptr+ npj*size_s_[2*sn_nb +0]*size_s_[2*sn_nb +1]) ;
//if(self->params.CONICITY ) memset(target,0, size_s_[2*sn_nb +0]*size_s_[2*sn_nb +1]*sizeof(float) );
} else {
target = (Aptr+ (npj+reshift)*size_s_[2*sn_nb +0]*size_s_[2*sn_nb +1]) ;
// if(self->params.CONICITY ) memset(target,0, size_s_[2*sn_nb +0]*size_s_[2*sn_nb +1]*sizeof(float) );
}
// printf(" leggo da edf %d %d %d %d \n", pos_s_[2*sn_nb], pos_s_[2*sn_nb+1],size_s_[2*sn_nb +0], size_s_[2*sn_nb +1]);
// pos_s_[2*sn_nb+1] is supposed to be 0
......@@ -6259,6 +6290,8 @@ void CCspace_read_chunk (CCspace * self , int sn, int ntok , int npbunches,
}
} else if (strcmp(self->reading_infos.proj_reading_type , "h5")==0){
if(self->params.DZPERPROJ==0) {
read_data_from_h5( proj_file_list[ 0 ],
self->reading_infos.proj_h5_dsname,
my_proj_num_list[iread],
......
......@@ -6728,8 +6728,14 @@ __global__ static void gputomo_conicity_kernel(float *d_SLICE, // to be allocated
for(int ix=0; ix< CONO_GPU_MULT ; ix++) {
Z = pz + ix*d_pz_ix + iy*d_pz_iy ;
if(Z<0) Z=0;
if(Z>nslices_data-1) Z = nslices_data-1;
if(Z<0) {
continue;
// Z=0;
}
if(Z>nslices_data-1) {
continue;
// Z = nslices_data-1;
}
res[iy* CONO_GPU_MULT +ix] += tex2D(texProjes,
px + 0.5f + ix*d_px_ix + iy* d_px_iy ,
Z + proje*nslices_data +0.5f
......@@ -6739,8 +6745,14 @@ __global__ static void gputomo_conicity_kernel(float *d_SLICE, // da allocare
}
} else {
Z = pz ;
if(Z<0) Z=0;
if(Z>nslices_data-1) Z = nslices_data-1;
if(Z<0) {
continue;
// Z=0;
}
if(Z>nslices_data-1) {
continue;
// Z = nslices_data-1;
}
res[0] += tex2D(texProjes, px + 0.5f , Z + proje*nslices_data + 0.5f ) ;
}
}
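Both hunks above replace clamping of the out-of-slab coordinate Z with a continue, so samples falling outside the measured slice range are dropped instead of accumulating the edge slice's texel values. A simplified NumPy sketch of the difference, with hypothetical names (projection, accumulate_sample), ignoring the texture interpolation:

import numpy as np

def accumulate_sample(projection, z, acc):
    """Hypothetical per-sample accumulation illustrating the fix above.

    projection: (nslices_data, nbins) array sampled along a ray;
    z:          fractional slice coordinate of the current sample."""
    nslices_data = projection.shape[0]
    iz = int(np.floor(z))
    if iz < 0 or iz > nslices_data - 1:
        return acc  # new behaviour: skip samples outside the slab
        # old behaviour clamped instead: iz = min(max(iz, 0), nslices_data - 1)
    return acc + projection[iz]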
......@@ -6828,7 +6840,6 @@ int gpu_main_conicity(Gpu_Context * self, float * SLICE, float *WORK_perp
}
// CUDA_SAFE_CALL( cudaMemcpy( SLICE, d_SLICE, sizeof(float) * nslices*self->dimrecx* self->dimrecy , cudaMemcpyDeviceToHost) );
CUDA_SAFE_CALL( cudaMemcpy( SLICE, d_SLICE, sizeof(float) * nslices*self->num_x* self->num_y , cudaMemcpyDeviceToHost) );
......
......@@ -137,7 +137,7 @@ cspace.configure_readings( proj_reading_dict )
import Control
# mygpus is a list of integers
control=Control.Control( Parameters_module.Parameters, cspace ,
ncpus, Parameters_module.mygpus,Parameters_module.Parameters.NPBUNCHES )
ncpus, Parameters_module.mygpus,Parameters_module.Parameters.NPBUNCHES ,arrays_space_in_python )
control.execute(Parameters_module.Postprocess)
MPI.COMM_WORLD.Barrier()
......
......@@ -31,6 +31,11 @@
from __future__ import division
__SF_VERSION__ = 0.2
import mpi4py.MPI as MPI
myrank = MPI.COMM_WORLD.Get_rank()
import numpy as np
from tomography import AstraToolbox, clipCircle
import os
......@@ -226,7 +231,9 @@ class SirtFilter:
# Take fft.rfft, or fft.fft[:, len/2+1]
if savedir is not None:
_save(fname, result, n_x, n_y, nDet, nAng, niter, lambda_tikhonov)
if myrank==0:
_save(fname, result, n_x, n_y, nDet, nAng, niter, lambda_tikhonov)
MPI.COMM_WORLD.Barrier()
return result
def reconst(self, sino):
......
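The hunk restricts the filter cache write to rank 0 and adds a barrier so that no rank tries to read the file before it is complete. A minimal mpi4py sketch of this single-writer pattern, with hypothetical names (save_cache, fname, result) standing in for the _save call above:

import numpy as np
import mpi4py.MPI as MPI

def save_cache(fname, result):
    # Only one rank touches the file; the barrier keeps the others
    # from proceeding until the write has finished.
    if MPI.COMM_WORLD.Get_rank() == 0:
        np.save(fname, result)
    MPI.COMM_WORLD.Barrier()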
......@@ -7,8 +7,9 @@ import numpy as np
from PyQt4 import Qt, QtCore
from PyQt4.QtCore import SIGNAL, SLOT
newversionprefix ="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2018bb/1gpu/tests/"
referenceprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2018bb/2gpu_bis/tests/"
newversionprefix ="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2018bb_corrected/1gpu/tests/MULTIPAGANIN"
referenceprefix ="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2018bb_corrected/2gpu/tests/MULTIPAGANIN"
# referenceprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2018bb/2gpu_bis/tests/"
......@@ -55,6 +56,12 @@ def compara_vols(ref, new ):
n = numpy.reshape(numpy.fromstring(sn,"f"), [ parInfo.NUM_Y, parInfo.NUM_X ] )
err = (r-n)
err=(err*err).sum()
rsum = (r*r).sum()
nsum = (n*n).sum()
if rsum==0 and nsum==0 :
err = -1
print iz, " err " , err
if err> maxerr:
maxerr=err
......
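The comparison now also measures the total energy of the reference and new slices and returns -1 when both are identically zero, so empty slices are flagged as "nothing to compare" rather than counted as perfect matches. A sketch of the per-slice metric, with the hypothetical name slice_error:

import numpy as np

def slice_error(r, n):
    """Squared L2 difference between two slices; -1 flags slices
    that are empty in both volumes (nothing meaningful to compare)."""
    err = ((r - n) ** 2).sum()
    if (r * r).sum() == 0 and (n * n).sum() == 0:
        return -1.0
    return err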
......@@ -41,9 +41,9 @@ mailserver = None
# mailserver="tuodomain.country"
# LAUNCHING_INSTRUCTION = "echo 'localhost\n' > machinefile ; time PyHST2_2017c input.par gpu2-1304,0"
LAUNCHING_INSTRUCTION = "time PyHST2_2018bb input.par"
LAUNCHING_INSTRUCTION = "time PyHST2_2018bb input.par | tee output"
outputprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2018bb/2gpu_bis/tests"
outputprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2018bb_corrected/1gpu/tests"
# outputprefix="/data/scisofttmp/mirone/TEST_PYHST/RESULTS/2017c/2gpu_bis/tests"
# outputprefix="/data/scisofttmp/paleo/TEST_PYHST/DATASETS_RED/OUTPUTS"
......@@ -64,9 +64,7 @@ CASI_partial = [ "ID11_SNOW", "LENA", "LENA_MULTIRINGS", "MOUSSE", "MULTIP
casi = CASI_ALL
casi = [
"LENA", "LENA_MULTIRINGS", "MOUSSE", "MULTIPAGANIN","NANOPOINTS", "NNFBP",
"PATCHES_VECTORIAL", "SINO_THRESHOLD", "SIRT_LENA" ]
import fnmatch
......