frsv.cc 20.1 KB
Newer Older
myron's avatar
myron committed
1
2
3
4
5
6
7
#include<stdlib.h>
#include <yaml-cpp/yaml.h>
#include <string>
#include<iostream>
#include<stdio.h>
#include <boost/filesystem.hpp>
#include<math.h>
myron's avatar
myron committed
8
9
10
11
12
13

#include <highfive/H5File.hpp>
#include <highfive/H5DataSet.hpp>
#include <highfive/H5DataSpace.hpp>
#include <highfive/H5DataType.hpp>

myron's avatar
myron committed
14
15
#define assertm(exp, msg) assert(((void)msg, exp))

myron's avatar
myron committed
16
17
namespace h5 = HighFive;

myron's avatar
myron committed
18
struct DimPars_struct {
myron's avatar
myron committed
19
20
21
22
23
24
  size_t  NE  ;    
  size_t  NV  ;    
  size_t  NROI; 
  size_t  DIMZ; 
  size_t  DIMY; 
  size_t  DIMX;
myron's avatar
myron committed
25
  int  ZSTART ;
myron's avatar
myron committed
26
  int  ZEND ; 
myron's avatar
myron committed
27
28
29
30
31
32
33
34
35
36
37
};
typedef DimPars_struct  DimPars;

struct InputFileNames_struct {
  std::string DSname;
  std::string DDname;
  std::string SSname;
  std::string COEFFSname;
};
typedef  InputFileNames_struct InputFileNames;

myron's avatar
myron committed
38
39
40
float * read_volume(std::string fn, std::vector<size_t> &dims) {

  float * result; 
myron's avatar
myron committed
41
  std::string testh5_name(fn) ;
myron's avatar
myron committed
42
43
44
45
46
47
48
49
50
  h5::File test_file( testh5_name  , h5::File::ReadOnly);
  h5::DataSet test_dataset = test_file.getDataSet("data");
  dims = test_dataset.getDimensions();
  size_t tot = 1;
  for(size_t  i=0; i< dims.size(); i++) tot *= dims[i];
  result  =  (float*) malloc( tot *  sizeof(float));
  test_dataset.read( result , h5::AtomicType<float>()    ) ;
  
  return result;  
myron's avatar
myron committed
51
52
}

myron's avatar
myron committed
53
54
void save_volume(std::string fn, float * X , std::vector<size_t> dims) {

myron's avatar
myron committed
55
  std::string testh5_name(fn) ;
myron's avatar
myron committed
56
57
58
  h5::File test_file( testh5_name  , h5::File::ReadWrite | h5::File::Create | h5::File::Truncate );
  h5::DataSet dataset = test_file.createDataSet<float>("data",  h5::DataSpace(dims));
  dataset.write_raw(X);
myron's avatar
myron committed
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
}



class SolVol {
public:
  float *__restrict__ ptr;
};
  
class ProjectionVol {
public:
  float *__restrict__   ptr;
};

class CoefficientsVol {
public:
  float * __restrict__ ptr;
};

class FreeFactsVol {
public:
  float * __restrict__ ptr;
};

// DS SHAPE  (7, 72, 10, 41, 241)
//  DD SHAPE  (7, 72)
//  SS SHAPE  (7, 72, 241, 241)
class Problem {
public:
myron's avatar
myron committed
88
  Problem(InputFileNames &if_names_a, DimPars & dimpars_a ){
myron's avatar
myron committed
89
90
91
    this->if_names = if_names_a;
    this->dimpars  = dimpars_a;

myron's avatar
myron committed
92
    {
myron's avatar
myron committed
93
94
95
96
97
98
99
      std::vector<size_t> dims ; 
      this->DS = read_volume( if_names.DSname, dims) ;
      dimpars.NV   = dims[0] ; 
      dimpars.NROI = dims[1] ; 
      dimpars.DIMZ = dims[2] ; 
      dimpars.DIMY = dims[3] ; 
      dimpars.DIMX = dims[4] ;
myron's avatar
myron committed
100
101
102
    }
    
    
myron's avatar
myron committed
103
    if(dimpars.ZSTART>=0) {
myron's avatar
myron committed
104
      int dimz = 1+ dimpars.ZEND - dimpars.ZSTART ;
myron's avatar
myron committed
105
      float *ds = (float*) malloc(size_t(dimpars.NV)*size_t(dimpars.NROI)*size_t(dimz)* size_t(dimpars.DIMY)* size_t(dimpars.DIMX) *  sizeof(float) ) ;
myron's avatar
myron committed
106
107
      size_t BLOCK = size_t(dimpars.DIMZ)* size_t(dimpars.DIMY)* size_t(dimpars.DIMX  ) ;
      size_t block = size_t(        dimz)* size_t(dimpars.DIMY)* size_t(dimpars.DIMX  ) ;
myron's avatar
myron committed
108
      size_t offset  = size_t(dimpars.ZSTART)* size_t(dimpars.DIMY)* size_t(dimpars.DIMX) ;
myron's avatar
myron committed
109
110
      for(int iv=0; iv< (int) dimpars.NV; iv++) {
	for(int ir=0; ir< (int) dimpars.NROI; ir++) {
myron's avatar
myron committed
111
	  memcpy(
myron's avatar
myron committed
112
113
114
		 ds + (iv*dimpars.NROI+ir)*block    ,
		 DS + (iv*dimpars.NROI+ir)*BLOCK+offset,
		 block*sizeof(float)
myron's avatar
myron committed
115
116
117
118
119
120
121
122
		 );
	}
      }
      dimpars.DIMZ  = dimz ;
      free( DS ) ;
      this->DS = ds ;
    }
    
myron's avatar
myron committed
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
    {
      std::vector<size_t> dims ; 
      this->DD =  read_volume( if_names.DDname, dims) ;
      assert( dims[0]  == dimpars.NV   ) ;
      assert( dims[1]  == dimpars.NROI ) ;
    }

    {
      std::vector<size_t> dims ; 
      this->SS = read_volume( if_names.SSname, dims) ;
      assert( dims[0]  == dimpars.NROI ) ;
      assert( dims[1]  == dimpars.DIMX   ) ;
      assert( dims[2]  == dimpars.DIMX   ) ;      
    }
      
    {
      std::vector<size_t> dims ; 
      this->coefficients = read_volume( if_names.COEFFSname, dims) ;
      dimpars.NE = dims[0]  ;
      assert(      dims[1]  == dimpars.NV     ) ;
      assert(      dims[2]  == dimpars.NROI   ) ;
    }
myron's avatar
myron committed
145
146
    

myron's avatar
myron committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
    /*{
      int cnt=0;
      for(int iE=0; iE< dimpars.NE; iE++) {
	for(int iV=0; iV< dimpars.NV; iV++) {
	  for(int iR=0; iR< dimpars.NROI; iR++) {
	    this->coefficients[cnt] = ( iE==iV  );
	    if(iR!=1) 
	      this->coefficients[cnt] = 0;
	    cnt++;
	  }
	}
      }
    }
    */
    
myron's avatar
myron committed
162
163
164
165
166
167

    DimPars *d = & dimpars;
    

    this->Matff = (float*) malloc(   d->NE * d->NE * d->NROI * sizeof(float) ) ; 
    this->MatSffF2  = (float*) malloc(   d->NE*d->NE* d->DIMX*d->DIMX   *      sizeof(float) ) ;    
myron's avatar
myron committed
168
    this->setMatff();
myron's avatar
myron committed
169
170
171
172
173
174
175
176
177
    
    this->VectA = (float*) malloc(   d->NE *  d->DIMZ *  d->DIMY * d->DIMX * sizeof(float) ) ;


    this->MatR  = (float*) malloc(   d->NROI * sizeof(float) ) ;
    
    this->VectR  = (float*) malloc(   d->NROI * sizeof(float) ) ;
    
  }
myron's avatar
myron committed
178
  
myron's avatar
myron committed
179
180
181
182
#define SOL_addr( iE, iz, iy , ix  ) ((( (iE)*DIMZ + (iz))*DIMY+  (iy))*DIMX+  (ix))
#define SS_addr(  iroi, ix1, ix2  ) (( (iroi)*DIMX+  (ix1))*DIMX+  (ix2))
#define PRO_addr( iV, iroi, iz, iy,  ix  ) ((( (  (iV)*NROI        +iroi  )*DIMZ + (iz))*DIMY+  (iy))*DIMX+  (ix))

myron's avatar
myron committed
183
184
185
  void setVectA_and_Mat(SolVol target,   FreeFactsVol F ) {
    setMatSffF2(F) ;
    setVectA( target,    F ) ;
myron's avatar
myron committed
186
187
  };
  
myron's avatar
myron committed
188
  void setVectA(SolVol target,   FreeFactsVol F ) {
myron's avatar
myron committed
189
190
191
192
193
194
    int NE = dimpars.NE;
    int NV = dimpars.NV;
    int NROI = dimpars.NROI;
    int DIMZ = dimpars.DIMZ;
    int DIMY = dimpars.DIMY;
    int DIMX = dimpars.DIMX;
myron's avatar
myron committed
195
#pragma omp   parallel for    
myron's avatar
myron committed
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
    for(int ie=0 ; ie<NE; ie++) {	
      for( int iz=0; iz<DIMZ; iz++) {
	for( int iy=0; iy<DIMY; iy++) {
	  for( int ix=0; ix<DIMX; ix++) {
	    float tmp=0.0;
	    for(int iv=0 ; iv<NV; iv++) {	
	      for(int ir=0 ; ir<NROI; ir++) {	
		tmp +=    DS[  PRO_addr( iv,ir,  iz, iy,  ix        )  ] *  coefficients[ (ie*NV   +iv )*NROI +ir]   *F.ptr[ir] ; 
	      }
	    }
	    target.ptr[ SOL_addr( ie, iz, iy , ix  )   ]= tmp ; 
	  }
	}
      }
    }
  }
myron's avatar
myron committed
212
213
214
215
216
  void project_solution(SolVol target){
    int NE = dimpars.NE;
    int DIMZ = dimpars.DIMZ;
    int DIMY = dimpars.DIMY;
    int DIMX = dimpars.DIMX;
myron's avatar
myron committed
217
#pragma omp   parallel for    
myron's avatar
myron committed
218
219
220
221
222
223
224
225
226
227
228
229
    for(int ie=NE-1 ; ie<NE; ie++) {	
      for( int iz=0; iz<DIMZ; iz++) {
	for( int iy=0; iy<DIMY; iy++) {
	  for( int ix=0; ix<DIMX; ix++) {
	    target.ptr[ SOL_addr( ie, iz, iy , ix  )   ]= 0.6 ; 
	  }
	}
      }
    }
  }

  void setMatff() {
myron's avatar
myron committed
230
231
232
    int NE = dimpars.NE;
    int NV = dimpars.NV;
    int NROI = dimpars.NROI;
myron's avatar
myron committed
233
#pragma omp   parallel for    
myron's avatar
myron committed
234
235
236
237
238
239
240
241
242
243
244
245
246
247
    for(int ir=0; ir<NROI; ir++) {
      for(int ie1=0 ; ie1<NE; ie1++) {	
	for(int ie2=0 ; ie2<NE; ie2++) {
	  float tmp = 0;
	  for(int iv=0; iv<NV; iv++) {
	    tmp+=coefficients[ (ie1*NV   +iv )*NROI +ir]*coefficients[ (ie2*NV   +iv )*NROI +ir  ] ; 
	  }
	  Matff[    (ie1*NE +ie2 )*NROI  +  ir ] = tmp;
	}
      }
    }
  }
  
  
myron's avatar
myron committed
248
  void setMatSffF2(FreeFactsVol F) {
myron's avatar
myron committed
249
250
251
    int NE = dimpars.NE;
    int NROI = dimpars.NROI;
    int DIMX = dimpars.DIMX;
myron's avatar
myron committed
252
#pragma omp   parallel for    
myron's avatar
myron committed
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
    for(int ie1=0 ; ie1<NE; ie1++) {	
      for(int ie2=0 ; ie2<NE; ie2++) {
	for(int ix1=0 ; ix1<DIMX; ix1++) {	
	  for(int ix2=0 ; ix2<DIMX; ix2++) {
	    float tmp=0;
	    for(int ir=0; ir<NROI; ir++) {
	      tmp +=   F.ptr[ir]*F.ptr[ir] * Matff[    (ie1*NE +ie2 )*NROI  +  ir ] *     SS[  SS_addr (  ir, ix1, ix2  ) ]  ;

	    }
	    MatSffF2[  ((ie1*NE +ie2 )*DIMX + ix1)*DIMX + ix2] = tmp;
	  }
	}
      }
    }
  }
  
myron's avatar
myron committed
269
  void setFreeFacts(FreeFactsVol V , SolVol C) {
myron's avatar
myron committed
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
      float buff[100];
    {
      int NE = dimpars.NE;
      int NV = dimpars.NV;
      int NROI = dimpars.NROI;
      int DIMZ = dimpars.DIMZ;
      int DIMY = dimpars.DIMY;
      int DIMX = dimpars.DIMX;

      
      float *Ctmp = (float*)malloc(  NV*NROI*DIMZ*DIMY*DIMX               *sizeof(float));
      {
	
	memset(Ctmp,0,  NV*NROI*DIMZ*DIMY*DIMX               *sizeof(float));
	float tmp=0;
myron's avatar
myron committed
285
#pragma omp  parallel for      
myron's avatar
myron committed
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
	for(int iv=0 ; iv<NV; iv++) {
	  for(int ir=0 ; ir<NROI; ir++) {	
	    for( int iz=0; iz<DIMZ; iz++) {
	      for( int iy=0; iy<DIMY; iy++) {
		for(int ix=0 ; ix<DIMX; ix++) {
		  tmp=0.0;
		  for(int ie=0 ; ie<NE; ie++) {
		    tmp += C.ptr[SOL_addr( ie,iz,iy,ix   )] * coefficients[ (ie*NV   +iv )*NROI +ir] ; 
		  }
		  Ctmp[ ((((iv)*NROI+ir)*DIMZ+iz)*DIMY+iy)*DIMX+ix] = tmp;
		}
	      }
	    }
	  }
	}
      }
      
      for(int ir=0 ; ir<NROI; ir++) {
	double forza =0;
	float tmp = 0.0f ; 
	for( int iz=0; iz<DIMZ; iz++) {
	  for( int iy=0; iy<DIMY; iy++) {
	    
	    for(int iv=0 ; iv<NV; iv++) {

	      forza=0;
	      for(int ix1=0 ; ix1<DIMX; ix1++) {	
		for(int ix2=0 ; ix2<DIMX; ix2++) {
		  
		  tmp +=
		    Ctmp[ ((((iv)*NROI+ir)*DIMZ+iz)*DIMY+iy)*DIMX+ix1] *
		    Ctmp[ ((((iv)*NROI+ir)*DIMZ+iz)*DIMY+iy)*DIMX+ix2] *
		    SS[  SS_addr (  ir, ix1, ix2  ) ] ;
		  forza += SS[  SS_addr (  ir, ix1, ix2  ) ]; 
		}
	      }
	    }
	  }
	}
	buff[ir] = tmp ;
	printf(" FORZA%d %e\n", ir, forza);
      }
      free(Ctmp);
    }
    printf("1\n");
    {
      int NE = dimpars.NE;
      int NV = dimpars.NV;
      int NROI = dimpars.NROI;
      int DIMZ = dimpars.DIMZ;
      int DIMY = dimpars.DIMY;
      int DIMX = dimpars.DIMX;
      for(int ir=0 ; ir<NROI; ir++) {	
	float tmp = 0.0f ; 
	for(int ie=0 ; ie<NE; ie++) {
	  for(int iv=0 ; iv<NV; iv++) {	
	    for( int iz=0; iz<DIMZ; iz++) {
	      for( int iy=0; iy<DIMY; iy++) {
		for(int ix=0 ; ix<DIMX; ix++) {	
		  tmp +=   C.ptr[SOL_addr( ie,iz,iy,ix   )] *  DS[  PRO_addr (  iv, ir,iz,iy,ix ) ]* coefficients[ (ie*NV   +iv )*NROI +ir  ]  ;
		}
	      }
	    }
	  }
	}
	{
	  float sum=0;
	  {
	    for(int iv=0; iv<NV; iv++) {
	      sum+= DD[iv*NROI+ir];
	    }
	  }
	  float f = V.ptr[ir];
	  printf(" L'errore per %d sarebbe %e\n", ir,      f*f *buff[ir] - 2* tmp*f  + sum );
	  f = tmp/buff[ir];
	  printf(" L'errore dopo per %d sarebbe %e   %e %e %e\n", ir,      f*f *buff[ir] - 2* tmp*f  + sum , buff[ir], tmp, sum);
	}
	if( buff[ir]) {
	  V.ptr[ir] = tmp / buff[ir];
	} else {
	  V.ptr[ir] = 0 ; 
	}
	printf(" setto %d a %e\n", ir, V.ptr[ir]);
      }
    }
  }
  
  void applyMatA( SolVol target,SolVol source) {
    int NE = dimpars.NE;
    int DIMZ = dimpars.DIMZ;
    int DIMY = dimpars.DIMY;
    int DIMX = dimpars.DIMX;

myron's avatar
myron committed
379
#pragma omp  parallel for          
myron's avatar
myron committed
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
    for(int ie1=0 ; ie1<NE; ie1++) {	
      for(int iz=0; iz< DIMZ; iz++) {
	for(int iy=0; iy< DIMY; iy++) {
	  
	  for(int ix1=0; ix1< DIMX; ix1++) {
	    float tmp = 0.0 ; 
	    for(int ie2=0 ; ie2<NE; ie2++) {
	      for(int ix2=0; ix2< DIMX; ix2++) {
		tmp +=  source.ptr[SOL_addr( ie2, iz, iy , ix2  ) ]*MatSffF2[  ((ie1*NE +ie2 )*DIMX + ix1)*DIMX + ix2] ;

	      }
	    }
	    target.ptr[SOL_addr( ie1, iz, iy , ix1  ) ]= tmp ; 
	  }
	}
      }
    }
  };
  
  void save(SolVol X, std::string name) {
myron's avatar
myron committed
400
401
    std::vector<size_t> dims = {size_t(dimpars.NE),size_t(dimpars.DIMZ), size_t(dimpars.DIMY),  size_t(dimpars.DIMX) };
    save_volume( name,X.ptr, dims);
myron's avatar
myron committed
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
    
  };

  
  template<class T> size_t size() { return 0; };  
  



  void gradReg_2_Add(SolVol   target, SolVol   source , float betaE, float betaV) {
    int NE = dimpars.NE;
    int DIMZ = dimpars.DIMZ;
    int DIMY = dimpars.DIMY;
    int DIMX = dimpars.DIMX;

    if( betaE==0 && betaV ==0 ) return;
    
    for( int iE=0; iE<NE; iE++) {
      for( int iz=0; iz<DIMZ; iz++) {
	for( int iy=0; iy<DIMY; iy++) {
	  for( int ix=0; ix<DIMX; ix++) {
	    float add = 0.0 ; 
	    if(iE<NE-1) {
	      add +=    betaE*(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE+1,iz,iy,ix   ) ]  )   ; 
	    }
	    if(iE>0) {
	      add +=    betaE*(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE-1,iz,iy,ix   ) ]  )   ; 
	    }
	    if(iz<DIMZ-1) {
	      add +=    betaV*(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz+1,iy,ix   ) ]  )   ; 
	    }
	    if(iz>0) {
	      add +=    betaV*(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz-1,iy,ix   ) ]  )   ; 
	    }
	    if(iy<DIMY-1) {
	      add +=    betaV*(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz,iy+1,ix   ) ]  )   ; 
	    }
	    if(iy>0) {
	      add +=    betaV*(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz,iy-1,ix   ) ]  )   ; 
	    }
	    if(ix<DIMX-1) {
	      add +=    betaV*(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz,iy,ix+1   ) ]  )   ; 
	    }
	    if(ix>0) {
	      add +=    betaV*(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz,iy,ix-1   ) ]  )   ; 
	    }
	    target.ptr[SOL_addr( iE,iz,iy,ix   )] += add; 
	  }
	}
      }
    }
  }


  void gradReg_1_Add(SolVol  target, SolVol  source , float betaE, float betaV) {
    int NE = dimpars.NE;
    int DIMZ = dimpars.DIMZ;
    int DIMY = dimpars.DIMY;
    int DIMX = dimpars.DIMX;

    if( betaE==0 && betaV ==0 ) return;
    
    for( int iE=0; iE<NE; iE++) {
      for( int iz=0; iz<DIMZ; iz++) {
	for( int iy=0; iy<DIMY; iy++) {
	  for( int ix=0; ix<DIMX; ix++) {
	    float add = 0.0 ; 
	    if(iE<NE-1) {
	      add +=   copysign( betaE,(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE+1,iz,iy,ix   ) ]  )  ) ; 
	    }
	    if(iE>0) {
	      add +=   copysign( betaE,(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE-1,iz,iy,ix   ) ]  )   ); 
	    }
	    if(iz<DIMZ-1) {
	      add +=   copysign( betaV,(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz+1,iy,ix   ) ]  )   ); 
	    }
	    if(iz>0) {
	      add +=   copysign( betaV,(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz-1,iy,ix   ) ]  )   ); 
	    }
	    if(iy<DIMY-1) {
	      add +=   copysign( betaV,(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz,iy+1,ix   ) ]  )   ); 
	    }
	    if(iy>0) {
	      add +=   copysign( betaV,(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz,iy-1,ix   ) ]  )   ); 
	    }
	    if(ix<DIMX-1) {
	      add +=   copysign( betaV,(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz,iy,ix+1   ) ]  )   ); 
	    }
	    if(ix>0) {
	      add +=   copysign( betaV,(  source.ptr[SOL_addr( iE,iz,iy,ix   )]-source.ptr[ SOL_addr( iE,iz,iy,ix-1   ) ]  )   ); 
	    }
	    target.ptr[SOL_addr( iE,iz,iy,ix   )] += add; 
	  }
	}
      }
    }
  }
  
  template <typename T>
  void allocate(T &vol) {
    vol.ptr  =  (float *) malloc(  size<T>() * sizeof(float) );
  };
  
  template <typename T>
  void settozero(T vol) {
    memset( vol.ptr, 0,  size<T>() * sizeof(float) );
  };

  template <typename T>
  void settoVal(T __restrict__ vol, float val) {
    size_t numels =   this->size<T>();
    for(size_t i = 0; i<numels; i++) {
      vol.ptr[i] = val ; 
    }
  }
  
  template <typename T>
  void copy(T __restrict__ target, T __restrict__ source ) {
    size_t numels =   this->size<T>();
    for(size_t i = 0; i<numels; i++) {
      target[i] = source[i] ; 
    }
  }

  template <typename T>
  void Scal( T  source , float alpha) {
    size_t numels =   this->size<T>();
    for(size_t i = 0; i<numels; i++) {
      source.ptr[i] = source.ptr[i] *alpha ; 
    }
  }

  template <typename T>
  void copyScal(T  target, T  source , float alpha) {
    size_t numels =   this->size<T>();
    for(size_t i = 0; i<numels; i++) {
      target.ptr[i] = source.ptr[i] *alpha ; 
    }
  }

  template <typename T>
  void copyScal(T  target, float *  source , float alpha) {
    size_t numels =   this->size<T>();
    for(size_t i = 0; i<numels; i++) {
      target.ptr[i] = source[i] *alpha ; 
    }
  }

  
  template <typename T>
  double  scalar(T  target, T  source ) {
    size_t numels =   this->size<T>();
    double res=0.0;

    for(size_t i = 0; i<numels; i++) {
      res += target.ptr[i] *source.ptr[i]  ; 
    }

    return res;
  }
  
  template <typename T>
  void AXPBYCR(float a , T  vol_a, float b, T  vol_b , float c, T __restrict__ vol_res ){
    size_t numels =   this->size<T>();
    for(size_t i = 0; i<numels; i++) {
      vol_res.ptr[i] = a*vol_a.ptr[i] + b*vol_b.ptr[i] +c*vol_res.ptr[i]; 
    }
  };
  
  DimPars dimpars;
  InputFileNames if_names ; 
  
  float * __restrict__ DS;
  float * __restrict__ DD;
  float * __restrict__ SS;
  
  float *coefficients ; 
  
  
  float *Matff     ;
  float *MatSffF2  ;
  float *VectA     ;
  float *MatR      ;
  float *VectR     ;
  
  
};

template<>  size_t Problem::size<SolVol> () {  return size_t(dimpars.NE)*size_t(dimpars.DIMZ)*size_t(dimpars.DIMY)*size_t(dimpars.DIMX) ;   };
template<>  size_t Problem::size<ProjectionVol>() {  return  size_t(dimpars.NV)*size_t(dimpars.NROI)*size_t(dimpars.DIMZ)* size_t(dimpars.DIMY)* size_t(dimpars.DIMX) ;  };
template<>  size_t Problem::size<FreeFactsVol>() {  return  size_t(dimpars.NROI) ;  };


int main(int argc, char ** argv)  {

  int index_input;
  {
    const char * usage= "frsv [-h] input_file   \n"
                        "   options :\n"
                        "        -h   prints this help \n"
                        "   arguments :\n"
                        "       input_file : a yaml file containing \n"
myron's avatar
myron committed
604
605
606
607
                        "         DSname : test0_DS.h5 \n"
                        "         DDname : test0_DD.h5 \n"
                        "         SSname : test0_SS.h5 \n"
                        "         COEFFSname : coefficients.h5 \n";
myron's avatar
myron committed
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
  
    int hflag = 0;
    // char *cvalue = NULL;
    int c;
    opterr = 0;
    //  while ((c = getopt (argc, argv, "hc:")) != -1) {
    while ((c = getopt (argc, argv, "h")) != -1) {
      switch (c)
	{
	case 'h':
	  hflag = 1;
	  break;
	  
	  // case 'c':
	  //   cvalue = optarg;
	  //   break;
	case '?':
	  if (optopt == 'c')
	    fprintf (stderr, "Option -%c requires an argument.\n", optopt);
	  else if (isprint (optopt))
	    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
	  else
	    fprintf (stderr,
		     "Unknown option character `\\x%x'.\n",
		     optopt);
	  return 1;
	default:
	  abort ();
	}
    }
    index_input = optind ; 
    if (hflag || index_input != (argc-1) ) {
      std::cout << " USAGE \n"  << usage ; 
    }
  }
    
  YAML::Node mockup_config;
  mockup_config = YAML::LoadFile(argv[index_input]);

  assert(  mockup_config["DSname"] ) ;
  assert(  mockup_config["DDname"] ) ;
  assert(  mockup_config["SSname"] ) ;
  assert(  mockup_config["COEFFSname"] ) ;

myron's avatar
myron committed
652
653
  DimPars dimpars ; 
  dimpars.ZSTART = -1; dimpars.ZEND = 100000 ; 
myron's avatar
myron committed
654
  if( mockup_config["ZSTART"] )  {
myron's avatar
myron committed
655
656
    dimpars.ZSTART =      mockup_config["ZSTART"].as<int>();
    dimpars.ZEND   =      mockup_config["ZEND"  ].as<int>();
myron's avatar
myron committed
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
  } ;



  float betaE = 0.0, betaV = 0.0 ;
  
  if( mockup_config["betaE"] )  {
    betaE = mockup_config["betaE"].as<float>();
  } ;
  if( mockup_config["betaV"] )  {
    betaV = mockup_config["betaV"].as<float>();
  } ;

  
  boost::filesystem::path p(argv[index_input]);
  std::string dirname = p.parent_path().string()+"/";
myron's avatar
myron committed
673
674

  
myron's avatar
myron committed
675
676
677
678
679
680
681
  InputFileNames if_names = {
    dirname + mockup_config["DSname"].as<std::string>() ,
    dirname + mockup_config["DDname"].as<std::string>() ,
    dirname + mockup_config["SSname"].as<std::string>(), 
    dirname + mockup_config["COEFFSname"].as<std::string>() 
  };

myron's avatar
myron committed
682
  Problem pb( if_names, dimpars );
myron's avatar
myron committed
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703


  SolVol X, grad ,   XvectA, Xtmp;
  
  pb.allocate(X);
  pb.allocate(Xtmp);
  pb.allocate(grad);
  pb.allocate(XvectA);

  ProjectionVol Perror;
  
  pb.allocate(Perror);


  FreeFactsVol ffacts,fftmp;
  
  pb.allocate(ffacts);
  pb.allocate(fftmp);


  pb.settoVal(ffacts     ,1.0f);
myron's avatar
myron committed
704
705
706
707
708
709
710
711


  /*
 	ffacts.ptr[0]=0;
	  ffacts.ptr[2]=0;
	  ffacts.ptr[3]=0;
   */
  
myron's avatar
myron committed
712
713
714
715
716
717
  {
    double norma = sqrt(pb.scalar(ffacts, ffacts)); 
    pb.Scal( ffacts,1.0/norma ) ;
  }
  pb.settoVal(X     ,0.0f);
  
myron's avatar
myron committed
718
  for(int iter_c = 0; iter_c<10; iter_c++) {
myron's avatar
myron committed
719
    
myron's avatar
myron committed
720
    pb.setVectA_and_Mat( XvectA,   ffacts ) ;
myron's avatar
myron committed
721
722
723
724
725

    float Lip=1.0; 
    
    pb.settoVal(Xtmp     ,1.0f);

myron's avatar
myron committed
726
    for(int i=0; i< 10; i++) {
myron's avatar
myron committed
727
728
      if(0) {
	pb.copyScal(fftmp, ffacts, 1.0);
myron's avatar
myron committed
729
	pb.setFreeFacts( fftmp ,  X) ;
myron's avatar
myron committed
730
731
732
      }

      pb.applyMatA(grad , Xtmp);
myron's avatar
myron committed
733

myron's avatar
myron committed
734
      pb.gradReg_2_Add(grad, Xtmp , betaE, betaV) ;
myron's avatar
myron committed
735

myron's avatar
myron committed
736
737
738
739
740
741
742
      double norma = sqrt(pb.scalar(grad, grad  ));
      pb.copyScal( Xtmp, grad,1.0/norma ) ;
      printf("Norma Lipschitz %e\n", norma);
      Lip = norma*2;
    }


myron's avatar
myron committed
743
    for(int iter = 0; iter<10; iter++) {
myron's avatar
myron committed
744

myron's avatar
myron committed
745
746
747
748
      pb.settozero(grad);
      
      pb.applyMatA(grad , X);
      
myron's avatar
myron committed
749
750
751
      pb.gradReg_2_Add(grad, X , betaE, betaV ) ;
      
      pb.AXPBYCR( -1.0/Lip, grad,  +1.0/Lip, XvectA, 1.0,  X);
myron's avatar
myron committed
752
753
754
755

      
      // pb.project_solution(X);

myron's avatar
myron committed
756
      
myron's avatar
myron committed
757
758
      double merit =    pb.scalar(grad,  grad)  + pb.scalar( XvectA,  XvectA) - 2*pb.scalar(grad,  XvectA);      
      printf(" iter         %d %e\n", iter, merit);
myron's avatar
myron committed
759
760
      
    }
myron's avatar
myron committed
761
    pb.save(X, "solution.h5");
myron's avatar
myron committed
762
    
myron's avatar
myron committed
763
    pb.setFreeFacts( ffacts ,  X) ;
myron's avatar
myron committed
764
765
766
767
768
    
    {
      double norma = sqrt(pb.scalar(ffacts, ffacts)); 
      pb.Scal( ffacts,1.0/norma ) ;
    }
myron's avatar
myron committed
769
    printf(" set ok \n");
myron's avatar
myron committed
770
771
  }

myron's avatar
myron committed
772
  pb.save(X, "solution.h5");
myron's avatar
myron committed
773
774
775
  
}