kspaceFirstOrder3D-OMP 1.0
The C++ implementation of the k-wave toolbox for the time-domain simulation of acoustic wave fields in 3D
KSpaceSolver/KSpaceFirstOrder3DSolver.cpp
Go to the documentation of this file.
00001 
00033 #include <iostream>
00034 #include <omp.h>
00035 #include <sys/resource.h>
00036 #include <emmintrin.h>
00037 #include <math.h>
00038 #include <time.h>
00039 #include <sstream>
00040 
00041 #include <KSpaceSolver/KSpaceFirstOrder3DSolver.h>
00042 
00043 #include <Utils/ErrorMessages.h>
00044 
00045 #include <MatrixClasses/FFTWComplexMatrix.h>
00046 #include <MatrixClasses/MatrixContainer.h>
00047 
00048 using namespace std;
00049 
00050 //----------------------------------------------------------------------------//
00051 //                              Constants                                     //
00052 //----------------------------------------------------------------------------//
00053 
00054 
00055 
00056 
00057 //----------------------------------------------------------------------------//
00058 //                              Public methods                                //
00059 //----------------------------------------------------------------------------//
00060 
00061 
00066 TKSpaceFirstOrder3DSolver::TKSpaceFirstOrder3DSolver():
00067         p_sensor_raw_OutputStream (NULL),     
00068         ux_sensor_raw_OutputStream(NULL),   
00069         uy_sensor_raw_OutputStream(NULL),
00070         uz_sensor_raw_OutputStream(NULL),          
00071 
00072         MatrixContainer(),
00073         t_index(0), ActPercent(0),
00074         Parameters(NULL),
00075         TotalTime(), PreProcessingTime(), DataLoadTime (), SimulationTime(),     
00076         PostProcessingTime(), IterationTime()
00077                 
00078 {    
00079     TotalTime.Start();
00080                 
00081     Parameters        = TParameters::GetInstance();            
00082     
00083 }// end of TKSpace3DSolver
00084 //------------------------------------------------------------------------------
00085 
00086 
00091 TKSpaceFirstOrder3DSolver::~TKSpaceFirstOrder3DSolver(){
00092     FreeMemory();
00093 }// end of TKSpace3DSolver
00094 //------------------------------------------------------------------------------
00095 
00096 
00101 void TKSpaceFirstOrder3DSolver::AllocateMemory(){
00102     
00103     
00104     // create container, then all matrices 
00105     MatrixContainer.AddMatricesIntoContainer();        
00106     MatrixContainer.CreateAllObjects();
00107         
00108     
00109     //------------------------ Output matrices ---------------------------//
00110                 
00111     if (Parameters->IsStore_p_raw()) {
00112         p_sensor_raw_OutputStream = new TOutputHDF5Stream();
00113         if (!p_sensor_raw_OutputStream)  throw bad_alloc();        
00114     }
00115             
00116     if (Parameters->IsStore_u_raw()){
00117         
00118         ux_sensor_raw_OutputStream = new TOutputHDF5Stream();
00119         if (!ux_sensor_raw_OutputStream) throw bad_alloc();
00120         
00121         uy_sensor_raw_OutputStream = new TOutputHDF5Stream();
00122         if (!uy_sensor_raw_OutputStream) throw bad_alloc();
00123                 
00124         uz_sensor_raw_OutputStream = new TOutputHDF5Stream();
00125         if (!uz_sensor_raw_OutputStream) throw bad_alloc();        
00126     }// GetSaveUxyz
00127     
00128     
00129 }// end of AllocateMemory
00130 //------------------------------------------------------------------------------
00131 
00136 void TKSpaceFirstOrder3DSolver::FreeMemory(){
00137     
00138      
00139      MatrixContainer.FreeAllMatrices();
00140       
00141       //-- output matrices --//
00142       if (p_sensor_raw_OutputStream){
00143           delete p_sensor_raw_OutputStream;
00144           p_sensor_raw_OutputStream = NULL;
00145       }
00146      
00147       
00148       if (ux_sensor_raw_OutputStream){
00149           delete ux_sensor_raw_OutputStream;
00150           ux_sensor_raw_OutputStream = NULL;
00151       }
00152       
00153       if (uy_sensor_raw_OutputStream){
00154           delete uy_sensor_raw_OutputStream;
00155           uy_sensor_raw_OutputStream = NULL;
00156       }
00157       
00158       if (uz_sensor_raw_OutputStream){
00159           delete uz_sensor_raw_OutputStream;
00160           uz_sensor_raw_OutputStream = NULL;
00161       }
00162             
00163 }// end of FreeMemory
00164 //------------------------------------------------------------------------------
00165     
00171 void TKSpaceFirstOrder3DSolver::LoadInputData(){
00172    
00173    DataLoadTime.Start();
00174     
00175    //-- open and load input file
00176    
00177    THDF5_File& HDF5_InputFile  = Parameters->HDF5_InputFile; // file is opened (in Parameters)
00178    THDF5_File& HDF5_OutputFile = Parameters->HDF5_OutputFile;
00179    
00180    
00181    MatrixContainer.LoadMatricesDataFromDisk(HDF5_InputFile);
00182    
00183    HDF5_InputFile.Close();
00184    
00185    
00186    
00187    //--------------- outputs ------------//
00188    
00189    HDF5_OutputFile.Create(Parameters->GetOutputFileName().c_str());
00190    
00191    
00192    TDimensionSizes TotalSizes(Get_sensor_mask_ind().GetTotalElementCount(),Parameters->Get_Nt() - Parameters->GetStartTimeIndex(), 1);
00193    TDimensionSizes ChunkSizes(Get_sensor_mask_ind().GetTotalElementCount(),1, 1);
00194    
00195 
00196    if (Parameters->IsStore_p_raw()) {
00197         p_sensor_raw_OutputStream->CreateStream(HDF5_OutputFile,
00198                                                 MatrixContainer[p].HDF5MatrixName.c_str(),
00199                                                 TotalSizes, ChunkSizes, Parameters->GetCompressionLevel());
00200    }
00201       
00202        
00203    if (Parameters->IsStore_u_raw()) {
00204         
00205         ux_sensor_raw_OutputStream->CreateStream(HDF5_OutputFile,ux_Name,TotalSizes, ChunkSizes, Parameters->GetCompressionLevel());
00206         uy_sensor_raw_OutputStream->CreateStream(HDF5_OutputFile,uy_Name,TotalSizes, ChunkSizes, Parameters->GetCompressionLevel());
00207         uz_sensor_raw_OutputStream->CreateStream(HDF5_OutputFile,uz_Name,TotalSizes, ChunkSizes, Parameters->GetCompressionLevel());
00208     }
00209 
00210    
00211 
00212    DataLoadTime.Stop();
00213    
00214 }// end of LoadInputData
00215 //------------------------------------------------------------------------------
00216 
00217 
00224 void TKSpaceFirstOrder3DSolver::Compute(){
00225   
00226   PreProcessingTime.Start();                    
00227   
00228   fprintf(stdout,"FFT plans creation.........."); fflush(stdout);
00229   
00230         InitializeFFTWPlans( );
00231 
00232   fprintf(stdout,"Done \n"); 
00233   fprintf(stdout,"Pre-processing phase........"); fflush(stdout);
00234 
00235   
00236         PreProcessingPhase( );  
00237   
00238   
00239   fprintf(stdout,"Done \n");
00240   fprintf(stdout,"Current memory in use:%8ldMB\n", ShowMemoryUsageInMB());
00241   PreProcessingTime.Stop();
00242   
00243   fprintf(stdout,"Elapsed time:          %8.2fs\n",PreProcessingTime.GetElapsedTime());
00244   
00245   
00246   SimulationTime.Start();
00247     
00248         Compute_MainLoop();
00249   
00250   SimulationTime.Stop();
00251   
00252   fprintf(stdout,"-------------------------------------------------------------\n");           
00253   fprintf(stdout,"Elapsed time:                                       %8.2fs\n",SimulationTime.GetElapsedTime());
00254   fprintf(stdout,"-------------------------------------------------------------\n");           
00255     
00256   fprintf(stdout,"Post-processing phase......."); fflush(stdout);       
00257   PostProcessingTime.Start();
00258   
00259 
00260         PostPorcessing();  
00261   PostProcessingTime.Stop();
00262   fprintf(stdout,"Done \n");
00263   fprintf(stdout,"Elapsed time:          %8.2fs\n",PostProcessingTime.GetElapsedTime());
00264 
00265   
00266   WriteOutputDataInfo();
00267   
00268   Parameters->HDF5_OutputFile.Close();
00269   
00270   
00271 }// end of Compute()
00272 //------------------------------------------------------------------------------
00273 
00274 
00275 
00276 
00281 void TKSpaceFirstOrder3DSolver::PrintParametersOfSimulation(FILE * file){
00282         
00283     
00284     fprintf(file,"Domain dims:   [%4ld, %4ld,%4ld]\n",              
00285                 Parameters->GetFullDimensionSizes().X,
00286                 Parameters->GetFullDimensionSizes().Y,
00287                 Parameters->GetFullDimensionSizes().Z);
00288     
00289     fprintf(file,"Simulation time steps:  %8ld\n", Parameters->Get_Nt());    
00290 }// end of PrintParametersOfTask
00291 //------------------------------------------------------------------------------
00292 
00293 
00294 
00300 size_t TKSpaceFirstOrder3DSolver::ShowMemoryUsageInMB(){
00301     
00302   struct rusage mem_usage;
00303   getrusage(RUSAGE_SELF, &mem_usage);
00304   
00305   return mem_usage.ru_maxrss >> 10;
00306 
00307 
00308 }// end of ShowMemoryUsageInMB
00309 //------------------------------------------------------------------------------
00310 
00311 
00318 void TKSpaceFirstOrder3DSolver::PrintFullNameCodeAndLicense(FILE * file){
00319     fprintf(file,"\n");
00320     fprintf(file,"+--------------------------------------------------+\n");
00321     fprintf(file,"| Build Number:     kspaceFirstOrder3D v2.13       |\n");
00322     fprintf(file,"| Operating System: Linux x64                      |\n");
00323     fprintf(file,"|                                                  |\n");    
00324     fprintf(file,"| Copyright (C) 2012 Jiri Jaros and Bradley Treeby |\n");
00325     fprintf(file,"| http://www.k-wave.org                            |\n");    
00326     fprintf(file,"+--------------------------------------------------+\n");
00327     fprintf(file,"\n");
00328 }// end of GetFullCodeAndLincence
00329 //------------------------------------------------------------------------------
00330 
00331 
00332 
00333 //----------------------------------------------------------------------------//
00334 //                            Protected methods                               //
00335 //----------------------------------------------------------------------------//
00336 
00341 void TKSpaceFirstOrder3DSolver::InitializeFFTWPlans(){
00342 
00343     // initialization of FFTW library
00344     fftwf_init_threads();        
00345     fftwf_plan_with_nthreads(Parameters->GetNumberOfThreads());
00346         
00347     // create real to complex plans
00348     Get_FFT_X_temp().CreateFFTPlan3D_R2C(Get_p());   
00349     Get_FFT_Y_temp().CreateFFTPlan3D_R2C(Get_p());   
00350     Get_FFT_Z_temp().CreateFFTPlan3D_R2C(Get_p());   
00351     
00352     // create real to complex plans
00353     Get_FFT_X_temp().CreateFFTPlan3D_C2R(Get_p());   
00354     Get_FFT_Y_temp().CreateFFTPlan3D_C2R(Get_p());   
00355     Get_FFT_Z_temp().CreateFFTPlan3D_C2R(Get_p());   
00356 }// end of PrepareData
00357 //------------------------------------------------------------------------------
00358 
00359 
00360 
00366 void TKSpaceFirstOrder3DSolver::PreProcessingPhase(){
00367     
00368     Get_sensor_mask_ind().RecomputeIndices();
00369         
00370     
00371     if ((Parameters->Get_transducer_source_flag() != 0) || 
00372         (Parameters->Get_ux_source_flag() != 0)         ||
00373         (Parameters->Get_uy_source_flag() != 0)         ||
00374         (Parameters->Get_uz_source_flag() != 0)            ){
00375                     Get_u_source_index().RecomputeIndices();
00376     }                
00377     
00378     if (Parameters->Get_transducer_source_flag() != 0){
00379         Get_delay_mask().RecomputeIndices();
00380     }
00381     
00382     if (Parameters->Get_p_source_flag() != 0){
00383         Get_p_source_index().RecomputeIndices();
00384     }
00385     
00386     
00387     
00388     // compute dt / rho0_sg...                
00389     if (Parameters->Get_rho0_scalar_flag()){    // rho is scalar
00390 
00391             Parameters->Get_rho0_sgx_scalar() = Parameters->Get_dt() / Parameters->Get_rho0_sgx_scalar();
00392             Parameters->Get_rho0_sgy_scalar() = Parameters->Get_dt() / Parameters->Get_rho0_sgy_scalar();
00393             Parameters->Get_rho0_sgz_scalar() = Parameters->Get_dt() / Parameters->Get_rho0_sgz_scalar();    
00394             
00395             // non-uniform grid cannot be pre-calculated :-(
00396                     
00397     }else { // rho is matrix
00398         if (Parameters->Get_nonuniform_grid_flag()) {
00399             Caclucalte_dt_rho0_non_uniform();
00400         
00401         }else{                                    
00402             Get_dt_rho0_sgx().ScalarDividedBy(Parameters->Get_dt());
00403             Get_dt_rho0_sgy().ScalarDividedBy(Parameters->Get_dt());
00404             Get_dt_rho0_sgz().ScalarDividedBy(Parameters->Get_dt());                    
00405         }    
00406     }
00407     
00408             
00409     
00410     // generate different matrices 
00411     if (Parameters->Get_absorbing_flag() != 0) {
00412         Generate_kappa_absorb_nabla1_absorb_nabla2();
00413         Generate_absorb_tau_absorb_eta_matrix();
00414     }    
00415     else {
00416         Generate_kappa();
00417     }    
00418     
00419     // calculate c^2. It has to be after kappa gen... because of c modification
00420     Compute_c2();
00421 
00422     
00423 }// end of ComputeInitPhase
00424 //------------------------------------------------------------------------------
00425 
00426 
00432 void TKSpaceFirstOrder3DSolver::Generate_kappa(){
00433      
00434     #ifndef __NO_OMP__    
00435        #pragma omp parallel 
00436     #endif
00437     {
00438     
00439         const float dx_sq_rec = 1.0f / (Parameters->Get_dx()*Parameters->Get_dx());
00440         const float dy_sq_rec = 1.0f / (Parameters->Get_dy()*Parameters->Get_dy());
00441         const float dz_sq_rec = 1.0f / (Parameters->Get_dz()*Parameters->Get_dz());
00442         
00443         const float c_ref_dt_pi = Parameters->Get_c_ref() * Parameters->Get_dt() * float(M_PI);
00444         
00445         const float Nx_rec   = 1.0f / (float) Parameters->GetFullDimensionSizes().X;
00446         const float Ny_rec   = 1.0f / (float) Parameters->GetFullDimensionSizes().Y;
00447         const float Nz_rec   = 1.0f / (float) Parameters->GetFullDimensionSizes().Z;
00448         
00449         
00450         const size_t X_Size  = Parameters->GetReducedDimensionSizes().X;
00451         const size_t Y_Size  = Parameters->GetReducedDimensionSizes().Y;
00452         const size_t Z_Size  = Parameters->GetReducedDimensionSizes().Z;
00453                 
00454         
00455         float * kappa = Get_kappa().GetRawData();        
00456         
00457         #ifndef __NO_OMP__    
00458             #pragma omp for schedule (static)     
00459         #endif
00460         for (size_t z = 0; z < Z_Size; z++){
00461                        
00462             const float z_f = (float) z;           
00463             float z_part = 0.5f - fabsf(0.5f - z_f * Nz_rec );
00464                   z_part = (z_part * z_part) * dz_sq_rec;
00465                   
00466                   
00467             for (size_t y = 0; y < Y_Size; y++){
00468                 
00469                 const float y_f = (float) y;
00470                       float y_part = 0.5f - fabsf(0.5f - y_f * Ny_rec);
00471                             y_part = (y_part * y_part) * dy_sq_rec;
00472 
00473                 const float yz_part = z_part + y_part;
00474                 for (size_t x = 0; x < X_Size; x++){
00475                 
00476                     const float x_f = (float) x;                    
00477                           float x_part = 0.5f - fabsf(0.5f - x_f * Nx_rec);
00478                                 x_part = (x_part * x_part) * dx_sq_rec;
00479     
00480                          float  k = c_ref_dt_pi * sqrtf(x_part + yz_part);
00481                          
00482                          // kappa element
00483                          kappa[(z*Y_Size + y) * X_Size + x ] = (k == 0.0f) ? 1.0f : sinf(k)/k;
00484                 
00485                 }//x
00486                 
00487             }//y
00488         }// z
00489         
00490         
00491         
00492     }// parallel
00493     
00494     
00495 }// end of GenerateKappa
00496 //------------------------------------------------------------------------------
00497 
00503 void TKSpaceFirstOrder3DSolver::Generate_kappa_absorb_nabla1_absorb_nabla2(){
00504   
00505     
00506     #ifndef __NO_OMP__    
00507        #pragma omp parallel 
00508     #endif
00509     {
00510     
00511         const float dx_sq_rec = 1.0f / (Parameters->Get_dx()*Parameters->Get_dx());
00512         const float dy_sq_rec = 1.0f / (Parameters->Get_dy()*Parameters->Get_dy());
00513         const float dz_sq_rec = 1.0f / (Parameters->Get_dz()*Parameters->Get_dz());
00514         
00515         
00516         const float c_ref_dt_2 = Parameters->Get_c_ref() * Parameters->Get_dt() * 0.5f;
00517         const float pi_2       = float(M_PI) * 2.0f;
00518         
00519         const size_t Nx = Parameters->GetFullDimensionSizes().X;
00520         const size_t Ny = Parameters->GetFullDimensionSizes().Y;
00521         const size_t Nz = Parameters->GetFullDimensionSizes().Z;
00522         
00523         const float Nx_rec   = 1.0f / (float) Nx;
00524         const float Ny_rec   = 1.0f / (float) Ny;
00525         const float Nz_rec   = 1.0f / (float) Nz;
00526         
00527         
00528         
00529         const size_t X_Size  = Parameters->GetReducedDimensionSizes().X;
00530         const size_t Y_Size  = Parameters->GetReducedDimensionSizes().Y;
00531         const size_t Z_Size  = Parameters->GetReducedDimensionSizes().Z;
00532         
00533         
00534         
00535         float * kappa           = Get_kappa().GetRawData();        
00536         float * absorb_nabla1   = Get_absorb_nabla1().GetRawData();
00537         float * absorb_nabla2   = Get_absorb_nabla2().GetRawData();
00538         const float alpha_power = Parameters->Get_alpha_power();
00539                 
00540         
00541         #ifndef __NO_OMP__    
00542             #pragma omp for schedule (static)     
00543         #endif
00544         for (size_t z = 0; z < Z_Size; z++){            
00545             
00546                  const float z_f = (float) z;           
00547             float z_part = 0.5f - fabsf(0.5f - z_f * Nz_rec );
00548                   z_part = (z_part * z_part) * dz_sq_rec;
00549 
00550             for (size_t y = 0; y < Y_Size; y++){
00551                                 
00552                 const float y_f = (float) y;
00553                       float y_part = 0.5f - fabsf(0.5f - y_f * Ny_rec);
00554                             y_part = (y_part * y_part) * dy_sq_rec;
00555 
00556                 const float yz_part = z_part + y_part;
00557                 
00558                 
00559                 size_t i = (z*Y_Size + y) * X_Size;
00560                 
00561                 for (size_t x = 0; x < X_Size; x++){                
00562                      const float x_f = (float) x;                    
00563                          
00564                          float x_part = 0.5f - fabsf(0.5f - x_f * Nx_rec);
00565                                x_part = (x_part * x_part) * dx_sq_rec;
00566     
00567                                
00568                          float  k         = pi_2 * sqrtf(x_part + yz_part);
00569                          float  c_ref_k   = c_ref_dt_2 * k;
00570                          
00571                          absorb_nabla1[i] = powf(k, alpha_power - 2);
00572                          absorb_nabla2[i] = powf(k, alpha_power - 1);
00573                          
00574                          kappa[i]         =  (c_ref_k == 0.0f) ? 1.0f : sinf(c_ref_k)/c_ref_k;                         
00575                          
00576                                                   
00577                         
00578                          if (absorb_nabla1[i] == INFINITY) absorb_nabla1[i] = 0.0f;                         
00579                          if (absorb_nabla2[i] == INFINITY) absorb_nabla2[i] = 0.0f;
00580                          
00581                          i++;
00582                 }//x
00583                 
00584             }//y
00585         }// z
00586         
00587         
00588         
00589     }// parallel
00590     
00591 }// end of Generate_kappa_absorb_nabla1_absorb_nabla2
00592 //------------------------------------------------------------------------------
00593 
00597 void TKSpaceFirstOrder3DSolver::Generate_absorb_tau_absorb_eta_matrix(){
00598     
00599     
00600     // test for scalars
00601     if ((Parameters->Get_alpha_coeff_scallar_flag()) && (Parameters->Get_c0_scalar_flag())){
00602 
00603           const float alpha_power = Parameters->Get_alpha_power();                
00604           const float tan_pi_y_2  = tanf(float(M_PI_2)* alpha_power);
00605           const float alpha_db_neper_coeff = (100.0f * powf(1.0e-6f / (2.0f * (float) M_PI), alpha_power)) / 
00606                                              (20.0f * (float) M_LOG10E);
00607         
00608          const float alpha_coeff_2 = 2.0f * Parameters->Get_alpha_coeff_scallar() * alpha_db_neper_coeff;
00609           
00610          Parameters->Get_absorb_tau_scalar() =  (-alpha_coeff_2) * powf(Parameters->Get_c0_scalar(),alpha_power-1);
00611          Parameters->Get_absorb_eta_scalar() =    alpha_coeff_2  * powf(Parameters->Get_c0_scalar(),alpha_power) * tan_pi_y_2;        
00612         
00613     }
00614     else {
00615 
00616         #ifndef __NO_OMP__    
00617            #pragma omp parallel 
00618         #endif
00619         {
00620 
00621 
00622             const size_t Z_Size  = Parameters->GetFullDimensionSizes().Z;
00623             const size_t Y_Size  = Parameters->GetFullDimensionSizes().Y;
00624             const size_t X_Size  = Parameters->GetFullDimensionSizes().X;
00625 
00626             float * absorb_tau = Get_absorb_tau().GetRawData();        
00627             float * absorb_eta = Get_absorb_eta().GetRawData();        
00628 
00629 
00630             float * alpha_coeff;      
00631             size_t  alpha_shift;         
00632 
00633             if (Parameters->Get_alpha_coeff_scallar_flag()) {
00634                 alpha_coeff = &(Parameters->Get_alpha_coeff_scallar());
00635                 alpha_shift = 0;
00636             }else {
00637                 alpha_coeff = Get_Temp_1_RS3D().GetRawData();   
00638                 alpha_shift = 1;
00639             }
00640 
00641 
00642             float * c0;
00643             size_t  c0_shift;
00644             if (Parameters->Get_c0_scalar_flag()) {
00645                 c0 = &(Parameters->Get_c0_scalar());
00646                 c0_shift = 0;
00647             }else {
00648                 c0 = Get_c2().GetRawData();   
00649                 c0_shift = 1;
00650             }
00651 
00652 
00653             const float alpha_power = Parameters->Get_alpha_power();                
00654             const float tan_pi_y_2  = tanf(float(M_PI_2)* alpha_power);
00655 
00656             //alpha = 100*alpha.*(1e-6/(2*pi)).^y./
00657             //                  (20*log10(exp(1)));
00658             const float alpha_db_neper_coeff = (100.0f * powf(1.0e-6f / (2.0f * (float) M_PI), alpha_power)) / 
00659                                                (20.0f * (float) M_LOG10E);
00660 
00661             #ifndef __NO_OMP__    
00662                 #pragma omp for schedule (static)     
00663             #endif
00664             for (size_t z = 0; z < Z_Size; z++){                                           
00665                 for (size_t y = 0; y < Y_Size; y++){                           
00666                     size_t i = (z*Y_Size + y) * X_Size;
00667                     for (size_t x = 0; x < X_Size; x++){                
00668 
00669                         const float alpha_coeff_2 = 2.0f * alpha_coeff[i * alpha_shift] * alpha_db_neper_coeff;
00670                         absorb_tau[i] = (-alpha_coeff_2) * powf(c0[i * c0_shift],alpha_power-1);
00671                         absorb_eta[i] =   alpha_coeff_2  * powf(c0[i * c0_shift],alpha_power) * tan_pi_y_2;
00672                         i++;        
00673 
00674                     }//x
00675 
00676                 }//y
00677             }// z                
00678         }// parallel
00679     } // absorb_tau and aborb_eta = matrics
00680     
00681 }// end of Generate_absorb_tau_absorb_eta_matrix
00682 //------------------------------------------------------------------------------
00683 
00688 void TKSpaceFirstOrder3DSolver::Caclucalte_dt_rho0_non_uniform(){
00689 
00690     
00691 #ifndef __NO_OMP__    
00692     #pragma omp parallel 
00693 #endif
00694   {
00695        
00696       float * dt_rho0_sgx   = Get_dt_rho0_sgx().GetRawData();
00697       float * dt_rho0_sgy   = Get_dt_rho0_sgy().GetRawData();
00698       float * dt_rho0_sgz   = Get_dt_rho0_sgz().GetRawData();
00699       
00700       const float dt = Parameters->Get_dt();
00701       
00702       const float * duxdxn_sgx = Get_dxudxn_sgx().GetRawData();      
00703       const float * duydyn_sgy = Get_dyudyn_sgy().GetRawData();
00704       const float * duzdzn_sgz = Get_dzudzn_sgz().GetRawData();
00705       
00706       const size_t Z_Size = Get_dt_rho0_sgx().GetDimensionSizes().Z;
00707       const size_t Y_Size = Get_dt_rho0_sgx().GetDimensionSizes().Y;
00708       const size_t X_Size = Get_dt_rho0_sgx().GetDimensionSizes().X;
00709       
00710       const size_t SliceSize = (X_Size * Y_Size );
00711 
00712       
00713       
00714        #ifndef __NO_OMP__            
00715              #pragma omp for schedule (static)     
00716        #endif  
00717         for (size_t z = 0; z < Z_Size; z++){
00718 
00719             
00720             register size_t i = z* SliceSize;                        
00721             for (size_t y = 0; y< Y_Size; y++){                                               
00722                 for (size_t x = 0; x < X_Size; x++){              
00723                     dt_rho0_sgx[i] =  (dt * duxdxn_sgx[x])  /  dt_rho0_sgx[i];                    
00724                     i++;                    
00725                 } // x
00726             } // y
00727           } // z
00728      
00729       
00730       #ifndef __NO_OMP__            
00731              #pragma omp for schedule (static)    
00732        #endif  
00733         for (size_t z = 0; z < Z_Size; z++){
00734 
00735             register size_t i = z* SliceSize;                        
00736             for (size_t y = 0; y< Y_Size; y++){                                 
00737                 const float duydyn_el = duydyn_sgy[y];
00738                 for (size_t x = 0; x < X_Size; x++){
00739                             
00740                     dt_rho0_sgy[i] =  (dt * duydyn_el)  /  dt_rho0_sgy[i];                                  
00741                     i++;                    
00742                 } // x
00743             } // y
00744           } // z
00745       
00746       
00747       
00748       #ifndef __NO_OMP__            
00749              #pragma omp for schedule (static)     
00750        #endif  
00751         for (size_t z = 0; z < Z_Size; z++){
00752 
00753             register size_t i = z* SliceSize;                                                
00754             const float duzdzn_el = duzdzn_sgz[z];
00755             for (size_t y = 0; y< Y_Size; y++){                                               
00756                 for (size_t x = 0; x < X_Size; x++){                                  
00757                     dt_rho0_sgz[i] =  (dt * duzdzn_el)  /  dt_rho0_sgz[i];                    
00758                     i++;                    
00759                 } // x
00760             } // y
00761           } // z
00762       
00763       } // parallel
00764     
00765       
00766     
00767 }// end of Caclucalte_dt_rho0_non_uniform
00768 //------------------------------------------------------------------------------
00769 
00774 void TKSpaceFirstOrder3DSolver::Calculate_p0_source(){
00775     
00776     
00777     Get_p().CopyData(Get_p0_source_input());
00778     
00779     const float * p0 = Get_p0_source_input().GetRawData();
00780           
00781     float * c2;
00782     size_t c2_shift;
00783     
00784     if (Parameters->Get_c0_scalar_flag()) {
00785         c2 = &Parameters->Get_c0_scalar();
00786         c2_shift = 0;
00787         
00788     }else{
00789         c2 = Get_c2().GetRawData();
00790         c2_shift = 1;
00791     }
00792         
00793     
00794     
00795     float * rhox = Get_rhox().GetRawData();
00796     float * rhoy = Get_rhoy().GetRawData();
00797     float * rhoz = Get_rhoz().GetRawData();
00798 
00799     //-- add the initial pressure to rho as a mass source --//    
00800 
00801     float tmp;
00802     
00803 #ifndef __NO_OMP__    
00804     #pragma omp parallel for schedule (static) private(tmp)
00805 #endif
00806     for (size_t i = 0; i < Get_rhox().GetTotalElementCount(); i++){
00807       
00808       tmp = p0[i] / (3.0f* c2[i * c2_shift]);                
00809       rhox[i] = tmp;
00810       rhoy[i] = tmp;
00811       rhoz[i] = tmp;
00812     }
00813     
00814 
00815     
00816     //------------------------------------------------------------------------//
00817     //--  compute u(t = t1 + dt/2) based on the assumption u(dt/2) = -u(-dt/2) --//
00818     //--    which forces u(t = t1) = 0 --//
00819     //------------------------------------------------------------------------//   
00820     
00821     
00822     Compute_ddx_kappa_fft_p(Get_p(),
00823                             Get_FFT_X_temp(),Get_FFT_Y_temp(),Get_FFT_Z_temp(),
00824                             Get_kappa(),
00825                             Get_ddx_k_shift_pos(),Get_ddy_k_shift_pos(),Get_ddz_k_shift_pos()
00826                             );
00827                 
00828     
00829     if (Parameters->Get_rho0_scalar_flag()){
00830      
00831         if (Parameters->Get_nonuniform_grid_flag() ) {
00832             Get_ux_sgx().Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_x(
00833                     Parameters->Get_rho0_sgx_scalar(), Get_dxudxn_sgx(),   Get_FFT_X_temp());    
00834             Get_uy_sgy(). Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_y
00835                     (Parameters->Get_rho0_sgy_scalar(), Get_dyudyn_sgy(), Get_FFT_Y_temp());    
00836             Get_uz_sgz().Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_z(
00837                     Parameters->Get_rho0_sgz_scalar(), Get_dzudzn_sgz(),Get_FFT_Z_temp());
00838                        
00839         } else {
00840             Get_ux_sgx().Compute_dt_rho_sg_mul_ifft_div_2(Parameters->Get_rho0_sgx_scalar(), Get_FFT_X_temp());    
00841             Get_uy_sgy().Compute_dt_rho_sg_mul_ifft_div_2(Parameters->Get_rho0_sgy_scalar(), Get_FFT_Y_temp());    
00842             Get_uz_sgz().Compute_dt_rho_sg_mul_ifft_div_2(Parameters->Get_rho0_sgz_scalar(), Get_FFT_Z_temp());
00843         }
00844     } else {
00845       // divide the matrix by 2 and multiply with st./rho0_sg
00846       Get_ux_sgx().Compute_dt_rho_sg_mul_ifft_div_2(Get_dt_rho0_sgx(), Get_FFT_X_temp());    
00847       Get_uy_sgy().Compute_dt_rho_sg_mul_ifft_div_2(Get_dt_rho0_sgy(), Get_FFT_Y_temp());    
00848       Get_uz_sgz().Compute_dt_rho_sg_mul_ifft_div_2(Get_dt_rho0_sgz(), Get_FFT_Z_temp());
00849     }  
00850     
00851 }// end of Calculate_p0_source
00852 //------------------------------------------------------------------------------
00853 
00854 
00855 
00860 void TKSpaceFirstOrder3DSolver::Compute_c2(){
00861     
00862     if (Parameters->Get_c0_scalar_flag()) { //scalar
00863         float c = Parameters->Get_c0_scalar();
00864         Parameters->Get_c0_scalar() = c * c;
00865         
00866     }else {
00867     
00868         float * c2 =  Get_c2().GetRawData();
00869 
00870         #ifndef __NO_OMP__            
00871            #pragma omp parallel for schedule (static)     
00872         #endif  
00873         for (size_t i=0; i< Get_c2().GetTotalElementCount(); i++){
00874                c2[i] = c2[i] * c2[i];        
00875         }
00876     }// matrix
00877 }// ComputeC2
00878 //------------------------------------------------------------------------------
00879 
00880 
00881 
00898 void TKSpaceFirstOrder3DSolver::Compute_ddx_kappa_fft_p(TRealMatrix& X_Matrix,
00899                                      TFFTWComplexMatrix& FFT_X, TFFTWComplexMatrix& FFT_Y, TFFTWComplexMatrix& FFT_Z,
00900                                      TRealMatrix& kappa,
00901                                      TComplexMatrix& ddx, TComplexMatrix& ddy, TComplexMatrix& ddz)
00902 {
00903     // Compute FFT of X
00904   FFT_X.Compute_FFT_3D_R2C(X_Matrix);
00905 
00906 #ifndef __NO_OMP__    
00907     #pragma omp parallel 
00908 #endif
00909   {
00910       float * p_k_x_data   = FFT_X.GetRawData();
00911       float * p_k_y_data   = FFT_Y.GetRawData();
00912       float * p_k_z_data   = FFT_Z.GetRawData();
00913       
00914       float * kappa_data = kappa.GetRawData();
00915       float * ddx_data   = ddx.GetRawData();
00916       float * ddy_data   = ddy.GetRawData();
00917       float * ddz_data   = ddz.GetRawData();
00918 
00919       const size_t Z_Size = FFT_X.GetDimensionSizes().Z;
00920       const size_t Y_Size = FFT_X.GetDimensionSizes().Y;
00921       const size_t X_Size = FFT_X.GetDimensionSizes().X;
00922       
00923       const size_t SliceSize = (X_Size * Y_Size ) << 1;
00924       
00925 #ifndef __NO_OMP__ 
00926       #pragma omp for schedule (static)      
00927 #endif             
00928     for (size_t z = 0; z < Z_Size; z++){
00929         
00930         register size_t i = z * SliceSize;
00931         const size_t z2 = z<<1;
00932         for (size_t y = 0; y< Y_Size; y++){                                
00933             const size_t y2 = y<<1;
00934             for (size_t x = 0; x < X_Size;  x++){
00935                         
00936                // kappa ./ p_k
00937                const float kappa_el  = kappa_data[i>>1]; 
00938                const float p_k_el_re = p_k_x_data[i]   * kappa_el;
00939                const float p_k_el_im = p_k_x_data[i+1] * kappa_el;
00940                const size_t x2 = x<<1;
00941          
00942                
00943                //bsxfun(ddx...)
00944                 p_k_x_data[i]   = p_k_el_re * ddx_data[x2]   - p_k_el_im * ddx_data[x2+1];
00945                 p_k_x_data[i+1] = p_k_el_re * ddx_data[x2+1] + p_k_el_im * ddx_data[x2]; 
00946                         
00947                //bsxfun(ddy...)
00948                 p_k_y_data[i]   = p_k_el_re * ddy_data[y2]   - p_k_el_im * ddy_data[y2+1]; 
00949                 p_k_y_data[i+1] = p_k_el_re * ddy_data[y2+1] + p_k_el_im * ddy_data[y2]; 
00950                                        
00951                //bsxfun(ddz...)
00952                 p_k_z_data[i]   = p_k_el_re * ddz_data[z2]   - p_k_el_im * ddz_data[z2+1]; 
00953                 p_k_z_data[i+1] = p_k_el_re * ddz_data[z2+1] + p_k_el_im * ddz_data[z2]; 
00954                 
00955                
00956                 i +=2;
00957             } // x
00958         } // y
00959     } // z
00960     
00961   }
00962 }// end of KSpaceFirstOrder3DSolver
00963 //------------------------------------------------------------------------------
00964 
00965 
00970 void  TKSpaceFirstOrder3DSolver::Compute_duxyz(){
00971    
00972      Get_FFT_X_temp().Compute_FFT_3D_R2C(Get_ux_sgx());     
00973      Get_FFT_Y_temp().Compute_FFT_3D_R2C(Get_uy_sgy());
00974      Get_FFT_Z_temp().Compute_FFT_3D_R2C(Get_uz_sgz());
00975      
00976      #ifndef __NO_OMP__   
00977        #pragma omp parallel
00978      #endif   
00979      {
00980       
00981        float * Temp_FFT_X_Data  = Get_FFT_X_temp().GetRawData();
00982        float * Temp_FFT_Y_Data  = Get_FFT_Y_temp().GetRawData();
00983        float * Temp_FFT_Z_Data  = Get_FFT_Z_temp().GetRawData();
00984        
00985               
00986        const float * kappa   = Get_kappa().GetRawData();
00987        
00988        const size_t FFT_Z_dim = Get_FFT_X_temp().GetDimensionSizes().Z;
00989        const size_t FFT_Y_dim = Get_FFT_X_temp().GetDimensionSizes().Y;
00990        const size_t FFT_X_dim = Get_FFT_X_temp().GetDimensionSizes().X;
00991 
00992        const size_t SliceSize = (FFT_X_dim * FFT_Y_dim) << 1;
00993        const float  Divider = 1.0f / Get_ux_sgx().GetTotalElementCount();
00994        
00995        const TFloatComplex * ddx = (TFloatComplex *) Get_ddx_k_shift_neg().GetRawData();
00996        const TFloatComplex * ddy = (TFloatComplex *) Get_ddy_k_shift_neg().GetRawData();
00997        const TFloatComplex * ddz = (TFloatComplex *) Get_ddz_k_shift_neg().GetRawData();
00998        
00999         
01000    #ifndef __NO_OMP__            
01001          #pragma omp for schedule (static)     
01002    #endif  
01003     for (size_t z = 0; z < FFT_Z_dim; z++){
01004 
01005         register size_t i = z* SliceSize;
01006 
01007         const float ddz_neg_re = ddz[z].real;
01008         const float ddz_neg_im = ddz[z].imag;
01009         for (size_t y = 0; y< FFT_Y_dim; y++){               
01010 
01011             const float ddy_neg_re = ddy[y].real;
01012             const float ddy_neg_im = ddy[y].imag;
01013 
01014             for (size_t x = 0; x < FFT_X_dim; x++){
01015 
01016                 float FFT_el_x_re = Temp_FFT_X_Data[i];
01017                 float FFT_el_x_im = Temp_FFT_X_Data[i+1];
01018 
01019                 float FFT_el_y_re = Temp_FFT_Y_Data[i];
01020                 float FFT_el_y_im = Temp_FFT_Y_Data[i+1];
01021 
01022                 float FFT_el_z_re = Temp_FFT_Z_Data[i];
01023                 float FFT_el_z_im = Temp_FFT_Z_Data[i+1];
01024 
01025                 const float kappa_el = kappa[i >> 1];
01026 
01027                 FFT_el_x_re   *= kappa_el;
01028                 FFT_el_x_im   *= kappa_el;
01029 
01030                 FFT_el_y_re   *= kappa_el;
01031                 FFT_el_y_im   *= kappa_el;
01032 
01033                 FFT_el_z_re   *= kappa_el;
01034                 FFT_el_z_im   *= kappa_el;
01035                
01036 
01037                 Temp_FFT_X_Data[i]     = ((FFT_el_x_re * ddx[x].real) - 
01038                                           (FFT_el_x_im * ddx[x].imag)
01039                                          ) * Divider;
01040                 Temp_FFT_X_Data[i + 1] = ((FFT_el_x_im * ddx[x].real) + 
01041                                           (FFT_el_x_re * ddx[x].imag)
01042                                          )* Divider;
01043 
01044                 Temp_FFT_Y_Data[i]     = ((FFT_el_y_re * ddy_neg_re) - 
01045                                           (FFT_el_y_im * ddy_neg_im)
01046                                          ) * Divider;
01047                 Temp_FFT_Y_Data[i + 1] = ((FFT_el_y_im * ddy_neg_re) + 
01048                                           (FFT_el_y_re * ddy_neg_im)
01049                                          )* Divider;
01050 
01051                 Temp_FFT_Z_Data[i]     = ((FFT_el_z_re * ddz_neg_re) - 
01052                                           (FFT_el_z_im * ddz_neg_im)
01053                                          ) * Divider;
01054                 Temp_FFT_Z_Data[i + 1] = ((FFT_el_z_im * ddz_neg_re) + 
01055                                           (FFT_el_z_re * ddz_neg_im)
01056                                          )* Divider;
01057 
01058                 i+=2;
01059             } // x
01060         } // y
01061       } // z
01062             
01063      } // parallel;    
01064     
01065    Get_FFT_X_temp().Compute_iFFT_3D_C2R(Get_duxdx());     
01066    Get_FFT_Y_temp().Compute_iFFT_3D_C2R(Get_duydy());
01067    Get_FFT_Z_temp().Compute_iFFT_3D_C2R(Get_duzdz());                  
01068    
01069    //-------------------------------------------------------------------------//
01070    //--------------------- Non linear grid -----------------------------------//
01071    //-------------------------------------------------------------------------//
01072    if (Parameters->Get_nonuniform_grid_flag() != 0){
01073    
01074    
01075       #ifndef __NO_OMP__    
01076         #pragma omp parallel 
01077       #endif
01078       {
01079 
01080           float * duxdx = Get_duxdx().GetRawData();
01081           float * duydy = Get_duydy().GetRawData();
01082           float * duzdz = Get_duzdz().GetRawData();
01083 
01084           const float * duxdxn = Get_dxudxn().GetRawData();      
01085           const float * duydyn = Get_dyudyn().GetRawData();
01086           const float * duzdzn = Get_dzudzn().GetRawData();
01087 
01088           const size_t Z_Size = Get_duxdx().GetDimensionSizes().Z;
01089           const size_t Y_Size = Get_duxdx().GetDimensionSizes().Y;
01090           const size_t X_Size = Get_duxdx().GetDimensionSizes().X;
01091 
01092           const size_t SliceSize = (X_Size * Y_Size );
01093 
01094 
01095            #ifndef __NO_OMP__            
01096                  #pragma omp for schedule (static)     
01097            #endif  
01098             for (size_t z = 0; z < Z_Size; z++){
01099                 register size_t i = z* SliceSize;                        
01100                 for (size_t y = 0; y< Y_Size; y++){                                               
01101                     for (size_t x = 0; x < X_Size; x++){              
01102                         duxdx[i] *= duxdxn[x];
01103                         i++;                    
01104                     } // x
01105                 } // y
01106               } // z
01107 
01108 
01109           #ifndef __NO_OMP__            
01110                  #pragma omp for schedule (static)    
01111            #endif  
01112             for (size_t z = 0; z < Z_Size; z++){
01113 
01114                 register size_t i = z* SliceSize;                        
01115                 for (size_t y = 0; y< Y_Size; y++){  
01116                     const float dyudyn_el = duydyn[y];
01117                     for (size_t x = 0; x < X_Size; x++){
01118 
01119                         duydy[i] *=  dyudyn_el;
01120                         i++;                    
01121                     } // x
01122                 } // y
01123               } // z
01124 
01125 
01126 
01127           #ifndef __NO_OMP__            
01128                  #pragma omp for schedule (static)     
01129            #endif  
01130             for (size_t z = 0; z < Z_Size; z++){
01131                 const float duzdzn_el = duzdzn[z];
01132                 register size_t i = z* SliceSize;                                        
01133                 for (size_t y = 0; y< Y_Size; y++){                                               
01134                     for (size_t x = 0; x < X_Size; x++){
01135 
01136                         duzdz[i] *=  duzdzn_el;
01137                         i++;                    
01138                     } // x
01139                 } // y
01140               } // z
01141 
01142           } // parallel
01143     
01144    }// nonlinear
01145    
01146    
01147    
01148 }// end of Compute_duxyz
01149 //------------------------------------------------------------------------------
01150 
01151 
01152 
01153 
01154 
01155 
01160 void TKSpaceFirstOrder3DSolver::Compute_rhoxyz_nonlinear(){
01161     
01162     
01163     const size_t Z_Size = Get_rhox().GetDimensionSizes().Z;
01164     const size_t Y_Size = Get_rhox().GetDimensionSizes().Y;
01165     const size_t X_Size = Get_rhox().GetDimensionSizes().X;
01166     
01167     
01168     const float dt2 = 2.0f * Parameters->Get_dt();
01169     const float dt_el = Parameters->Get_dt();
01170     const size_t SliceSize =  Y_Size * X_Size;
01171     
01172     #ifndef __NO_OMP__            
01173         #pragma omp parallel 
01174     #endif  
01175     {
01176     
01177         float * rhox_data  = Get_rhox().GetRawData();
01178         float * rhoy_data  = Get_rhoy().GetRawData();        
01179         float * rhoz_data  = Get_rhoz().GetRawData();        
01180         
01181         const float * pml_x_data = Get_pml_x().GetRawData();
01182         const float * duxdx_data = Get_duxdx().GetRawData();
01183         const float * duydy_data = Get_duydy().GetRawData();
01184         const float * duzdz_data = Get_duzdz().GetRawData();
01185 
01186         
01187 
01188         // Scalar
01189         if (Parameters->Get_rho0_scalar()) {
01190             
01191             
01192             const float dt_rho0 = Parameters->Get_rho0_scalar() * dt_el;          
01193         
01194             #ifndef __NO_OMP__            
01195                 #pragma omp for schedule (static)     
01196             #endif  
01197             for (size_t z = 0; z < Z_Size; z++){
01198 
01199                 register size_t i = z * SliceSize;
01200 
01201                 for (size_t y = 0; y< Y_Size; y++){                                            
01202                     for (size_t x = 0; x < X_Size; x++){
01203 
01204                         const float pml_x   = pml_x_data[x];                        
01205                               float du      = duxdx_data[i];
01206 
01207                         rhox_data[i] = pml_x * (
01208                                             ((pml_x * rhox_data[i]) - (dt_rho0 * du))/
01209                                             (1.0f + (dt2 * du))                        
01210                                             );
01211                         i++;
01212 
01213                     } // x
01214                 }// y
01215             }// z
01216 
01217               #ifndef __NO_OMP__            
01218                 #pragma omp for schedule (static)     
01219             #endif  
01220             for (size_t z = 0; z < Z_Size; z++){
01221 
01222                 register size_t i = z * SliceSize;            
01223                 for (size_t y = 0; y< Y_Size; y++){                
01224 
01225                     const float pml_y = Get_pml_y()[y];
01226                     for (size_t x = 0; x < X_Size; x++){
01227                         
01228                               float du = duydy_data[i];
01229 
01230 
01231                         rhoy_data[i] = pml_y * (
01232                                             ((pml_y * rhoy_data[i]) - (dt_rho0 * du))/
01233                                             (1.0f + (dt2 * du))                        
01234                                             );
01235                         i++;
01236 
01237                     } // x
01238                 }// y
01239             }// z
01240 
01241           
01242               #ifndef __NO_OMP__            
01243                 #pragma omp for schedule (static)     
01244             #endif  
01245             for (size_t z = 0; z < Z_Size; z++){
01246 
01247                 register size_t i = z * SliceSize;
01248                 const float pml_z = Get_pml_z()[z];
01249                 for (size_t y = 0; y< Y_Size; y++){                
01250 
01251                     for (size_t x = 0; x < X_Size; x++){
01252                         
01253                               float du      = duzdz_data[i];
01254 
01255 
01256 
01257                         rhoz_data[i] = pml_z * (
01258                                             ((pml_z * rhoz_data[i]) - (dt_rho0 * du))/
01259                                             (1.0f + (dt2 * du))                        
01260                                             );
01261                         i++;
01262 
01263                     } // x
01264                 }// y
01265             }// z
01266    
01267             
01268             
01269             
01270             
01271         }else { 
01272             //----------------------------------------------------------------//
01273             // rho0 is a matrix
01274         
01275             
01276             const float * rho0_data  = Get_rho0().GetRawData();
01277         
01278             #ifndef __NO_OMP__            
01279                 #pragma omp for schedule (static)     
01280             #endif  
01281             for (size_t z = 0; z < Z_Size; z++){
01282 
01283                 register size_t i = z * SliceSize;
01284 
01285                 for (size_t y = 0; y< Y_Size; y++){                                            
01286                     for (size_t x = 0; x < X_Size; x++){
01287 
01288                         const float pml_x   = pml_x_data[x];
01289                         const float dt_rho0 = dt_el * rho0_data[i];
01290                               float du      = duxdx_data[i];
01291 
01292                         rhox_data[i] = pml_x * (
01293                                             ((pml_x * rhox_data[i]) - (dt_rho0 * du))/
01294                                             (1.0f + (dt2 * du))                        
01295                                             );
01296 
01297                         i++;
01298 
01299                     } // x
01300                 }// y
01301             }// z
01302 
01303 
01304               #ifndef __NO_OMP__            
01305                 #pragma omp for schedule (static)     
01306             #endif  
01307             for (size_t z = 0; z < Z_Size; z++){
01308 
01309                 register size_t i = z * SliceSize;            
01310                 for (size_t y = 0; y< Y_Size; y++){                
01311 
01312                     const float pml_y = Get_pml_y()[y];
01313                     for (size_t x = 0; x < X_Size; x++){
01314 
01315                         const float dt_rho0 = dt_el * rho0_data[i];
01316                               float du = duydy_data[i];
01317 
01318 
01319                         rhoy_data[i] = pml_y * (
01320                                             ((pml_y * rhoy_data[i]) - (dt_rho0 * du))/
01321                                             (1.0f + (dt2 * du))                        
01322                                             );
01323 
01324                         i++;
01325 
01326                     } // x
01327                 }// y
01328             }// z
01329 
01330           
01331               #ifndef __NO_OMP__            
01332                 #pragma omp for schedule (static)     
01333             #endif  
01334             for (size_t z = 0; z < Z_Size; z++){
01335 
01336                 register size_t i = z * SliceSize;
01337                 const float pml_z = Get_pml_z()[z];
01338                 for (size_t y = 0; y< Y_Size; y++){                
01339 
01340                     for (size_t x = 0; x < X_Size; x++){
01341 
01342 
01343                         const float dt_rho0 = dt_el * rho0_data[i];
01344                               float du      = duzdz_data[i];
01345 
01346 
01347 
01348                         rhoz_data[i] = pml_z * (
01349                                             ((pml_z * rhoz_data[i]) - (dt_rho0 * du))/
01350                                             (1.0f + (dt2 * du))                        
01351                                             );
01352                         i++;
01353 
01354                     } // x
01355                 }// y
01356             }// z
01357 
01358         } // end matrix     
01359   }// parallel
01360 }// end of Compute_rhoxyz
01361 //------------------------------------------------------------------------------
01362 
01363  
01364 
01365 
01370 void TKSpaceFirstOrder3DSolver::Compute_rhoxyz_linear(){
01371     
01372     
01373     const size_t Z_Size = Get_rhox().GetDimensionSizes().Z;
01374     const size_t Y_Size = Get_rhox().GetDimensionSizes().Y;
01375     const size_t X_Size = Get_rhox().GetDimensionSizes().X;
01376     
01377     
01378     
01379     const float dt_el = Parameters->Get_dt();
01380     const size_t SliceSize =  Y_Size * X_Size;
01381     
01382     #ifndef __NO_OMP__            
01383         #pragma omp parallel 
01384     #endif  
01385     {
01386     
01387         float * rhox_data  = Get_rhox().GetRawData();
01388         float * rhoy_data  = Get_rhoy().GetRawData();        
01389         float * rhoz_data  = Get_rhoz().GetRawData();        
01390         
01391         const float * pml_x_data = Get_pml_x().GetRawData();
01392         const float * duxdx_data = Get_duxdx().GetRawData();
01393         const float * duydy_data = Get_duydy().GetRawData();
01394         const float * duzdz_data = Get_duzdz().GetRawData();
01395 
01396         
01397 
01398         // Scalar
01399         if (Parameters->Get_rho0_scalar()) {
01400             
01401             
01402             const float dt_rho0 = Parameters->Get_rho0_scalar() * dt_el;          
01403         
01404             #ifndef __NO_OMP__            
01405                 #pragma omp for schedule (static)     
01406             #endif  
01407             for (size_t z = 0; z < Z_Size; z++){
01408 
01409                 register size_t i = z * SliceSize;
01410 
01411                 for (size_t y = 0; y< Y_Size; y++){                                            
01412                     for (size_t x = 0; x < X_Size; x++){
01413 
01414                         const float pml_x   = pml_x_data[x];                                                
01415                         rhox_data[i] = pml_x * (
01416                                             ((pml_x * rhox_data[i]) - (dt_rho0 * duxdx_data[i]))    
01417                                             );
01418                         i++;
01419 
01420                     } // x
01421                 }// y
01422             }// z
01423 
01424               #ifndef __NO_OMP__            
01425                 #pragma omp for schedule (static)     
01426             #endif  
01427             for (size_t z = 0; z < Z_Size; z++){
01428 
01429                 register size_t i = z * SliceSize;            
01430                 for (size_t y = 0; y< Y_Size; y++){                
01431 
01432                     const float pml_y = Get_pml_y()[y];
01433                     for (size_t x = 0; x < X_Size; x++){
01434                                                       
01435                         rhoy_data[i] = pml_y * (
01436                                             ((pml_y * rhoy_data[i]) - (dt_rho0 * duydy_data[i]))  
01437                                             );
01438                         i++;
01439 
01440                     } // x
01441                 }// y
01442             }// z
01443 
01444           
01445               #ifndef __NO_OMP__            
01446                 #pragma omp for schedule (static)     
01447             #endif  
01448             for (size_t z = 0; z < Z_Size; z++){
01449 
01450                 register size_t i = z * SliceSize;
01451                 const float pml_z = Get_pml_z()[z];
01452                 for (size_t y = 0; y< Y_Size; y++){                
01453 
01454                     for (size_t x = 0; x < X_Size; x++){
01455                         
01456                         rhoz_data[i] = pml_z * (
01457                                             ((pml_z * rhoz_data[i]) - (dt_rho0 * duzdz_data[i]))                                            
01458                                             );
01459                         i++;
01460 
01461                     } // x
01462                 }// y
01463             }// z
01464    
01465             
01466             
01467             
01468             
01469         }else { 
01470             //----------------------------------------------------------------//
01471             // rho0 is a matrix
01472         
01473             
01474             const float * rho0_data  = Get_rho0().GetRawData();
01475         
01476             #ifndef __NO_OMP__            
01477                 #pragma omp for schedule (static)     
01478             #endif  
01479             for (size_t z = 0; z < Z_Size; z++){
01480 
01481                 register size_t i = z * SliceSize;
01482 
01483                 for (size_t y = 0; y< Y_Size; y++){                                            
01484                     for (size_t x = 0; x < X_Size; x++){
01485 
01486                         const float pml_x   = pml_x_data[x];
01487                         const float dt_rho0 = dt_el * rho0_data[i];                              
01488 
01489                         rhox_data[i] = pml_x * (
01490                                             ((pml_x * rhox_data[i]) - (dt_rho0 * duxdx_data[i]))                                            
01491                                             );
01492 
01493                         i++;
01494 
01495                     } // x
01496                 }// y
01497             }// z
01498 
01499 
01500               #ifndef __NO_OMP__            
01501                 #pragma omp for schedule (static)     
01502             #endif  
01503             for (size_t z = 0; z < Z_Size; z++){
01504 
01505                 register size_t i = z * SliceSize;            
01506                 for (size_t y = 0; y< Y_Size; y++){                
01507 
01508                     const float pml_y = Get_pml_y()[y];
01509                     for (size_t x = 0; x < X_Size; x++){
01510 
01511                         const float dt_rho0 = dt_el * rho0_data[i];
01512                               
01513                         rhoy_data[i] = pml_y * (
01514                                             ((pml_y * rhoy_data[i]) - (dt_rho0 * duydy_data[i]))                                            
01515                                             );
01516 
01517                         i++;
01518 
01519                     } // x
01520                 }// y
01521             }// z
01522 
01523           
01524               #ifndef __NO_OMP__            
01525                 #pragma omp for schedule (static)     
01526             #endif  
01527             for (size_t z = 0; z < Z_Size; z++){
01528 
01529                 register size_t i = z * SliceSize;
01530                 const float pml_z = Get_pml_z()[z];
01531                 for (size_t y = 0; y< Y_Size; y++){                
01532 
01533                     for (size_t x = 0; x < X_Size; x++){
01534 
01535                         const float dt_rho0 = dt_el * rho0_data[i];
01536 
01537                         rhoz_data[i] = pml_z * (
01538                                             ((pml_z * rhoz_data[i]) - (dt_rho0 * duzdz_data[i]))
01539                                             );
01540                         i++;
01541 
01542                     } // x
01543                 }// y
01544             }// z
01545 
01546         } // end matrix     
01547   }// parallel
01548 }// end of Compute_rhoxyz
01549 //------------------------------------------------------------------------------
01550  
01551 
01559  void TKSpaceFirstOrder3DSolver::Calculate_SumRho_BonA_SumDu_SSE2(TRealMatrix & RHO_Temp, TRealMatrix & BonA_Temp, TRealMatrix & Sum_du){
01560      
01561     const size_t TotalElementCount_4 = (RHO_Temp.GetTotalElementCount() >> 2) << 2;
01562      
01563     const float * rhox_data = Get_rhox().GetRawData();
01564     const float * rhoy_data = Get_rhoy().GetRawData();
01565     const float * rhoz_data = Get_rhoz().GetRawData();    
01566     
01567     const float * dux_data = Get_duxdx().GetRawData();
01568     const float * duy_data = Get_duydy().GetRawData();
01569     const float * duz_data = Get_duzdz().GetRawData();
01570     
01571     
01572           float * BonA;     
01573           size_t  BonA_shift;
01574     const bool    BonA_flag = Parameters->Get_BonA_scalar_flag();
01575     
01576     if (BonA_flag) {
01577         BonA = &Parameters->Get_BonA_scalar();
01578         BonA_shift = 0;
01579     }else {
01580         BonA = Get_BonA().GetRawData();
01581         BonA_shift = 1;
01582     }
01583     
01584     
01585           float * rho0_data;     
01586           size_t  rho0_shift;
01587     const bool    rho0_flag = Parameters->Get_rho0_scalar_flag();
01588     
01589     if (rho0_flag) {
01590         rho0_data = &Parameters->Get_rho0_scalar();
01591         rho0_shift = 0;
01592     }else {
01593         rho0_data = Get_rho0().GetRawData();
01594         rho0_shift = 1;
01595     }
01596     
01597     #ifndef __NO_OMP__            
01598       #pragma  omp parallel
01599     #endif    
01600     {
01601     
01602       float * RHO_Temp_Data  = RHO_Temp.GetRawData();
01603       float * BonA_Temp_Data = BonA_Temp.GetRawData();
01604       float * SumDU_Temp_Data= Sum_du.GetRawData();
01605         
01606       
01607       const __m128 Two_SSE        = _mm_set1_ps(2.0f);
01608             __m128 BonA_SSE       = _mm_set1_ps(Parameters->Get_BonA_scalar());
01609             __m128 rho0_SSE       = _mm_set1_ps(Parameters->Get_rho0_scalar());        
01610               
01611      #ifndef __NO_OMP__            
01612         #pragma omp for schedule (static) nowait
01613      #endif    
01614      for (size_t i = 0; i < TotalElementCount_4; i+=4){       
01615         
01616          
01617          if (!BonA_flag)  BonA_SSE = _mm_load_ps(&BonA[i]);
01618          
01619         __m128 xmm1       = _mm_load_ps(&rhox_data[i]);
01620         __m128 xmm2       = _mm_load_ps(&rhoy_data[i]);                
01621         __m128 xmm3       = _mm_load_ps(&rhoz_data[i]);
01622         
01623         
01624           
01625         if (!rho0_flag)  rho0_SSE = _mm_load_ps(&rho0_data[i]);        
01626         __m128 rho_xyz_sq_SSE;        
01627         __m128 rho_xyz_el_SSE; 
01628         
01629         //  register const float rho_xyz_el = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
01630         rho_xyz_el_SSE = _mm_add_ps(xmm1,xmm2); 
01631         rho_xyz_el_SSE = _mm_add_ps(xmm3, rho_xyz_el_SSE); 
01632         
01633         // RHO_Temp_Data[i]  = rho_xyz_el;
01634         _mm_stream_ps(&RHO_Temp_Data[i],rho_xyz_el_SSE);
01635         
01636         //  BonA_Temp_Data[i] =  ((BonA * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i])) + rho_xyz_el;
01637         rho_xyz_sq_SSE = _mm_mul_ps(rho_xyz_el_SSE,rho_xyz_el_SSE);// (rho_xyz_el * rho_xyz_el)        
01638         
01639         xmm1           = _mm_mul_ps(rho_xyz_sq_SSE,BonA_SSE);      //((BonA * (rho_xyz_el * rho_xyz_el))        
01640         xmm2           = _mm_mul_ps(Two_SSE,rho0_SSE);             // (2.0f * rho0_data[i])     
01641         xmm3           = _mm_div_ps(xmm1,xmm2);                    // (BonA * (rho_xyz_el * rho_xyz_el)) /  (2.0f * rho0_data[i])     
01642         
01643         xmm1           = _mm_add_ps(xmm3,rho_xyz_el_SSE);          // + rho_xyz_el
01644         
01645         _mm_stream_ps(&BonA_Temp_Data[i],xmm1);
01646          
01647         xmm1       = _mm_load_ps(&dux_data[i]); //dudx
01648         xmm2       = _mm_load_ps(&duy_data[i]); //dudu
01649         xmm3       = _mm_load_ps(&duz_data[i]); //dudz        
01650          
01651         __m128 xmm_acc    = _mm_add_ps(xmm1,xmm2);
01652         xmm_acc    = _mm_add_ps(xmm_acc,xmm3);
01653         xmm_acc    = _mm_mul_ps(xmm_acc,rho0_SSE);
01654         
01655         _mm_stream_ps(&SumDU_Temp_Data[i],xmm_acc); 
01656         
01657         
01658       // BonA_Temp_Data[i] =  ((BonA * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i])) + rho_xyz_el;
01659       
01660       }      
01661 
01662       
01663                   //-- non SSE code --//
01664                   if (omp_get_thread_num() == omp_get_num_threads() -1 ){
01665                     for (size_t i = TotalElementCount_4; i < RHO_Temp.GetTotalElementCount() ; i++){                        
01666                       register const float rho_xyz_el = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
01667                       RHO_Temp_Data[i]   = rho_xyz_el;
01668                       BonA_Temp_Data[i]  =  ((BonA[i * BonA_shift] * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i* rho0_shift])) + rho_xyz_el;      
01669                       SumDU_Temp_Data[i] = rho0_data[i * rho0_shift] * (dux_data[i] + duy_data[i] + duz_data[i]);
01670                     }      
01671                   }
01672      
01673    }// parallel
01674  
01675  } // end of Calculate_SumRho_BonA_SumDu
01676  //-----------------------------------------------------------------------------
01677  
01678  
01679  
01685  void TKSpaceFirstOrder3DSolver::Calculate_SumRho_SumRhoDu(TRealMatrix & Sum_rhoxyz, TRealMatrix & Sum_rho0_du){
01686      
01687       
01688      const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount();
01689      
01690     #ifndef __NO_OMP__            
01691       #pragma  omp parallel
01692     #endif    
01693     {
01694         const float * rhox_data = Get_rhox().GetRawData();
01695         const float * rhoy_data = Get_rhoy().GetRawData();
01696         const float * rhoz_data = Get_rhoz().GetRawData();    
01697 
01698         const float * dux_data = Get_duxdx().GetRawData();
01699         const float * duy_data = Get_duydy().GetRawData();
01700         const float * duz_data = Get_duzdz().GetRawData();
01701 
01702         const float * rho0_data = NULL;
01703         
01704         const float rho0_data_el = Parameters->Get_rho0_scalar();
01705         if (!Parameters->Get_rho0_scalar_flag()) {
01706             rho0_data = Get_rho0().GetRawData();
01707         }
01708          
01709          
01710               float * Sum_rhoxyz_Data  = Sum_rhoxyz.GetRawData();              
01711               float * Sum_rho0_du_Data = Sum_rho0_du.GetRawData();
01712         
01713       
01714               
01715          #ifndef __NO_OMP__            
01716             #pragma omp for schedule (static)
01717          #endif           
01718         for (size_t i = 0; i <  TotalElementCount; i++){                                  
01719             Sum_rhoxyz_Data[i] = rhox_data[i] + rhoy_data[i] + rhoz_data[i];                              
01720         }      
01721          
01722         
01723         if (Parameters->Get_rho0_scalar_flag()){ // scalar
01724              #ifndef __NO_OMP__            
01725                 #pragma omp for schedule (static) nowait
01726              #endif           
01727              for (size_t i = 0; i <  TotalElementCount; i++){                                          
01728                 Sum_rho0_du_Data[i] = rho0_data_el * (dux_data[i] + duy_data[i] + duz_data[i]);            
01729                     
01730              }
01731         }else{ // matrix
01732             
01733             #ifndef __NO_OMP__            
01734                 #pragma omp for schedule (static) nowait
01735              #endif           
01736             for (size_t i = 0; i <  TotalElementCount; i++){                                          
01737                 Sum_rho0_du_Data[i] = rho0_data[i] * (dux_data[i] + duy_data[i] + duz_data[i]);            
01738             }      
01739             
01740         }    
01741             
01742     } // parallel
01743      
01744  }// end of Calculate_SumRho_SumRhoDu
01745  //-----------------------------------------------------------------------------
01746  
01747  
01748  
01757  void TKSpaceFirstOrder3DSolver::Compute_Absorb_nabla1_2_SSE2(TFFTWComplexMatrix& FFT_1, TFFTWComplexMatrix& FFT_2){
01758        
01759      const float * nabla1 = Get_absorb_nabla1().GetRawData();
01760      const float * nabla2 = Get_absorb_nabla2().GetRawData();
01761      
01762      const size_t TotalElementCount = FFT_1.GetTotalElementCount();     
01763      const size_t TotalElementCount_SSE = (FFT_1.GetTotalElementCount() >> 1) << 1;     
01764      
01765     #ifndef __NO_OMP__            
01766       #pragma omp parallel
01767     #endif    
01768     {
01769     
01770       float * FFT_1_data  = FFT_1.GetRawData();
01771       float * FFT_2_data  = FFT_2.GetRawData();      
01772         
01773      #ifndef __NO_OMP__            
01774         #pragma omp for schedule (static) nowait
01775      #endif    
01776       for (size_t i = 0; i < TotalElementCount_SSE; i+=2){
01777           
01778           __m128 xmm_nabla1 = _mm_set_ps( nabla1[i+1], nabla1[i+1] , nabla1[i] , nabla1[i] );
01779           __m128 xmm_FFT_1  = _mm_load_ps(&FFT_1_data[2*i]);
01780           
01781                  xmm_FFT_1 = _mm_mul_ps(xmm_nabla1, xmm_FFT_1);
01782                  _mm_store_ps(&FFT_1_data[2*i], xmm_FFT_1);
01783           
01784           
01785       }
01786       
01787      #ifndef __NO_OMP__            
01788         #pragma omp for schedule (static) 
01789      #endif    
01790       for (size_t i = 0; i < TotalElementCount; i+=2){
01791           __m128 xmm_nabla2 = _mm_set_ps( nabla2[i+1], nabla2[i+1] , nabla2[i] , nabla2[i] );
01792           __m128 xmm_FFT_2  = _mm_load_ps(&FFT_2_data[2*i]);
01793           
01794           xmm_FFT_2 = _mm_mul_ps(xmm_nabla2, xmm_FFT_2);
01795           _mm_store_ps(&FFT_2_data[2*i], xmm_FFT_2);
01796       }
01797       
01798                 //-- non SSE code --//
01799                   if (omp_get_thread_num() == omp_get_num_threads() -1 ){
01800                     for (size_t i = TotalElementCount_SSE; i < TotalElementCount ; i++){                                            
01801                            FFT_1_data[(i<<1)]   *= nabla1[i];          
01802                            FFT_1_data[(i<<1)+1] *= nabla1[i];                    
01803           
01804                            FFT_2_data[(i<<1)]   *=  nabla2[i];          
01805                            FFT_2_data[(i<<1)+1] *=  nabla2[i];                                  
01806                     }      
01807                   }
01808      
01809       
01810     }// parallel 
01811      
01812  } // end of Compute_Absorb_nabla1_2_Normalize
01813  //-----------------------------------------------------------------------------
01814  
01815  
01816  
01817  
01824  void TKSpaceFirstOrder3DSolver::Sum_Subterms_nonlinear(TRealMatrix& Absorb_tau_temp, TRealMatrix& Absorb_eta_temp, TRealMatrix& BonA_temp){
01825     
01826      float *  tau_data;
01827      float *  eta_data;
01828      float *  c2_data;   
01829      
01830      size_t  c2_shift;
01831      size_t  tau_eta_shift;
01832      
01833      
01834      const float *  Absorb_tau_data = Absorb_tau_temp.GetRawData();
01835      const float *  Absorb_eta_data = Absorb_eta_temp.GetRawData();
01836      
01837      const size_t TotalElementCount = Get_p().GetTotalElementCount();
01838      const float Divider = 1.0f / (float) TotalElementCount;
01839 
01840      // c2 scalar
01841      if (Parameters->Get_c0_scalar_flag()){
01842          c2_data = &Parameters->Get_c0_scalar();
01843          c2_shift = 0;         
01844      }else{
01845          c2_data  = Get_c2().GetRawData();
01846          c2_shift = 1;
01847          
01848      }         
01849      
01850      // eta and tau scalars
01851      if (Parameters->Get_c0_scalar_flag() && Parameters->Get_alpha_coeff_scallar_flag()){     
01852         tau_data = &Parameters->Get_absorb_tau_scalar();
01853         eta_data = &Parameters->Get_absorb_eta_scalar();
01854         tau_eta_shift = 0;
01855          
01856      }else{
01857         tau_data = Get_absorb_tau().GetRawData();
01858         eta_data = Get_absorb_eta().GetRawData();
01859         tau_eta_shift = 1;
01860          
01861      }
01862      
01863      
01864      
01865     #ifndef __NO_OMP__            
01866       #pragma omp parallel
01867     #endif    
01868     {
01869     
01870        const float * BonA_data = BonA_temp.GetRawData();  
01871        float * p_data  = Get_p().GetRawData();
01872       
01873         
01874      #ifndef __NO_OMP__            
01875         #pragma omp for schedule (static) 
01876      #endif    
01877       for (size_t i = 0; i < TotalElementCount; i++){
01878           p_data[i] = (c2_data[i * c2_shift]) *(
01879                       BonA_data[i] +
01880                      (Divider * ((Absorb_tau_data[i] * tau_data[i * tau_eta_shift]) -
01881                                  (Absorb_eta_data[i] * eta_data[i * tau_eta_shift])
01882                                 ))                   
01883                   );
01884           
01885       }
01886       
01887     
01888     }// parallel 
01889            
01890  }// end of Sum_Subterms_nonlinear
01891  //-----------------------------------------------------------------------------
01892  
01893  
01900  void TKSpaceFirstOrder3DSolver::Sum_Subterms_linear(TRealMatrix& Absorb_tau_temp, TRealMatrix& Absorb_eta_temp, TRealMatrix& Sum_rhoxyz){
01901      
01902      const float *  tau_data = NULL;
01903      const float *  eta_data = NULL;
01904      const float *  c2_data  = NULL;   
01905      
01906      size_t  c2_shift       = 0;
01907      size_t  tau_eta_shift  = 0;
01908      
01909      
01910      const float *  Absorb_tau_data = Absorb_tau_temp.GetRawData();
01911      const float *  Absorb_eta_data = Absorb_eta_temp.GetRawData();
01912      
01913      const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount();
01914      const float Divider = 1.0f / (float) TotalElementCount;
01915 
01916      // c2 scalar
01917      if (Parameters->Get_c0_scalar_flag()){
01918          c2_data = &Parameters->Get_c0_scalar();
01919          c2_shift = 0;         
01920      }else{
01921          c2_data  = Get_c2().GetRawData();
01922          c2_shift = 1;
01923          
01924      }         
01925      
01926      // eta and tau scalars
01927      if (Parameters->Get_c0_scalar_flag() && Parameters->Get_alpha_coeff_scallar_flag()){     
01928         tau_data = &Parameters->Get_absorb_tau_scalar();
01929         eta_data = &Parameters->Get_absorb_eta_scalar();
01930         tau_eta_shift = 0;
01931          
01932      }else{
01933         tau_data = Get_absorb_tau().GetRawData();
01934         eta_data = Get_absorb_eta().GetRawData();
01935         tau_eta_shift = 1;
01936          
01937      }
01938      
01939      
01940      
01941     #ifndef __NO_OMP__            
01942       #pragma omp parallel
01943     #endif    
01944     {
01945     
01946        const float * Sum_rhoxyz_Data = Sum_rhoxyz.GetRawData();  
01947        float * p_data  = Get_p().GetRawData();
01948       
01949         
01950      #ifndef __NO_OMP__            
01951         #pragma omp for schedule (static) 
01952      #endif    
01953       for (size_t i = 0; i < TotalElementCount; i++){
01954           p_data[i] = (c2_data[i * c2_shift]) *(
01955                       Sum_rhoxyz_Data[i] +
01956                      (Divider * ((Absorb_tau_data[i] * tau_data[i * tau_eta_shift]) -
01957                                  (Absorb_eta_data[i] * eta_data[i * tau_eta_shift])
01958                                 ))                   
01959                   );
01960           
01961       }
01962       
01963     
01964     }// parallel  
01965      
01966      
01967      
01968  }// end of Sum_Subterms_linear
01969  //-----------------------------------------------------------------------------
01970  
01971  
01972 
01977  void TKSpaceFirstOrder3DSolver::Sum_new_p_nonlinear_lossless(){
01978      
01979      
01980     #ifndef __NO_OMP__            
01981       #pragma omp parallel
01982     #endif    
01983     {
01984     
01985        const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount();
01986        float * p_data  = Get_p().GetRawData();
01987 
01988        const float * rhox_data = Get_rhox().GetRawData();
01989        const float * rhoy_data = Get_rhoy().GetRawData();
01990        const float * rhoz_data = Get_rhoz().GetRawData();
01991        
01992        float * c2_data;
01993        size_t  c2_shift;
01994              
01995          if (Parameters->Get_c0_scalar_flag()){
01996              c2_data = &Parameters->Get_c0_scalar();
01997              c2_shift = 0;         
01998          }else{
01999              c2_data  = Get_c2().GetRawData();
02000              c2_shift = 1;
02001 
02002          }         
02003 
02004        
02005        float * BonA_data;
02006        size_t  BonA_shift;
02007 
02008          if (Parameters->Get_BonA_scalar_flag()){     
02009                 BonA_data  = &Parameters->Get_BonA_scalar();
02010                 BonA_shift = 0;
02011          }else{
02012                 BonA_data  = Get_BonA().GetRawData();
02013                 BonA_shift = 1;
02014          }
02015 
02016        float * rho0_data;
02017        size_t  rho0_shift;
02018 
02019          if (Parameters->Get_rho0_scalar_flag()){     
02020                 rho0_data  = &Parameters->Get_rho0_scalar();
02021                 rho0_shift = 0;
02022          }else{
02023                 rho0_data  = Get_rho0().GetRawData();
02024                 rho0_shift = 1;
02025          }
02026                      
02027         
02028      #ifndef __NO_OMP__            
02029         #pragma omp for schedule (static) 
02030      #endif    
02031       for (size_t i = 0; i < TotalElementCount; i++){
02032           const float sum_rho = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
02033           p_data[i] = c2_data[i * c2_shift] *(
02034                           sum_rho +
02035                           (BonA_data[i * BonA_shift] * (sum_rho* sum_rho) / 
02036                                 (2.0f* rho0_data[i * rho0_shift]))
02037                   
02038                   );
02039           
02040       }
02041       
02042     
02043     }// parallel  
02044      
02045      
02046      
02047  }// end of Sum_new_p_nonlinear_lossless
02048  //-----------------------------------------------------------------------------
02049  
02050  
02055  void TKSpaceFirstOrder3DSolver::Sum_new_p_linear_lossless(){
02056     
02057      
02058     #ifndef __NO_OMP__            
02059       #pragma omp parallel
02060     #endif    
02061     {
02062     
02063         const float * rhox   = Get_rhox().GetRawData();
02064         const float * rhoy   = Get_rhoy().GetRawData();
02065         const float * rhoz   = Get_rhoz().GetRawData();
02066               float * p_data = Get_p().GetRawData();
02067         const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount();
02068               
02069         
02070       if (Parameters->Get_c0_scalar_flag()){
02071      
02072              const float c2_element = Parameters->Get_c0_scalar();
02073              #ifndef __NO_OMP__            
02074                 #pragma omp for schedule (static) 
02075              #endif    
02076               for (size_t i = 0; i < TotalElementCount; i++){
02077                   p_data[i] = c2_element * ( rhox[i] + rhoy[i] + rhoz[i]);                                
02078               }
02079               
02080        }else {       
02081            
02082              const float * c2_data  = Get_c2().GetRawData();     
02083              
02084              #ifndef __NO_OMP__            
02085                 #pragma omp for schedule (static) 
02086              #endif    
02087               for (size_t i = 0; i < TotalElementCount; i++){
02088                   p_data[i] = (c2_data[i]) *( rhox[i] + rhoy[i] + rhoz[i]);                                
02089               }
02090        }
02091       
02092     
02093     }// parallel       
02094      
02095  }// end of Sum_new_p_linear_lossless()
02096  //-----------------------------------------------------------------------------
02097  
02098  
02099  
02103  void TKSpaceFirstOrder3DSolver::Compute_new_p_nonlinear(){
02104      
02105     
02106   
02107   //-- rhox + rhoy + rhoz --//
02108     if (Parameters->Get_absorbing_flag()) { // absorbing case
02109   
02110      
02111         TRealMatrix&  Sum_rhoxyz      = Get_Temp_1_RS3D();
02112         TRealMatrix&  BonA_rho_rhoxyz = Get_Temp_2_RS3D();
02113         TRealMatrix&  Sum_du          = Get_Temp_3_RS3D();
02114 
02115 
02116         Calculate_SumRho_BonA_SumDu_SSE2(Sum_rhoxyz,BonA_rho_rhoxyz, Sum_du);
02117 
02118 
02119         // ifftn ( absorb_nabla1 * fftn (rho0 * (duxdx+duydy+duzdz))
02120 
02121         Get_FFT_X_temp().Compute_FFT_3D_R2C(Sum_du);
02122         Get_FFT_Y_temp().Compute_FFT_3D_R2C(Sum_rhoxyz);
02123 
02124 
02125         Compute_Absorb_nabla1_2_SSE2(Get_FFT_X_temp(),Get_FFT_Y_temp());
02126 
02127 
02128         TRealMatrix& Absorb_tau_temp = Sum_du;
02129         TRealMatrix& Absorb_eta_temp = Sum_rhoxyz;
02130 
02131         Get_FFT_X_temp().Compute_iFFT_3D_C2R(Absorb_tau_temp);
02132         Get_FFT_Y_temp().Compute_iFFT_3D_C2R(Absorb_eta_temp);
02133 
02134         Sum_Subterms_nonlinear(Absorb_tau_temp, Absorb_eta_temp, BonA_rho_rhoxyz);   
02135     }else {
02136       
02137         Sum_new_p_nonlinear_lossless();  
02138                 
02139     }
02140      
02141      
02142  }// end of Compute_new_p_nonlinear_absorbing()
02143  //-----------------------------------------------------------------------------
02144  
02145  
02146  
02150  void TKSpaceFirstOrder3DSolver::Compute_new_p_linear(){
02151      
02152     
02153   //-- rhox + rhoy + rhoz --//
02154     if (Parameters->Get_absorbing_flag()) { // absorbing case
02155 
02156         TRealMatrix&  Sum_rhoxyz      = Get_Temp_1_RS3D();
02157         TRealMatrix&  Sum_rho0_du     = Get_Temp_2_RS3D();
02158 
02159 
02160         Calculate_SumRho_SumRhoDu(Sum_rhoxyz,Sum_rho0_du);
02161 
02162 
02163         // ifftn ( absorb_nabla1 * fftn (rho0 * (duxdx+duydy+duzdz))
02164 
02165         Get_FFT_X_temp().Compute_FFT_3D_R2C(Sum_rho0_du);
02166         Get_FFT_Y_temp().Compute_FFT_3D_R2C(Sum_rhoxyz);
02167 
02168 
02169         Compute_Absorb_nabla1_2_SSE2(Get_FFT_X_temp(),Get_FFT_Y_temp());
02170 
02171 
02172         TRealMatrix& Absorb_tau_temp = Get_Temp_2_RS3D(); // override Sum_rho0_dx
02173         TRealMatrix& Absorb_eta_temp = Get_Temp_3_RS3D();
02174 
02175         Get_FFT_X_temp().Compute_iFFT_3D_C2R(Absorb_tau_temp);
02176         Get_FFT_Y_temp().Compute_iFFT_3D_C2R(Absorb_eta_temp);
02177 
02178         Sum_Subterms_linear(Absorb_tau_temp, Absorb_eta_temp, Sum_rhoxyz);   
02179     }else{
02180         // lossless case
02181         
02182         Sum_new_p_linear_lossless();
02183         
02184         
02185      } 
02186      
02187      
02188  }// end of Compute_new_p_linear_absorbing()
02189  //-----------------------------------------------------------------------------
02190 
02191  
02192  
02193  
02194  
02195  
02196  /*
02197   * Compute new values of ux_sgx, uy_sgy, uz_sgz.
02198   */
02199  void TKSpaceFirstOrder3DSolver::Compute_uxyz(){
02200     
02201      Compute_ddx_kappa_fft_p(Get_p(),
02202                                Get_FFT_X_temp(),Get_FFT_Y_temp(),Get_FFT_Z_temp(),
02203                                Get_kappa(),
02204                                Get_ddx_k_shift_pos(),Get_ddy_k_shift_pos(),Get_ddz_k_shift_pos()
02205                                );
02206     
02207      Get_FFT_X_temp().Compute_iFFT_3D_C2R(Get_Temp_1_RS3D());                                       
02208      Get_FFT_Y_temp().Compute_iFFT_3D_C2R(Get_Temp_2_RS3D());                                       
02209      Get_FFT_Z_temp().Compute_iFFT_3D_C2R(Get_Temp_3_RS3D());                    
02210      
02211     #ifndef __NO_OMP__            
02212       #pragma omp parallel 
02213     #endif
02214     {
02215    
02216      if (Parameters->Get_rho0_scalar_flag()) { // scalars
02217          if (Parameters->Get_nonuniform_grid_flag()){ 
02218              Get_ux_sgx().Compute_ux_sgx_normalize_scalar_nonuniform(Get_Temp_1_RS3D(),Parameters->Get_rho0_sgx_scalar(), Get_dxudxn_sgx(), Get_pml_x_sgx());        
02219              Get_uy_sgy().Compute_uy_sgy_normalize_scalar_nonuniform(Get_Temp_2_RS3D(),Parameters->Get_rho0_sgy_scalar(), Get_dyudyn_sgy(), Get_pml_y_sgy());                
02220              Get_uz_sgz().Compute_uz_sgz_normalize_scalar_nonuniform(Get_Temp_3_RS3D(),Parameters->Get_rho0_sgz_scalar(), Get_dzudzn_sgz(), Get_pml_z_sgz());
02221              
02222          }else {
02223              Get_ux_sgx().Compute_ux_sgx_normalize_scalar_uniform(Get_Temp_1_RS3D(),Parameters->Get_rho0_sgx_scalar(), Get_pml_x_sgx());        
02224              Get_uy_sgy().Compute_uy_sgy_normalize_scalar_uniform(Get_Temp_2_RS3D(),Parameters->Get_rho0_sgy_scalar(), Get_pml_y_sgy());                
02225              Get_uz_sgz().Compute_uz_sgz_normalize_scalar_uniform(Get_Temp_3_RS3D(),Parameters->Get_rho0_sgz_scalar(), Get_pml_z_sgz());             
02226          }
02227              
02228      }else {// matrices
02229         Get_ux_sgx().Compute_ux_sgx_normalize(Get_Temp_1_RS3D(),Get_dt_rho0_sgx(), Get_pml_x_sgx());        
02230         Get_uy_sgy().Compute_uy_sgy_normalize(Get_Temp_2_RS3D(),Get_dt_rho0_sgy(), Get_pml_y_sgy());                
02231         Get_uz_sgz().Compute_uz_sgz_normalize(Get_Temp_3_RS3D(),Get_dt_rho0_sgz(), Get_pml_z_sgz());
02232       }
02233     
02234     } // parallel  
02235         
02236  }// end of Compute_uxyz()
02237  //-----------------------------------------------------------------------------
02238 
02242  void TKSpaceFirstOrder3DSolver::Add_u_source(){
02243  
02244 
02245    if (Parameters->Get_ux_source_flag() > t_index) {
02246        Get_ux_sgx().Add_u_source(Get_ux_source_input(), Get_u_source_index(), t_index, 
02247                                      Parameters->Get_u_source_mode(), Parameters->Get_u_source_many());
02248    
02249    }
02250    if (Parameters->Get_uy_source_flag() > t_index) {
02251        Get_uy_sgy().Add_u_source(Get_uy_source_input(), Get_u_source_index(), t_index, 
02252                                      Parameters->Get_u_source_mode(), Parameters->Get_u_source_many());
02253        
02254        
02255 
02256    }
02257    if (Parameters->Get_uz_source_flag() > t_index) {
02258        Get_uz_sgz().Add_u_source(Get_uz_source_input(), Get_u_source_index(), t_index, 
02259                                      Parameters->Get_u_source_mode(), Parameters->Get_u_source_many());       
02260 
02261    }    
02262      
02263  }// end of Add_u_source
02264  //-----------------------------------------------------------------------------
02265  
02266  
02267  
02272 void TKSpaceFirstOrder3DSolver::Add_p_source(){
02273     
02274     if (Parameters->Get_p_source_flag() > t_index){
02275 
02276         float * rhox = Get_rhox().GetRawData();
02277         float * rhoy = Get_rhoy().GetRawData();
02278         float * rhoz = Get_rhoz().GetRawData();
02279 
02280         float * p_source_input = Get_p_source_input().GetRawData();
02281         long  * p_source_index = Get_p_source_index().GetRawData(); 
02282 
02283 
02284         size_t index2D = t_index;
02285 
02286         if (Parameters->Get_p_source_many() != 0) { // is 2D
02287             index2D = t_index * Get_p_source_index().GetTotalElementCount();
02288         }
02289         
02290         // replacement
02291         if (Parameters->Get_p_source_mode() == 0){ 
02292                 
02293             for (size_t i = 0; i < Get_p_source_index().GetTotalElementCount(); i++){
02294                                                 
02295                 rhox[p_source_index[i]] = p_source_input[index2D];                    
02296                 rhoy[p_source_index[i]] = p_source_input[index2D];                    
02297                 rhoz[p_source_index[i]] = p_source_input[index2D];                    
02298                 
02299                 if (Parameters->Get_p_source_many() != 0) index2D ++;
02300                 
02301             }                
02302             
02303         // Addition
02304         }else{ 
02305                                                     
02306             for (size_t i = 0; i < Get_p_source_index().GetTotalElementCount(); i++){
02307                 
02308                 rhox[p_source_index[i]] += p_source_input[index2D];                    
02309                 rhoy[p_source_index[i]] += p_source_input[index2D];                    
02310                 rhoz[p_source_index[i]] += p_source_input[index2D];                    
02311                 
02312                 if (Parameters->Get_p_source_many() != 0) index2D ++;
02313                                 
02314             }
02315         }// type of replacement
02316                     
02317     }// if do at all
02318     
02319 }// end of Add_p_source
02320 //------------------------------------------------------------------------------
02321  
02322  
02327 void TKSpaceFirstOrder3DSolver::Compute_MainLoop(){
02328    
02329     
02330     ActPercent = 0;    
02331     PrintOtputHeader();
02332     
02333     IterationTime.Start();
02334     for (t_index = 0; t_index < Parameters->Get_Nt(); t_index ++){
02335 
02336         
02337        Compute_uxyz();
02338        
02339        // add in the velocity u source term
02340        Add_u_source(); 
02341                          
02342        
02343        // add in the transducer source term (t = t1) to ux
02344        if (Parameters->Get_transducer_source_flag() > t_index) 
02345            Get_ux_sgx().AddTransducerSource(Get_u_source_index(), Get_delay_mask(), Get_transducer_source_input());
02346 
02347        Compute_duxyz();
02348        
02349        
02350        if (Parameters->Get_nonlinear_flag())
02351            Compute_rhoxyz_nonlinear();
02352        else
02353            Compute_rhoxyz_linear();
02354        
02355        
02356        // add in the source pressure term 
02357        Add_p_source();
02358        
02359        if (Parameters->Get_nonlinear_flag()){
02360                 
02361            Compute_new_p_nonlinear();
02362                 
02363        }else {
02364            Compute_new_p_linear();
02365        }
02366        
02367        
02368        //-- calculate initial pressure
02369        if ((t_index == 0) && (Parameters->Get_p0_source_flag() == 1)) Calculate_p0_source();
02370 
02371     
02372     //-- store the initial pressure at the first time step --//
02373        
02374        StoreSensorData();
02375         
02376        PrintStatisitcs();
02377     
02378   }
02379         
02380     
02381 }// end of Compute_Main_Loop()
02382 //------------------------------------------------------------------------------
02383 
02384 
02389 void TKSpaceFirstOrder3DSolver::PrintStatisitcs(){
02390  
02391   float Nt = Parameters->Get_Nt();
02392   if (t_index > (ActPercent * Nt * 0.01f) ) {
02393      
02394      ActPercent += Parameters->GetVerboseInterval();
02395           
02396      IterationTime.Stop();
02397      
02398      double ElTime = IterationTime.GetElapsedTime();
02399      const double ToGo   = ((ElTime / (float) (t_index+1)) * Nt) - ElTime;
02400      
02401 
02402      struct tm *current;
02403      time_t now;
02404      time(&now);
02405      now += ToGo;
02406      current = localtime(&now);
02407      
02408      fprintf(stdout, "%5i%c      %9.3fs      %9.3fs      %02i/%02i/%02i %02i:%02i:%02i\n",
02409                         (int) ((t_index) / (Nt * 0.01f)),'%',
02410                         ElTime, ToGo, 
02411                                 current->tm_mday, current->tm_mon+1, current->tm_year-100, 
02412                                 current->tm_hour, current->tm_min, current->tm_sec
02413                 );
02414          
02415      fflush(stdout);
02416      
02417  }
02418  
02419 }// end of KSpaceFirstOrder3DSolver
02420 //------------------------------------------------------------------------------
02421 
02425 void TKSpaceFirstOrder3DSolver::PrintOtputHeader(){                
02426     fprintf(stdout,"-------------------------------------------------------------\n");
02427     fprintf(stdout,"....................... Simulation ..........................\n");    
02428     fprintf(stdout,"Progress...ElapsedTime........TimeToGo......TimeOfTermination\n");
02429 
02430 }// end of PrintOtputHeader
02431 //------------------------------------------------------------------------------
02432 
02437 void TKSpaceFirstOrder3DSolver::PostPorcessing(){
02438     
02439                                  //-- p --//
02440     if (Parameters->IsStore_p_max()){
02441         Get_p_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02442                                                MatrixContainer[p_sensor_max].HDF5MatrixName.c_str(),
02443                                                Parameters->GetCompressionLevel());        
02444     }// p_max
02445     
02446     
02447     if (Parameters->IsStore_p_rms()){
02448          
02449                float * p_rms       = Get_p_sensor_rms().GetRawData();         
02450          const size_t  sensor_size = Get_sensor_mask_ind().GetTotalElementCount();                         
02451                float  Divider     = 1.0f / (float) (Parameters->Get_Nt() - Parameters->GetStartTimeIndex());
02452          
02453          #ifndef __NO_OMP__            
02454              #pragma omp parallel for schedule (static) if (sensor_size > 1e5) \
02455                      firstprivate(Divider)
02456          #endif
02457          for (size_t i = 0; i < sensor_size; i++){
02458              p_rms[i] = sqrtf(Divider * p_rms[i]);             
02459          }            
02460         
02461         
02462           Get_p_sensor_rms().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02463                                                  MatrixContainer[p_sensor_rms].HDF5MatrixName.c_str(),
02464                                                  Parameters->GetCompressionLevel());        
02465     }//  p_rms
02466     
02467             
02468     if (Parameters->IsStore_p_final()){
02469         Get_p().WriteDataToHDF5File(Parameters->HDF5_OutputFile,p_final_Name,Parameters->GetCompressionLevel());                
02470     }// p_final
02471     
02472     if (Parameters->IsStore_p_raw()){
02473         p_sensor_raw_OutputStream->CloseStream();
02474     } // p_raw       
02475 
02476     
02477                                      
02478     
02479                                 //-- u --//
02480     
02481     
02482     if (Parameters->IsStore_u_max()){
02483         Get_ux_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02484                                                 MatrixContainer[ux_sensor_max].HDF5MatrixName.c_str(),
02485                                                 Parameters->GetCompressionLevel());        
02486         Get_uy_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02487                                                 MatrixContainer[uy_sensor_max].HDF5MatrixName.c_str(),
02488                                                 Parameters->GetCompressionLevel());        
02489         Get_uz_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02490                                                 MatrixContainer[uz_sensor_max].HDF5MatrixName.c_str(),
02491                                                 Parameters->GetCompressionLevel());        
02492     }// u_max
02493     
02494     
02495     if (Parameters->IsStore_u_rms()){
02496                float* ux_rms       = Get_ux_sensor_rms().GetRawData();
02497                float* uy_rms       = Get_uy_sensor_rms().GetRawData();
02498                float* uz_rms       = Get_uz_sensor_rms().GetRawData();               
02499          const size_t  sensor_size = Get_sensor_mask_ind().GetTotalElementCount();                         
02500                float  Divider      = 1.0f / (float) (Parameters->Get_Nt() - Parameters->GetStartTimeIndex());
02501          
02502          #ifndef __NO_OMP__            
02503              #pragma omp parallel for schedule (static) if (sensor_size > 1e5) \
02504                      firstprivate(Divider)
02505          #endif
02506          for (size_t i = 0; i < sensor_size; i++){
02507              ux_rms[i] = sqrtf(Divider * ux_rms[i]);             
02508              uy_rms[i] = sqrtf(Divider * uy_rms[i]);             
02509              uz_rms[i] = sqrtf(Divider * uz_rms[i]);             
02510          }            
02511         
02512         
02513         Get_ux_sensor_rms().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02514                                                 MatrixContainer[ux_sensor_rms].HDF5MatrixName.c_str(),
02515                                                 Parameters->GetCompressionLevel());        
02516         Get_uy_sensor_rms().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02517                                                 MatrixContainer[uy_sensor_rms].HDF5MatrixName.c_str(),
02518                                                 Parameters->GetCompressionLevel());        
02519         Get_uz_sensor_rms().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02520                                                 MatrixContainer[uz_sensor_rms].HDF5MatrixName.c_str(),
02521                                                 Parameters->GetCompressionLevel());        
02522     }// u_rms
02523     
02524                                     
02525     if (Parameters->IsStore_u_final()){
02526         Get_ux_sgx().WriteDataToHDF5File(Parameters->HDF5_OutputFile,ux_final_Name,Parameters->GetCompressionLevel());                
02527         Get_uy_sgy().WriteDataToHDF5File(Parameters->HDF5_OutputFile,uy_final_Name,Parameters->GetCompressionLevel());                
02528         Get_uz_sgz().WriteDataToHDF5File(Parameters->HDF5_OutputFile,uz_final_Name,Parameters->GetCompressionLevel());                
02529     }// u_final
02530      
02531     
02532     if (Parameters->IsStore_u_raw()){      
02533       ux_sensor_raw_OutputStream->CloseStream();    
02534       uy_sensor_raw_OutputStream->CloseStream();    
02535       uz_sensor_raw_OutputStream->CloseStream();
02536     }// u_raw
02537     
02538     
02539     
02540                                         //-- I --//
02541     
02542         // compute half step for intensity!
02543     if ((Parameters->IsStore_I_avg()) || (Parameters->IsStore_I_max())) {        
02544          Compute_uxyz();
02545          StoreIntensityData();
02546     }
02547 
02548 
02549 
02550     if (Parameters->IsStore_I_max()){
02551         Get_Ix_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02552                                                MatrixContainer[Ix_sensor_max].HDF5MatrixName.c_str(),
02553                                                Parameters->GetCompressionLevel());        
02554         Get_Iy_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02555                                                MatrixContainer[Iy_sensor_max].HDF5MatrixName.c_str(),
02556                                                Parameters->GetCompressionLevel());        
02557         Get_Iz_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02558                                                MatrixContainer[Iz_sensor_max].HDF5MatrixName.c_str(),
02559                                                Parameters->GetCompressionLevel());        
02560     }// I_max
02561     
02562     
02563     
02564     if (Parameters->IsStore_I_avg()){
02565         Get_Ix_sensor_avg().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02566                                                MatrixContainer[Ix_sensor_avg].HDF5MatrixName.c_str(),
02567                                                Parameters->GetCompressionLevel());        
02568         Get_Iy_sensor_avg().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02569                                                MatrixContainer[Iy_sensor_avg].HDF5MatrixName.c_str(),
02570                                                Parameters->GetCompressionLevel());        
02571         Get_Iz_sensor_avg().WriteDataToHDF5File(Parameters->HDF5_OutputFile,  
02572                                                MatrixContainer[Iz_sensor_avg].HDF5MatrixName.c_str(),
02573                                                Parameters->GetCompressionLevel());        
02574     }//  I_avg
02575         
02576     
02577 }// end of PostPorcessing
02578 //------------------------------------------------------------------------------
02579   
02580   
02581     
02586 void TKSpaceFirstOrder3DSolver::StoreSensorData(){
02587     
02588     if (t_index < Parameters->GetStartTimeIndex()) return;
02589     
02590     
02591     if (Parameters->IsStore_p_raw()){
02592        p_sensor_raw_OutputStream->AddData(Get_p(),Get_sensor_mask_ind(),Get_Temp_1_RS3D().GetRawData());
02593     }
02594     
02595     if (Parameters->IsStore_u_raw()){
02596        ux_sensor_raw_OutputStream->AddData(Get_ux_sgx(),Get_sensor_mask_ind(),Get_Temp_1_RS3D().GetRawData());
02597        uy_sensor_raw_OutputStream->AddData(Get_uy_sgy(),Get_sensor_mask_ind(),Get_Temp_1_RS3D().GetRawData());
02598        uz_sensor_raw_OutputStream->AddData(Get_uz_sgz(),Get_sensor_mask_ind(),Get_Temp_1_RS3D().GetRawData());
02599     }
02600 
02601 
02602     if (Parameters->IsStore_p_max()){        
02603          
02604          const float * p           = Get_p().GetRawData();
02605                float * p_max       = Get_p_sensor_max().GetRawData();
02606          const long  * index       = Get_sensor_mask_ind().GetRawData();
02607          const size_t  sensor_size = Get_sensor_mask_ind().GetTotalElementCount();
02608           
02609          #ifndef __NO_OMP__            
02610                 #pragma omp parallel for schedule (static) if (sensor_size > 1e5)
02611          #endif
02612          for (size_t i = 0; i <sensor_size; i++){
02613              if (p_max[i] < p[index[i]]) p_max[i] = p[index[i]];             
02614          }            
02615       }// p_max
02616         
02617     if (Parameters->IsStore_p_rms()){        
02618          // square sum
02619          const float * p           = Get_p().GetRawData();
02620                float * p_rms       = Get_p_sensor_rms().GetRawData();
02621          const long  * index       = Get_sensor_mask_ind().GetRawData();
02622          const size_t  sensor_size = Get_sensor_mask_ind().GetTotalElementCount();
02623                          
02624          #ifndef __NO_OMP__            
02625              #pragma omp parallel for schedule (static) if (sensor_size > 1e5)                      
02626          #endif
02627          for (size_t i = 0; i <sensor_size; i++){
02628              p_rms[i] += (p[index[i]] * p[index[i]]);             
02629          }            
02630       }//p_rms
02631     
02632     if (Parameters->IsStore_u_max()){        
02633          
02634          const float* ux           = Get_ux_sgx().GetRawData();
02635          const float* uy           = Get_uy_sgy().GetRawData();
02636          const float* uz           = Get_uz_sgz().GetRawData();
02637                float* ux_max       = Get_ux_sensor_max().GetRawData();
02638                float* uy_max       = Get_uy_sensor_max().GetRawData();
02639                float* uz_max       = Get_uz_sensor_max().GetRawData();
02640          const long * index        = Get_sensor_mask_ind().GetRawData();
02641          const size_t  sensor_size = Get_sensor_mask_ind().GetTotalElementCount();
02642          #ifndef __NO_OMP__            
02643                 #pragma omp parallel for schedule (static) if (sensor_size > 1e5)
02644          #endif
02645          for (size_t i = 0; i < sensor_size; i++){
02646              if (ux_max[i] < ux[index[i]]) ux_max[i] = ux[index[i]];             
02647              if (uy_max[i] < uy[index[i]]) uy_max[i] = uy[index[i]];             
02648              if (uz_max[i] < uz[index[i]]) uz_max[i] = uz[index[i]];             
02649          }            
02650       }// u_max
02651 
02652     
02653     if (Parameters->IsStore_u_rms()){        
02654          
02655          const float* ux           = Get_ux_sgx().GetRawData();
02656          const float* uy           = Get_uy_sgy().GetRawData();
02657          const float* uz           = Get_uz_sgz().GetRawData();
02658                float* ux_rms       = Get_ux_sensor_rms().GetRawData();
02659                float* uy_rms       = Get_uy_sensor_rms().GetRawData();
02660                float* uz_rms       = Get_uz_sensor_rms().GetRawData();               
02661          const long * index        = Get_sensor_mask_ind().GetRawData();
02662          const size_t sensor_size = Get_sensor_mask_ind().GetTotalElementCount();               
02663           
02664          #ifndef __NO_OMP__            
02665              #pragma omp parallel for schedule (static) if (sensor_size > 1e5)                      
02666          #endif
02667          for (size_t i = 0; i <sensor_size; i++){
02668              ux_rms[i] += (ux[index[i]] * ux[index[i]]);             
02669              uy_rms[i] += (uy[index[i]] * uy[index[i]]);             
02670              uz_rms[i] += (uz[index[i]] * uz[index[i]]);             
02671          }            
02672       }// u_rms
02673     
02674     
02675     
02676       if ((Parameters->IsStore_I_max()) || (Parameters->IsStore_I_avg())) StoreIntensityData();      
02677     
02678      
02679 }// end of StoreData
02680 //------------------------------------------------------------------------------
02681 
02682 
02687 void TKSpaceFirstOrder3DSolver::StoreIntensityData(){
02688         
02689     #ifndef __NO_OMP__            
02690         #pragma omp parallel 
02691     #endif        
02692     {
02693         const size_t  sensor_size = Get_sensor_mask_ind().GetTotalElementCount();               
02694 
02695         const float* ux      = Get_ux_sgx().GetRawData();
02696         const float* uy      = Get_uy_sgy().GetRawData();
02697         const float* uz      = Get_uz_sgz().GetRawData();
02698         const float* p       = Get_p().GetRawData();
02699         
02700         float * ux_i_1       = Get_ux_sensor_i_1_agr_2().GetRawData();
02701         float * uy_i_1       = Get_uy_sensor_i_1_agr_2().GetRawData();
02702         float * uz_i_1       = Get_uz_sensor_i_1_agr_2().GetRawData();
02703         float * p_i_1        = Get_p_sensor_i_1_raw().GetRawData();
02704         
02705         float * Ix_avg       = NULL;
02706         float * Iy_avg       = NULL;
02707         float * Iz_avg       = NULL;
02708         
02709         float * Ix_max       = NULL;
02710         float * Iy_max       = NULL;
02711         float * Iz_max       = NULL;
02712         
02713         
02714         if (Parameters->IsStore_I_avg()) {
02715            Ix_avg       = Get_Ix_sensor_avg().GetRawData();
02716            Iy_avg       = Get_Iy_sensor_avg().GetRawData();
02717            Iz_avg       = Get_Iz_sensor_avg().GetRawData();
02718         }
02719         if (Parameters->IsStore_I_max()) {
02720            Ix_max       = Get_Ix_sensor_max().GetRawData();
02721            Iy_max       = Get_Iy_sensor_max().GetRawData();
02722            Iz_max       = Get_Iz_sensor_max().GetRawData();                
02723         }
02724         
02725         const long * index   = Get_sensor_mask_ind().GetRawData();
02726         
02727         const TDimensionSizes Dims = Parameters->GetFullDimensionSizes();             
02728         
02729         // calculate  intensity
02730         if (t_index+1 > Parameters->GetStartTimeIndex()){
02731             #ifndef __NO_OMP__            
02732                #pragma omp for schedule (static) 
02733             #endif
02734             for (size_t i = 0; i < sensor_size; i++){
02735                // calculate positions in the grid
02736                const size_t sensor_point_ind    = index[i];                 
02737                const size_t sensor_point_ind_1x = ((sensor_point_ind % Dims.X) == 0) ? 
02738                                                     sensor_point_ind : sensor_point_ind - 1;                                                    
02739                const size_t sensor_point_ind_1y = (((sensor_point_ind /  Dims.X) %  Dims.Y) == 0) ? 
02740                                                      sensor_point_ind : sensor_point_ind - Dims.X; 
02741                const size_t sensor_point_ind_1z = (((sensor_point_ind /  (Dims.Y * Dims.X))) == 0) ? 
02742                                                      sensor_point_ind : sensor_point_ind - (Dims.X * Dims.Y);                           
02743                              
02744                //avg of actual values of u in staggered grid
02745                const float ux_act = (ux[sensor_point_ind] + ux[sensor_point_ind_1x]) * 0.5f;
02746                const float uy_act = (uy[sensor_point_ind] + uy[sensor_point_ind_1y]) * 0.5f;
02747                const float uz_act = (uz[sensor_point_ind] + uz[sensor_point_ind_1z]) * 0.5f;
02748                const float p_data = p_i_1[i];
02749                
02750                // calculate actual intensity based on p(n-1) * 1/4[ u(i-1)(n-1) + u(i)(n-1) + u(i-1)(n) + u(i)(n)
02751                const float Ix = p_data * ((ux_act + ux_i_1[i]) * 0.5f);
02752                const float Iy = p_data * ((uy_act + uy_i_1[i]) * 0.5f);
02753                const float Iz = p_data * ((uz_act + uz_i_1[i]) * 0.5f);
02754                
02755                ux_i_1[i] = ux_act;
02756                uy_i_1[i] = uy_act;
02757                uz_i_1[i] = uz_act;
02758                p_i_1[i]  = p[sensor_point_ind];
02759                
02760                const float Divider = 1.0f / (float) (Parameters->Get_Nt() - Parameters->GetStartTimeIndex());
02761                
02762                // easily predictable...
02763                if (Parameters->IsStore_I_max()) {                  
02764                   if (Ix_max[i] < Ix)  Ix_max[i] = Ix;
02765                   if (Iy_max[i] < Iy)  Iy_max[i] = Iy;            
02766                   if (Iz_max[i] < Iz)  Iz_max[i] = Iz;                                      
02767                }
02768                
02769                // easily predictable...
02770                if (Parameters->IsStore_I_avg()) {                                    
02771                   Ix_avg[i] += (Ix * Divider);
02772                   Iy_avg[i] += (Iy * Divider);            
02773                   Iz_avg[i] += (Iz * Divider);                                      
02774                }        
02775             }
02776         } else {
02777             // first step of the recording. Store only actual data
02778              #ifndef __NO_OMP__            
02779                #pragma omp for schedule (static) 
02780             #endif
02781             for (size_t i = 0; i < sensor_size; i++){
02782                // calculate positions in the grid
02783                const size_t sensor_point_ind    = index[i];                 
02784                const size_t sensor_point_ind_1x = ((sensor_point_ind % Dims.X) == 0) ? 
02785                                                     sensor_point_ind : sensor_point_ind - 1;                                                    
02786                const size_t sensor_point_ind_1y = (((sensor_point_ind /  Dims.X) %  Dims.Y) == 0) ? 
02787                                                      sensor_point_ind : sensor_point_ind - Dims.X; 
02788                const size_t sensor_point_ind_1z = (((sensor_point_ind /  (Dims.Y * Dims.X))) == 0) ? 
02789                                                      sensor_point_ind : sensor_point_ind - (Dims.X * Dims.Y);                           
02790                              
02791                //avg of actual values of u in staggered grid
02792                ux_i_1[i] = (ux[sensor_point_ind] + ux[sensor_point_ind_1x]) * 0.5f;
02793                uy_i_1[i] = (uy[sensor_point_ind] + uy[sensor_point_ind_1y]) * 0.5f;
02794                uz_i_1[i] = (uz[sensor_point_ind] + uz[sensor_point_ind_1z]) * 0.5f;
02795                p_i_1[i]   = p[sensor_point_ind];
02796                               
02797             }
02798         }// else
02799                 
02800     }// parallel
02801     
02802 
02803 }// end of StoreIntensity
02804 //------------------------------------------------------------------------------
02805 
02809 void  TKSpaceFirstOrder3DSolver::WriteOutputDataInfo(){
02810     
02811     // Write scalars
02812     Parameters->SaveScalarsToHDF5File(Parameters->HDF5_OutputFile);        
02813     THDF5_FileHeader & HDF5_FileHeader = Parameters->HDF5_FileHeader;
02814    
02815     // Write File header
02816     
02817     HDF5_FileHeader.SetCodeName(GetCodeName());
02818     HDF5_FileHeader.SetMajorFileVersion();
02819     HDF5_FileHeader.SetMinorFileVersion();
02820     HDF5_FileHeader.SetActualCreationTime();
02821     HDF5_FileHeader.SetFileType(THDF5_FileHeader::hdf5_ft_output);
02822     HDF5_FileHeader.SetHostName();
02823 
02824     HDF5_FileHeader.SetMemoryConsumption(ShowMemoryUsageInMB());
02825 
02826     // Stop total timer here
02827     TotalTime.Stop();
02828     HDF5_FileHeader.SetExecutionTimes(GetTotalTime(),                                 
02829                                       GetDataLoadTime(),
02830                                       GetPreProcessingTime(),
02831                                       GetSimulationTime(),
02832                                       GetPostProcessingTime());
02833     
02834 
02835     HDF5_FileHeader.SetNumberOfCores();
02836   
02837             
02838      
02839     HDF5_FileHeader.WriteHeaderToOutputFile(Parameters->HDF5_OutputFile);
02840             
02841             
02842     
02843 }// end of WriteOutputDataInfo
02844 //------------------------------------------------------------------------------
02845 
02846 
02847 //----------------------------------------------------------------------------//
02848 //                            Private methods                                 //
02849 //----------------------------------------------------------------------------//
02850 
02851 
 All Classes Files Functions Variables Typedefs Enumerations