34 #include <sys/resource.h> 39 #define _USE_MATH_DEFINES 42 #pragma comment(lib, "Psapi.lib") 49 #include <immintrin.h> 75 mMatrixContainer(), mOutputStreamContainer(),
78 mTotalTime(), mPreProcessingTime(), mDataLoadTime (), mSimulationTime(),
79 mPostProcessingTime(), mIterationTime()
84 H5Eset_auto(H5E_DEFAULT, NULL, NULL);
161 if (recoverFromCheckpoint)
174 size_t checkpointedTimeIndex;
181 checkpointFile.
close();
241 catch (
const std::exception& e)
265 catch (
const std::exception& e)
312 catch (
const std::exception &e)
329 catch (
const std::exception &e)
344 struct rusage mem_usage;
345 getrusage(RUSAGE_SELF, &mem_usage);
347 return mem_usage.ru_maxrss >> 10;
353 PROCESS_MEMORY_COUNTERS pmc;
355 hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ,
357 GetCurrentProcessId());
359 GetProcessMemoryInfo(hProcess, &pmc,
sizeof(pmc));
360 CloseHandle(hProcess);
362 return pmc.PeakWorkingSetSize >> 20;
401 #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER)) 404 #ifdef __INTEL_COMPILER 412 #if (defined (__AVX2__)) 414 #elif (defined (__AVX__)) 416 #elif (defined (__SSE4_2__)) 418 #elif (defined (__SSE4_1__)) 420 #elif (defined (__SSE3__)) 422 #elif (defined (__SSE2__)) 439 #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER)) 440 setenv(
"OMP_PROC_BIND",
"TRUE", 1);
443 #ifdef __INTEL_COMPILER 444 setenv(
"KMP_AFFINITY",
"none", 1);
450 _putenv_s(
"KMP_AFFINITY",
"none");
468 fftwf_init_threads();
477 #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER)) 479 if (recoverFromPrevState)
492 catch (
const std::runtime_error& e)
775 #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER)) 784 catch (
const std::runtime_error& e)
793 if (checkpointFile.
isOpen()) checkpointFile.
close();
825 checkpointFile.
close();
914 #pragma omp parallel for schedule(static) 915 for (
size_t z = 0; z < reducedDimensionSizes.
nz; z++)
917 for (
size_t y = 0; y < reducedDimensionSizes.
ny; y++)
920 for (
size_t x = 0; x < reducedDimensionSizes.
nx; x++)
922 const size_t i =
get1DIndex(z, y, x, reducedDimensionSizes);
923 const float eKappa = divider * kappa[i];
925 tempFftX[i] *= ddxKShiftNeg[x] * eKappa;
926 tempFftY[i] *= ddyKShiftNeg[y] * eKappa;
927 tempFftZ[i] *= ddzKShiftNeg[z] * eKappa;
950 #pragma omp parallel for schedule(static) 951 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
953 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
956 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
958 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
959 duxdx[i] *= duxdxn[x];
960 duydy[i] *= duydyn[y];
961 duzdz[i] *= duzdzn[z];
1004 #pragma omp parallel for schedule(static) 1005 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1007 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1010 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1012 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1013 const float sumRhosDt = (2.0f * (rhoX[i] + rhoY[i] + rhoZ[i]) + rho0) * dt;
1015 rhoX[i] = pmlX[x] * ((pmlX[x] * rhoX[i]) - sumRhosDt * duxdx[i]);
1016 rhoY[i] = pmlY[y] * ((pmlY[y] * rhoY[i]) - sumRhosDt * duydy[i]);
1017 rhoZ[i] = pmlZ[z] * ((pmlZ[z] * rhoZ[i]) - sumRhosDt * duzdz[i]);
1027 #pragma omp parallel for schedule(static) 1028 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1030 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1033 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1035 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1036 const float sumRhosDt = (2.0f * (rhoX[i] + rhoY[i] + rhoZ[i]) + rho0[i]) * dt;
1038 rhoX[i] = pmlX[x] * ((pmlX[x] * rhoX[i]) - sumRhosDt * duxdx[i]);
1039 rhoY[i] = pmlY[y] * ((pmlY[y] * rhoY[i]) - sumRhosDt * duydy[i]);
1040 rhoZ[i] = pmlZ[z] * ((pmlZ[z] * rhoZ[i]) - sumRhosDt * duzdz[i]);
1082 #pragma omp parallel for schedule(static) 1083 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1085 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1088 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1090 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1092 rhoX[i] = pmlX[x] * (((pmlX[x] * rhoX[i]) - (dtRho0 * duxdx[i])));
1093 rhoY[i] = pmlY[y] * (((pmlY[y] * rhoY[i]) - (dtRho0 * duydy[i])));
1094 rhoZ[i] = pmlZ[z] * (((pmlZ[z] * rhoZ[i]) - (dtRho0 * duzdz[i])));
1104 #pragma omp parallel for schedule(static) 1105 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1107 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1110 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1112 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1113 const float dtRho0 = dt * rho0[i];
1115 rhoX[i] = pmlX[x] * (((pmlX[x] * rhoX[i]) - (dtRho0 * duxdx[i])));
1116 rhoY[i] = pmlY[y] * (((pmlY[y] * rhoY[i]) - (dtRho0 * duydy[i])));
1117 rhoZ[i] = pmlZ[z] * (((pmlZ[z] * rhoZ[i]) - (dtRho0 * duzdz[i])));
1156 RealMatrix& absorbTauTerm = velocitGradientSum;
1164 computePressureTermsNonlinear<true, true>(densitySum, nonlinearTerm, velocitGradientSum);
1168 computePressureTermsNonlinear<true, false>(densitySum, nonlinearTerm, velocitGradientSum);
1175 computePressureTermsNonlinear<false, true>(densitySum, nonlinearTerm, velocitGradientSum);
1179 computePressureTermsNonlinear<false, false>(densitySum, nonlinearTerm, velocitGradientSum);
1197 sumPressureTermsNonlinear<true, true>(absorbTauTerm, absorbEtaTerm, nonlinearTerm);
1201 sumPressureTermsNonlinear<true, false>(absorbTauTerm, absorbEtaTerm, nonlinearTerm);
1206 sumPressureTermsNonlinear<false, false>(absorbTauTerm, absorbEtaTerm, nonlinearTerm);
1218 sumPressureTermsNonlinearLossless<true, true, true>();
1222 sumPressureTermsNonlinearLossless<true, true, false>();
1229 sumPressureTermsNonlinearLossless<true, false, true>();
1233 sumPressureTermsNonlinearLossless<true, false, false>();
1243 sumPressureTermsNonlinearLossless<false, true, true>();
1247 sumPressureTermsNonlinearLossless<false, true, false>();
1254 sumPressureTermsNonlinearLossless<false, false, true>();
1258 sumPressureTermsNonlinearLossless<false, false, false>();
1315 sumPressureTermsLinear<true, true>(absorbTauTerm, absorbEtaTerm, densitySum);
1319 sumPressureTermsLinear<true, false>(absorbTauTerm, absorbEtaTerm, densitySum);
1324 sumPressureTermsLinear<false, false>(absorbTauTerm, absorbEtaTerm, densitySum);
1368 const size_t sourceSize = velocitySourceIndex.
size();
1374 if (velocitySourceMode == 0)
1376 #pragma omp parallel for if (sourceSize > 16384) 1377 for (
size_t i = 0; i < sourceSize; i++)
1379 const size_t signalIndex = (velocitySourceMany != 0) ? index2D + i : index2D;
1380 velocityMatrix[velocitySourceIndex[i]] = velocitySourceInput[signalIndex];
1384 if (velocitySourceMode == 1)
1386 #pragma omp parallel for if (sourceSize > 16384) 1387 for (
size_t i = 0; i < sourceSize; i++)
1389 const size_t signalIndex = (velocitySourceMany != 0) ? index2D + i : index2D;
1390 velocityMatrix[velocitySourceIndex[i]] += velocitySourceInput[signalIndex];
1414 const size_t index2D = (isManyFlag) ? timeIndex * sourceSize : timeIndex;
1419 #pragma omp parallel for if (sourceSize > 16384) 1420 for (
size_t i = 0; i < sourceSize; i++)
1422 const size_t signalIndex = (isManyFlag) ? index2D + i : index2D;
1424 rhox[sourceIndex[i]] = sourceInput[signalIndex];
1425 rhoy[sourceIndex[i]] = sourceInput[signalIndex];
1426 rhoz[sourceIndex[i]] = sourceInput[signalIndex];
1432 #pragma omp parallel for if (sourceSize > 16384) 1433 for (
size_t i = 0; i < sourceSize; i++)
1435 const size_t signalIndex = (isManyFlag) ? index2D + i : index2D;
1437 rhox[sourceIndex[i]] += sourceInput[signalIndex];
1438 rhoy[sourceIndex[i]] += sourceInput[signalIndex];
1439 rhoz[sourceIndex[i]] += sourceInput[signalIndex];
1473 const float* c2Matrix = (c0ScalarFlag) ?
nullptr :
getC2().
getData();
1481 #pragma omp parallel for simd schedule(static) 1482 for (
size_t i = 0; i < nElements; i++)
1484 const float tmp = sourceInput[i] / (3.0f * ((c0ScalarFlag) ? c2Scalar : c2Matrix[i]));
1529 #pragma omp parallel for schedule(static) if (sourceSize > 16384) 1530 for (
size_t i = 0; i < sourceSize; i++)
1532 uxSgx[velocitySourceIndex[i]] += transducerSourceInput[delayMask[i] + timeIndex];
1556 #pragma omp parallel for schedule (static) 1557 for (
size_t z = 0; z < reducedDimensionSizes.
nz; z++)
1559 const float zf =
static_cast<float>(z);
1560 float zPart = 0.5f - fabs(0.5f - zf * nzRec);
1561 zPart = (zPart * zPart) * dz2Rec;
1563 for (
size_t y = 0; y < reducedDimensionSizes.
ny; y++)
1565 const float yf =
static_cast<float>(y);
1566 float yPart = 0.5f - fabs(0.5f - yf * nyRec);
1567 yPart = (yPart * yPart) * dy2Rec;
1569 const float yzPart = zPart + yPart;
1570 for (
size_t x = 0; x < reducedDimensionSizes.
nx; x++)
1572 const float xf =
static_cast<float>(x);
1573 float xPart = 0.5f - fabs(0.5f - xf * nxRec);
1574 xPart = (xPart * xPart) * dx2Rec;
1576 float k = cRefDtPi * sqrt(xPart + yzPart);
1578 const size_t i =
get1DIndex(z, y, x, reducedDimensionSizes);
1581 kappa[i] = (k == 0.0f) ? 1.0f : sin(k) / k;
1598 const float pi2 =
static_cast<float>(M_PI) * 2.0f;
1604 const float nxRec = 1.0f /
static_cast<float>(nx);
1605 const float nyRec = 1.0f /
static_cast<float>(ny);
1606 const float nzRec = 1.0f /
static_cast<float>(nz);
1615 #pragma omp parallel for schedule (static) 1616 for (
size_t z = 0; z < reducedDimensionSizes.
nz; z++)
1618 const float zf =
static_cast<float>(z);
1619 float zPart = 0.5f - fabs(0.5f - zf * nzRec);
1620 zPart = (zPart * zPart) * dzSqRec;
1622 for (
size_t y = 0; y < reducedDimensionSizes.
ny; y++)
1624 const float yf =
static_cast<float>(y);
1625 float yPart = 0.5f - fabs(0.5f - yf * nyRec);
1626 yPart = (yPart * yPart) * dySqRec;
1628 const float yzPart = zPart + yPart;
1630 for (
size_t x = 0; x < reducedDimensionSizes.
nx; x++)
1632 const float xf =
static_cast<float>(x);
1633 float xPart = 0.5f - fabs(0.5f - xf * nxRec);
1634 xPart = (xPart * xPart) * dxSqRec;
1636 float k = pi2 * sqrt(xPart + yzPart);
1637 float cRefK = cRefDt2 * k;
1639 const size_t i =
get1DIndex(z, y, x, reducedDimensionSizes);
1641 kappa[i] = (cRefK == 0.0f) ? 1.0f : sin(cRefK) / cRefK;
1643 absorbNabla1[i] = pow(k, alphaPower - 2);
1644 absorbNabla2[i] = pow(k, alphaPower - 1);
1646 if (absorbNabla1[i] == std::numeric_limits<float>::infinity()) absorbNabla1[i] = 0.0f;
1647 if (absorbNabla2[i] == std::numeric_limits<float>::infinity()) absorbNabla2[i] = 0.0f;
1663 const float tanPi2AlphaPower = tan(static_cast<float> (M_PI_2) * alphaPower);
1664 const float alphaNeperCoeff = (100.0f * pow(1.0e-6f / (2.0f * static_cast<float>(M_PI)), alphaPower)) /
1665 (20.0f * static_cast<float>(M_LOG10E));
1688 const float* cOMatrix = (c0ScalarFlag) ?
nullptr :
getC2().
getData();
1692 const float tanPi2AlphaPower = tan(static_cast<float>(M_PI_2) * alphaPower);
1696 const float alphaNeperCoeff = (100.0f * pow(1.0e-6f / (2.0f * static_cast<float>(M_PI)), alphaPower)) /
1697 (20.0f * static_cast<float>(M_LOG10E));
1700 #pragma omp parallel for schedule (static) 1701 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1703 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1705 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1707 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1709 const float alphaCoeff2 = 2.0f * alphaNeperCoeff *
1710 ((alphaCoeffScalarFlag) ? alphaCoeffScalar : alphaCoeffMatrix[i]);
1712 absorbTau[i] = (-alphaCoeff2) * pow(((c0ScalarFlag) ? c0Scalar : cOMatrix[i]), alphaPower - 1);
1713 absorbEta[i] = alphaCoeff2 * pow(((c0ScalarFlag) ? c0Scalar : cOMatrix[i]),
1714 alphaPower) * tanPi2AlphaPower;
1740 #pragma omp parallel for schedule(static) 1741 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1743 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1746 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1748 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1750 dtRho0Sgx[i] = (dt * duxdxnSgx[x]) / dtRho0Sgx[i];
1751 dtRho0Sgy[i] = (dt * duydynSgy[y]) / dtRho0Sgy[i];
1752 dtRho0Sgz[i] = (dt * duzdznSgz[z]) / dtRho0Sgz[i];
1769 #pragma omp parallel for simd schedule(static) aligned(c2) 1770 for (
size_t i=0; i <
getC2().
size(); i++)
1772 c2[i] = c2[i] * c2[i];
1788 const float divider = 1.0f / (2.0f *
static_cast<float>(nElements));
1799 #pragma omp parallel for simd schedule(static) \ 1800 aligned(uxSgx, uySgy, uzSgz, dtRho0Sgx, dtRho0Sgy, dtRho0Sgz) 1801 for (
size_t i = 0; i < nElements; i++)
1803 uxSgx[i] *= dtRho0Sgx[i] * divider;
1804 uySgy[i] *= dtRho0Sgy[i] * divider;
1805 uzSgz[i] *= dtRho0Sgz[i] * divider;
1828 #pragma omp parallel for simd schedule(static) aligned(uxSgx, uySgy, uzSgz) 1829 for (
size_t i = 0; i < nElements; i++)
1831 uxSgx[i] *= dividerX;
1832 uySgy[i] *= dividerY;
1833 uzSgz[i] *= dividerZ;
1848 const size_t nElements = dimensionSizes.
nElements();
1863 #pragma omp parallel for schedule(static) 1864 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1866 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1869 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1871 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1872 uxSgx[i] *= dividerX * dxudxnSgx[x];
1873 uySgy[i] *= dividerY * dyudynSgy[y];
1874 uzSgz[i] *= dividerZ * dzudznSgz[z] ;
1887 const size_t nElements = dimensionSizes.
nElements();
1888 const float divider = 1.0f /
static_cast<float>(nElements);
1907 #pragma omp parallel for schedule(static) 1908 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1910 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1913 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1915 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1917 uxSgx[i] = (uxSgx[i] * pmlX[x] - divider * ifftX[i] * dtRho0Sgx[i]) * pmlX[x];
1922 #pragma omp parallel for schedule(static) 1923 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1925 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1927 const float ePmlY = pmlY[y];
1929 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1931 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1933 uySgy[i] = (uySgy[i] * ePmlY - divider * ifftY[i] * dtRho0Sgy[i]) * ePmlY;
1938 #pragma omp parallel for schedule(static) 1939 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1941 const float ePmlZ = pmlZ[z];
1942 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1945 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1947 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1949 uzSgz[i] = (uzSgz[i] * ePmlZ - divider * ifftZ[i] * dtRho0Sgz[i]) * ePmlZ;
1962 const size_t nElements = dimensionSizes.
nElements();
1981 #pragma omp parallel for schedule(static) 1982 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1984 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
1987 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
1989 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
1991 uxSgx[i] = (uxSgx[i] * pmlX[x] - dividerX * ifftX[i]) * pmlX[x];
1996 #pragma omp parallel for schedule(static) 1997 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
1999 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
2002 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
2004 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
2006 uySgy[i] = (uySgy[i] * pmlY[y] - dividerY * ifftY[i]) * pmlY[y];
2011 #pragma omp parallel for schedule(static) 2012 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
2014 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
2017 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
2019 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
2021 uzSgz[i] = (uzSgz[i] * pmlZ[z] - dividerZ * ifftZ[i]) * pmlZ[z];
2034 const size_t nElements = dimensionSizes.
nElements();
2057 #pragma omp parallel for schedule(static) 2058 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
2060 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
2063 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
2065 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
2067 uxSgx[i] = (uxSgx[i] * pmlX[x] - (dividerX * dxudxnSgx[x] * ifftX[i])) * pmlX[x];
2072 #pragma omp parallel for schedule(static) 2073 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
2075 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
2078 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
2080 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
2082 uySgy[i] = (uySgy[i] * pmlY[y] - (dividerY * dyudynSgy[y] * ifftY[i])) * pmlY[y];
2087 #pragma omp parallel for schedule(static) 2088 for (
size_t z = 0; z < dimensionSizes.
nz; z++)
2090 for (
size_t y = 0; y < dimensionSizes.
ny; y++)
2093 for (
size_t x = 0; x < dimensionSizes.
nx; x++)
2095 const size_t i =
get1DIndex(z, y, x, dimensionSizes);
2097 uzSgz[i] = (uzSgz[i] * pmlZ[z] - (dividerZ * dzudznSgz[z] * ifftZ[i])) * pmlZ[z];
2129 #pragma omp parallel for schedule(static) 2130 for (
size_t z = 0; z < reducedDimensionSizes.
nz; z++)
2132 for (
size_t y = 0; y < reducedDimensionSizes.
ny; y++)
2135 for (
size_t x = 0; x < reducedDimensionSizes.
nx; x++)
2137 const size_t i =
get1DIndex(z, y, x, reducedDimensionSizes);
2141 ifftX[i] = eKappa * ddxKShiftPos[x];
2142 ifftY[i] = eKappa * ddyKShiftPos[y];
2143 ifftZ[i] = eKappa * ddzKShiftPos[z];
2153 template<
bool bOnAScalarFlag,
bool rho0ScalarFlag>
2167 const float* bOnAMatrix = (bOnAScalarFlag) ?
nullptr :
getBOnA().
getData();
2170 const float* rho0Matrix = (rho0ScalarFlag) ?
nullptr :
getRho0().
getData();
2173 float* eDensitySum = densitySum.
getData();
2174 float* eNonlinearTerm = nonlinearTerm.
getData();
2175 float* eVelocityGradientSum = velocityGradientSum.
getData();
2179 #pragma omp parallel for simd schedule(static) \ 2180 aligned(eDensitySum, eNonlinearTerm, eVelocityGradientSum, \ 2181 rhoX, rhoY, rhoZ, bOnAMatrix, rho0Matrix, duxdx, duydy, duzdz) 2182 for (
size_t i = 0; i < nElements ; i++)
2184 const float rhoSum = rhoX[i] + rhoY[i] + rhoZ[i];
2185 const float bOnA = (bOnAScalarFlag) ? bOnAScalar : bOnAMatrix[i];
2186 const float rho0 = (rho0ScalarFlag) ? rho0Scalar : rho0Matrix[i];
2188 eDensitySum[i] = rhoSum;
2189 eNonlinearTerm[i] = (bOnA * rhoSum * rhoSum) / (2.0f * rho0) + rhoSum;
2190 eVelocityGradientSum[i] = rho0 * (duxdx[i] + duydy[i] + duzdz[i]);
2211 float* pDensitySum = densitySum.
getData();
2212 float* pVelocityGradientSum = velocityGradientSum.
getData();
2214 #pragma omp parallel for simd schedule(static) aligned (pDensitySum, rhoX, rhoY, rhoZ) 2215 for (
size_t i = 0; i < size; i++)
2217 pDensitySum[i] = rhoX[i] + rhoY[i] + rhoZ[i];
2223 #pragma omp parallel for simd schedule(static) aligned (pDensitySum, duxdx, duydy, duzdz) 2224 for (
size_t i = 0; i < size; i++)
2226 pVelocityGradientSum[i] = eRho0 * (duxdx[i] + duydy[i] + duzdz[i]);
2232 #pragma omp parallel for simd schedule(static) aligned (pDensitySum, rho0, duxdx, duydy, duzdz) 2233 for (
size_t i = 0; i < size; i++)
2235 pVelocityGradientSum[i] = rho0[i] * (duxdx[i] + duydy[i] + duzdz[i]);
2256 #pragma omp parallel for simd schedule(static) aligned(pFftPart1, pFftPart2, absorbNabla1, absorbNabla2) 2257 for (
size_t i = 0; i < nElements; i++)
2259 pFftPart1[i] *= absorbNabla1[i];
2260 pFftPart2[i] *= absorbNabla2[i];
2268 template<
bool c0ScalarFlag,
bool areTauAndEtaScalars>
2273 const float* pAbsorbTauTerm = absorbTauTerm.
getData();
2274 const float* pAbsorbEtaTerm = absorbEtaTerm.
getData();
2277 const float divider = 1.0f /
static_cast<float>(nElements);
2280 const float* c2Matrix = (c0ScalarFlag) ?
nullptr :
getC2().
getData();
2283 const float* absorbTauMatrix = (areTauAndEtaScalars) ?
nullptr :
getAbsorbTau().
getData();
2286 const float* absorbEtaMatrix = (areTauAndEtaScalars) ?
nullptr :
getAbsorbEta().
getData();;
2288 const float* bOnA = nonlinearTerm.
getData();
2291 #pragma omp parallel for simd schedule(static) \ 2292 aligned(p, c2Matrix, pAbsorbTauTerm, absorbTauMatrix, pAbsorbEtaTerm, absorbEtaMatrix) 2293 for (
size_t i = 0; i < nElements; i++)
2295 const float c2 = (c0ScalarFlag) ? c2Scalar : c2Matrix[i];
2296 const float absorbTau = (areTauAndEtaScalars) ? absorbTauScalar : absorbTauMatrix[i];
2297 const float absorbEta = (areTauAndEtaScalars) ? absorbEtaScalar : absorbEtaMatrix[i];
2299 p[i] = c2 *(bOnA[i] + (divider * ((pAbsorbTauTerm[i] * absorbTau) - (pAbsorbEtaTerm[i] * absorbEta))));
2307 template<
bool c0ScalarFlag,
bool areTauAndEtaScalars>
2312 const float* pAbsorbTauTerm = absorbTauTerm.
getData();
2313 const float* pAbsorbEtaTerm = absorbEtaTerm.
getData();
2316 const float divider = 1.0f /
static_cast<float>(nElements);
2319 const float* c2Matrix = (c0ScalarFlag) ?
nullptr :
getC2().
getData();
2322 const float* absorbTauMatrix = (areTauAndEtaScalars) ?
nullptr :
getAbsorbTau().
getData();
2325 const float* absorbEtaMatrix = (areTauAndEtaScalars) ?
nullptr :
getAbsorbEta().
getData();;
2327 const float* pDenistySum = densitySum.
getData();
2330 #pragma omp parallel for simd schedule(static) \ 2331 aligned (p, pDenistySum, c2Matrix, absorbTauMatrix, absorbEtaMatrix, pAbsorbTauTerm, pAbsorbEtaTerm) 2332 for (
size_t i = 0; i < nElements; i++)
2334 const float c2 = (c0ScalarFlag) ? c2Scalar : c2Matrix[i];
2335 const float absorbTau = (areTauAndEtaScalars) ? absorbTauScalar : absorbTauMatrix[i];
2336 const float absorbEta = (areTauAndEtaScalars) ? absorbEtaScalar : absorbEtaMatrix[i];
2338 p[i] = c2 * (pDenistySum[i] + (divider * ((pAbsorbTauTerm[i] * absorbTau) - (pAbsorbEtaTerm[i] * absorbEta))));
2346 template<
bool c0ScalarFlag,
bool nonlinearFlag,
bool rho0ScalarFlag>
2358 const float* c2Matrix = (c0ScalarFlag) ?
nullptr :
getC2().
getData();
2361 const float* bOnAMatrix = (nonlinearFlag) ?
nullptr :
getBOnA().
getData();
2364 const float* rho0Matrix = (rho0ScalarFlag) ?
nullptr :
getRho0().
getData();
2366 #pragma omp parallel for simd schedule (static) 2367 for (
size_t i = 0; i < nElements; i++)
2369 const float c2 = (c0ScalarFlag) ? c2Scalar : c2Matrix[i];
2370 const float bOnA = (nonlinearFlag) ? bOnAScalar : bOnAMatrix[i];
2371 const float rho0 = (rho0ScalarFlag) ? rho0Scalar : rho0Matrix[i];
2373 const float sumDensity = rhoX[i] + rhoY[i] + rhoZ[i];
2375 p[i] = c2 * (sumDensity + (bOnA * (sumDensity * sumDensity) / (2.0f * rho0)));
2396 #pragma omp parallel for simd schedule(static) aligned(p, rhoX, rhoY, rhoZ) 2397 for (
size_t i = 0; i < nElements; i++)
2399 p[i] = c2 * (rhoX[i] + rhoY[i] + rhoZ[i]);
2406 #pragma omp parallel for simd schedule(static) aligned(p, c2, rhoX, rhoY, rhoZ) 2407 for (
size_t i = 0; i < nElements; i++)
2409 p[i] = c2[i] * (rhoX[i] + rhoY[i] + rhoZ[i]);
2439 xShiftDims.
nx = xShiftDims.
nx / 2 + 1;
2442 yShiftDims.
ny = yShiftDims.
ny / 2 + 1;
2445 zShiftDims.
nz = zShiftDims.
nz / 2 + 1;
2455 #pragma omp parallel for schedule(static) 2456 for (
size_t z = 0; z < xShiftDims.
nz; z++)
2458 for (
size_t y = 0; y < xShiftDims.
ny; y++)
2461 for (
size_t x = 0; x < xShiftDims.
nx; x++)
2463 const size_t i =
get1DIndex(z, y, x, xShiftDims);
2465 tempFftShift[i] = tempFftShift[i] * xShiftNegR[x] * dividerX;
2475 #pragma omp parallel for schedule(static) 2476 for (
size_t z = 0; z < yShiftDims.
nz; z++)
2478 for (
size_t y = 0; y < yShiftDims.
ny; y++)
2481 for (
size_t x = 0; x < yShiftDims.
nx; x++)
2483 const size_t i =
get1DIndex(z, y, x, yShiftDims);
2485 tempFftShift[i] = (tempFftShift[i] * yShiftNegR[y]) * dividerY;
2494 #pragma omp parallel for schedule(static) 2495 for (
size_t z = 0; z < zShiftDims.
nz; z++)
2497 for (
size_t y = 0; y < zShiftDims.
ny; y++)
2500 for (
size_t x = 0; x < zShiftDims.
nx; x++)
2502 const size_t i =
get1DIndex(z, y, x, zShiftDims);
2504 tempFftShift[i] = (tempFftShift[i] * zShiftNegR[z]) * dividerZ;
2529 const double toGo = ((elTimeWithLegs /
static_cast<double>((timeIndex + 1)) * nt)) - elTimeWithLegs;
2535 current = localtime(&now);
2539 static_cast<size_t>(((timeIndex) / (nt * 0.01f))),
'%',
2541 current->tm_mday, current->tm_mon+1, current->tm_year-100,
2542 current->tm_hour, current->tm_min, current->tm_sec);
2671 checkpointDimSizes.
nx,
2672 checkpointDimSizes.
ny,
2673 checkpointDimSizes.
nz,
2686 double totalTime, dataLoadTime, preProcessingTime, simulationTime, postProcessingTime;
2693 postProcessingTime);
2709 return (z * dimensionSizes.
ny + y) * dimensionSizes.
nx + x;
ComplexMatrix & getDdyKShiftNeg()
Get negative Fourier shift in y.
IndexMatrix & getDelayMask()
Get delay mask for many types sources.
void saveScalarsToOutputFile()
Save scalar values into the output HDF5 file.
RealMatrix & getVelocityZSourceInput()
Get Velocity source input data in z direction.
bool getStoreVelocityFinalAllFlag() const
Is –u_final set?
ErrorMessage kErrFmtBadMajorFileVersion
Command line parameters error message.
MatrixName kUyFinalName
uy_final variable name
ComplexMatrix & getDdyKShiftPos()
Get positive Fourier shift in y.
The class for real matrices.
float getDt() const
Get time step size.
void generateKappa()
Generate kappa matrix for lossless media.
void create(const std::string &fileName, unsigned int flags=H5F_ACC_TRUNC)
Create the HDF5 file.
float getRho0Scalar() const
Get value of homogeneous medium density.
bool isCheckpointInterruption() const
Was the loop interrupted to checkpoint?
RealMatrix & getDuydy()
Get velocity gradient on in y direction.
void sumPressureTermsLinear(const RealMatrix &absorbTauTerm, const RealMatrix &absorbEtaTerm, const RealMatrix &densitySum)
Sum sub-terms to calculate new pressure, after FFTs, linear case.
void recomputeIndicesToMatlab()
Recompute indices C++ -> MATLAB.
void writeScalarValue(const hid_t parentGroup, MatrixName &datasetName, const T value)
Write the scalar value under a specified group.
Hdf5File & getCheckpointFile()
Get checkpoint file handle.
RealMatrix & getRhoX()
Get density matrix in x direction.
void postProcessStreams()
Post-process all streams and flush them to the file.
void computePressureTermsLinear(RealMatrix &densitySum, RealMatrix &velocityGradientSum)
Calculate two temporary sums in the new pressure formula before taking the FFT, linear absorbing case...
float getDx() const
Get spatial displacement in x.
RealMatrix & getAbsorbNabla2()
Get absorbing coefficient Nabla2.
void addTransducerSource()
Add transducer data source to velocity x component.
void createC2RFftPlan1DY(RealMatrix &outMatrix)
Create FFTW plan for Complex-to-Real in the y dimension.
double getCumulatedSimulationTime() const
Get simulation time (time loop) accumulated over all legs.
RealMatrix & getPmlX()
Get PML in x.
RealMatrix & getUzSgz()
Get velocity matrix on staggered grid in z direction.
void createC2RFftPlan3D(RealMatrix &outMatrix)
Create FFTW plan for 3D Complex-to-Real.
size_t getCompressionLevel() const
Get compression level.
size_t getVelocitySourceMode() const
Get velocity source mode.
OutputMessage kOutFmtSSE3
Print version output message.
RealMatrix & getDtRho0Sgz()
Get dt * rho0Sgz matrix (time step size * ambient velocity on staggered grid in z direction)...
Class implementing 3D and 1D Real-To-Complex and Complex-To-Real transforms using FFTW interface...
void saveCheckpointData()
Save checkpoint data and flush aggregated outputs into the output file.
virtual void writeData(Hdf5File &file, MatrixName &matrixName, const size_t compressionLevel)
Write data into HDF5 file.
OutputMessage kOutFmtLoadingFftwWisdom
Output message.
OutputMessage kOutFmtCreatingCheckpoint
Output message.
hid_t getRootGroup() const
Get handle to the root group of the file.
virtual void writeData(Hdf5File &file, MatrixName &matrixName, const size_t compressionLevel)
Write data into HDF5 file.
size_t nz
Number of elements in the z direction.
void computeC2RFft1DX(RealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the x dimension.
bool getC0ScalarFlag() const
Is sound speed in the medium homogeneous (scalar value)?
float getAlphaPower() const
Get alpha power value for the absorption law.
RealMatrix & getRho0()
Get ambient density matrix.
RealMatrix & getAbsorbEta()
Get absorbing coefficient Eta.
size_t getNt() const
Get total number of time steps.
OutputMessage kOutFmtReadingOutputFile
Output message.
MatrixName kTimeIndexName
t_index name
OutputMessage kOutFmtCompResourcesHeader
Output message.
MatrixName kUxFinalName
ux_final variable name
OutputMessage kOutFmtSimulatoinFinalSeparator
Output message.
ErrorMessage kErrFmtPathDelimiters
delimiters for linux paths
OutputMessage kOutFmtFailed
Output message - failed message.
bool getRho0ScalarFlag() const
Is density in the medium homogeneous (scalar value)?
ComplexMatrix & getDdzKShiftPos()
Get positive Fourier shift in z.
float getAbsorbEtaScalar() const
Get absorb eta coefficient for homogeneous medium (scalar value)?
void open(const std::string &fileName, unsigned int flags=H5F_ACC_RDONLY)
Open the HDF5 file.
Hdf5FileHeader & getFileHeader()
Get file header handle.
TimeMeasure mTotalTime
Total time of the simulation.
RealMatrix & getRhoZ()
Get density matrix in z direction.
size_t getTransducerSourceFlag() const
Get transducer source flag.
std::string getCheckpointFileName() const
Get checkpoint file name.
void computeVelocityHomogeneousUniform()
Compute acoustic velocity for homogeneous medium and a uniform grid.
static void log(const LogLevel queryLevel, const std::string &format, Args ... args)
Log desired activity for a given log level, version with string format.
RealMatrix & getUyShifted()
Get velocity shifted on normal grid in y direction.
MatrixContainer mMatrixContainer
Matrix container with all the matrix classes.
TimeMeasure mPostProcessingTime
Post-processing time of the simulation.
RealMatrix & getTransducerSourceInput()
Get transducer source input data (signal).
OutputMessage kOutFmtElapsedTime
Output message.
OutputMessage kOutFmtPreProcessing
Output message.
void generateKappaAndNablas()
Generate kappa matrix, absorbNabla1, absorbNabla2 for absorbing medium.
static std::string wordWrapString(const std::string &inputString, const std::string &delimiters, const int indentation=0, const int lineSize=65)
RealMatrix & getAbsorbNabla1()
Get absorbing coefficient Nabla1.
size_t getNonLinearFlag() const
Is the wave propagation nonlinear?
float getDtRho0SgzScalar() const
Get value of dt / rho0Sgz.
double getCumulatedPreProcessingTime() const
Get pre-processing time accumulated over all legs.
TimeMeasure mPreProcessingTime
Pre-processing time of the simulation.
void computeAbsorbtionTerm(FftwComplexMatrix &fftPart1, FftwComplexMatrix &fftPart2)
Compute absorbing term with abosrbNabla1 and absorbNabla2.
virtual void loadInputData()
Load simulation data.
OutputMessage kOutFmtDataLoading
Output message.
void createC2RFftPlan1DX(RealMatrix &outMatrix)
Create FFTW plan for Complex-to-Real in the x dimension.
TimeMeasure mIterationTime
Iteration time of the simulation.
RealMatrix & getTemp2Real3D()
Get second real 3D temporary matrix.
void loadDataFromCheckpointFile()
Load selected matrices from the checkpoint HDF5 file.
Class storing all parameters of the simulation.
void computeInitialVelocityHeterogeneous()
Compute velocity for the initial pressure problem, heterogeneous medium, uniform grid.
void postProcessing()
Post processing, and closing the output streams.
void addInitialPressureSource()
Calculate initial pressure source.
RealMatrix & getAbsorbTau()
Get absorbing coefficient Tau.
MatrixName kNzName
Nz variable name.
void computePressureNonlinear()
Compute acoustic pressure for nonlinear case.
virtual void compute()
This method computes k-space First Order 3D simulation.
Hdf5File & getOutputFile()
Get output file handle.
double getElapsedTime() const
Get elapsed time.
RealMatrix & getTemp3Real3D()
Get third real 3D temporary matrix.
DimensionSizes getFullDimensionSizes() const
Get full dimension sizes of the simulation (real classes).
void computeC2RFft1DZ(RealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the z dimension.
OutputMessage kOutFmtSimulationEndSeparator
Output message.
IndexMatrix & getSensorMaskIndex()
Get linear sensor mask (spatial geometry of the sensor).
void storeSensorData()
Store sensor data.
void createR2CFftPlan1DY(RealMatrix &inMatrix)
Create an FFTW plan for 1D Real-to-Complex in the y dimension.
double getCumulatedDataLoadTime() const
Get simulation time (time loop) accumulated over all legs.
std::string getCodeName() const
Get code name - release code version.
size_t mActPercent
Percentage of the simulation done.
virtual size_t * getData()
Get raw data out of the class (for direct kernel access).
static void exportWisdom()
Export wisdom to the file.
RealMatrix & getBOnA()
Get B on A (nonlinear coefficient).
OutputMessage kOutFmtStoringFftwWisdom
Output message.
static bool canAccess(const std::string &fileName)
Can I access the file.
void setAbsorbTauScalar(const float absorbTau)
Set absorb tau coefficient for homogeneous medium (scalar value).
RealMatrix & getUzShifted()
Get velocity shifted on normal grid in z direction.
size_t getCheckpointInterval() const
Get checkpoint interval.
void SetElapsedTimeOverPreviousLegs(const double elapsedTime)
Set elapsed time in previous legs of the simulation.
float getDtRho0SgyScalar() const
Get value of dt / rho0Sgy.
virtual size_t size() const
Size of the matrix.
size_t getNumberOfThreads() const
Get number of CPU threads to use.
FftwComplexMatrix & getTempFftwZ()
Get temporary matrix for 1D fft in z.
double getElapsedTimeOverPreviousLegs() const
Get time spent in previous legs.
void computeVelocityHomogeneousNonuniform()
Compute acoustic velocity for homogenous medium and nonuniform grid.
virtual size_t size() const
Size of the matrix.
RealMatrix & getDtRho0Sgx()
Get dt * rho0Sgx matrix (time step size * ambient velocity on staggered grid in x direction)...
void computeC2RFft3D(RealMatrix &outMatrix)
Compute forward out-of-place 3D Complex-to-Real FFT.
size_t getPressureSourceFlag() const
Get pressure source flag.
float getC2Scalar() const
Get scalar value of sound speed squared.
The header file containing the main class of the project responsible for the entire simulation...
std::complex< float > FloatComplex
Datatype for complex single precision numbers.
void readScalarValue(const hid_t parentGroup, MatrixName &datasetName, T &value)
Read the scalar value under a specified group.
void reopenStreams()
Reopen streams after checkpoint file (datasets).
static void flush(const LogLevel queryLevel)
Flush output messages.
RealMatrix & getC2()
Get the c^2 matrix from the container.
void addStreams(MatrixContainer &matrixContainer)
Add all streams in simulation in the container, set all streams records here!
virtual void freeMemory()
Memory deallocation.
OutputMessage kOutFmtSSE2
Print version output message.
void computeDensityLinear()
Compute new values of acoustic density for linear case.
size_t getNonUniformGridFlag() const
Enable non uniform grid? - not implemented yet.
double getCumulatedTotalTime() const
Get total simulation time accumulated over all legs.
static void errorAndTerminate(const std::string &errorMessage)
Log an error and terminate the execution.
void freeMatrices()
Destroy and free all matrices.
OutputMessage kOutFmtIntelCompiler
Print version output message.
OutputMessage kOutFmtBuildNoDataTime
Print version output message.
bool getStoreVelocityNonStaggeredRawFlag() const
Is –u_non_staggered_raw set?
void freeStreams()
Free all streams - destroy them.
size_t get1DIndex(const size_t z, const size_t y, const size_t x, const DimensionSizes &dimensionSizes)
Compute 1D index using 3 spatial coordinates and the size of the matrix.
RealMatrix & getUxSgx()
Get velocity matrix on staggered grid in x direction.
void checkCheckpointFile()
Check the file type and the version of the checkpoint file.
RealMatrix & getPmlYSgy()
Get PML on staggered grid y.
OutputMessage kOutFmtStoringCheckpointData
Output message.
void computeVelocitySourceTerm(RealMatrix &velocityMatrix, const RealMatrix &velocitySourceInput, const IndexMatrix &velocitySourceIndex)
Add in velocity source terms.
ErrorMessage kErrFmtBadMinorFileVersion
Command line parameters error message.
RealMatrix & getPmlZSgz()
Get PML on staggered grid z.
FftwComplexMatrix & getTempFftwX()
Get temporary matrix for 1D fft in x.
void computeVelocityGradient()
Compute new values of acoustic velocity gradients.
OutputMessage kOutFmtCheckpointHeader
Output message.
RealMatrix & getP()
Get pressure matrix.
MatrixName kUzFinalName
uz_final variable name
FftwComplexMatrix & getTempFftwY()
Get temporary matrix for 1D fft in y.
size_t getSamplingStartTimeIndex() const
Get start time index when sensor data collection begins.
void computeR2CFft3D(RealMatrix &inMatrix)
Compute forward out-of-place 3D Real-to-Complex FFT.
void createMatrices()
Create all matrix objects in the container.
void incrementTimeIndex()
Increment simulation time step.
The header file containing a class responsible for printing out info and error messages (stdout...
void start()
Take start timestamp.
size_t getVelocityXSourceFlag() const
Get velocity in x source flag.
Class wrapping the HDF5 routines.
void sampleStreams()
Sample all streams.
float getDz() const
Get spatial displacement in z.
void addVelocitySource()
Add in all velocity sources.
void computeC2()
Calculate square of velocity.
RealMatrix & getDuxdx()
Get velocity gradient on in x direction.
OutputMessage kOutFmtVersionGitHash
Print version output message.
ComplexMatrix & getYShiftNegR()
Get negative shift for non-staggered velocity in y.
static std::string formatMessage(const std::string &format, Args ... args)
C++-11 replacement for sprintf that works with std::string instead of char*.
TimeMeasure mSimulationTime
Simulation time of the simulation.
void createStreams()
Create all streams - opens the datasets.
void setTimeIndex(const size_t timeIndex)
Set simulation time step - should be used only when recovering from checkpoint.
float getDtRho0SgxScalar() const
Get value of dt / rho0Sgx.
void stop()
Take stop timestamp.
bool getBOnAScalarFlag() const
Is nonlinear coefficient homogeneous in the medium (scalar value)?
void computePressureLinear()
Compute acoustic pressure for linear case.
OutputStreamContainer mOutputStreamContainer
Output stream container.
virtual void copyData(const BaseFloatMatrix &src)
Copy data from other matrix with the same size.
float getBOnAScalar() const
Get nonlinear coefficient for homogenous medium.
ComplexMatrix & getDdxKShiftNeg()
Get negative Fourier shift in x.
RealMatrix & getPmlZ()
Get PML in z.
Structure with 4D dimension sizes (3 in space and 1 in time).
float getCRef() const
Get reference sound speed.
float getDy() const
Get spatial displacement in y.
virtual FloatComplex * getComplexData()
Get raw complex data out of the class (for direct kernel access).
size_t getInitialPressureSourceFlag() const
Get initial pressure source flag (p0).
virtual ~KSpaceFirstOrder3DSolver()
Destructor.
RealMatrix & getDxudxn()
Non uniform grid acoustic velocity in x.
OutputMessage kOutFmtKWaveVersion
Output message.
Cuboid corners sensor mask.
void createC2RFftPlan1DZ(RealMatrix &outMatrix)
Create FFTW plan for Complex-to-Real in the z dimension.
OutputMessage kOutFmtPostProcessing
Output message.
bool getCopySensorMaskFlag() const
Is –copy_mask set set?
OutputMessage kOutFmtCheckpointTimeSteps
Output message.
SensorMaskType getSensorMaskType() const
Get sensor mask type (linear or corners).
IndexMatrix & getSensorMaskCorners()
Get cuboid corners sensor mask. (Spatial geometry of multiple sensors).
void setAbsorbEtaScalar(const float absrobEta)
Set absorb eta coefficient for homogeneous medium (scalar value).
static LogLevel getLevel()
OutputMessage kOutFmtMacOsBuild
Print version output message.
TimeMeasure mDataLoadTime
Data load time of the simulation.
RealMatrix & getKappa()
Get the kappa matrix from the container.
OutputMessage kOutFmtFftPlans
Output message.
size_t ny
Number of elements in the y direction.
void checkOutputFile()
Check the output file has the correct format and version.
OutputMessage kOutFmtSimulationProgress
Output message.
void computeDensityNonliner()
Compute new values of acoustic density for nonlinear case.
RealMatrix & getDyudyn()
Non uniform grid acoustic velocity in y.
float getC0Scalar() const
Get scalar value of sound speed.
RealMatrix & getDzudzn()
Non uniform grid acoustic velocity in z.
size_t getTimeIndex() const
Get actual simulation time step.
IndexMatrix & getVelocitySourceIndex()
Get velocity source geometry data.
void computeInitialVelocityHomogeneousUniform()
Compute velocity for the initial pressure problem, homogeneous medium, uniform grid.
OutputMessage kOutFmtCurrentMemory
Output message.
virtual size_t getMemoryUsage() const
Get memory usage in MB on the host side.
MatrixName kSensorMaskIndexName
sensor_mask_index variable name
bool getStorePressureFinalAllFlag() const
Is –p_final set?
OutputMessage kOutFmtSimulationHeader
Output message.
IndexMatrix & getPressureSourceIndex()
Get pressure source geometry data.
size_t getProgressPrintInterval() const
Get progress print interval.
Hdf5File & getInputFile()
Get input file handle.
ErrorMessage kErrFmtBadCheckpointFileFormat
KSpaceFirstOrder3DSolver error message.
OutputMessage kOutFmtWindowsBuild
Print version output message.
double getCumulatedPostProcessingTime() const
Get post-processing time accumulated over all legs.
void computeR2CFft1DX(RealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the x dimension.
MatrixName kNxName
Nx variable name.
size_t nElements() const
Get the number of elements, in 3D only spatial domain, in 4D with time.
void generateTauAndEta()
Generate absorbTau, absorbEta for heterogenous medium.
void close()
Close the HDF5 file.
OutputMessage kOutFmtVisualStudioCompiler
Print version output message.
bool getAlphaCoeffScalarFlag() const
Is alpha absorption coefficient homogeneous (scalar value)?
virtual void allocateMemory()
Memory allocation.
Basic (default) level of verbosity.
void recomputeIndicesToCPP()
Recompute indices MATALAB->C++.
void storeDataIntoCheckpointFile()
Store selected matrices into the checkpoint file.
RealMatrix & getInitialPressureSourceInput()
Get initial pressure source input data (whole matrix).
void printFullCodeNameAndLicense() const
Print the code name and license.
Parameters & mParameters
Global parameters of the simulation.
virtual float * getData()
Get raw data out of the class (for direct kernel access).
OutputMessage kOutFmtAVX2
Print version output message.
void computePressureGradient()
Compute part of the new velocity term - gradient of pressure.
OutputMessage kOutFmtCreatingOutputFile
Output message.
OutputMessage kOutFmtLastSeparator
Output message -last separator.
RealMatrix & getPmlY()
Get PML in y.
void computeR2CFft1DZ(RealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the z dimension.
void computeVelocityHeterogeneous()
Compute acoustic velocity for heterogeneous medium and a uniform grid.
void createR2CFftPlan1DX(RealMatrix &inMatrix)
Create an FFTW plan for 1D Real-to-Complex in the x dimension.
MatrixName kNyName
Ny variable name.
RealMatrix & getDtRho0Sgy()
Get dt * rho0Sgy matrix (time step size * ambient velocity on staggered grid in y direction)...
RealMatrix & GetVelocityYSourceInput()
Get Velocity source input data in y direction.
RealMatrix & getDyudynSgy()
Non uniform grid acoustic velocity on staggered grid x.
std::string getOutputFileName() const
Get output file name.
The header file containing the matrix container.
size_t getVelocityYSourceFlag() const
Get velocity in y source flag.
OutputMessage kOutFmtSSE42
Print version output message.
OutputMessage kOutFmtReadingInputFile
Output message.
MatrixName kSensorMaskCornersName
sensor_mask_corners variable name
OutputMessage kOutFmtLinuxBuild
Print version output message.
size_t nx
Number of elements in the x direction.
float getAlphaCoeffScalar() const
Get value of alpha absorption coefficient.
void loadDataFromInputFile()
Load all marked matrices from the input HDF5 file.
size_t getPressureSourceMany() const
Get number of time series in the pressure source.
virtual void scalarDividedBy(const float scalar)
Calculate matrix = scalar / matrix.
size_t getPressureSourceMode() const
Get pressure source mode.
OutputMessage kOutFmtReadingCheckpointFile
Output message.
RealMatrix & getDzudznSgz()
Non uniform grid acoustic velocity on staggered grid x.
void sumPressureTermsLinearLossless()
Sum sub-terms for new pressure, linear lossless case.
size_t getVelocityZSourceFlag() const
Get velocity in z source flag.
The header file containing the class that implements 3D FFT using the FFTW interface.
RealMatrix & getPressureSourceInput()
Get pressure source input data (signal).
OutputMessage kOutFmtSeparator
Output message - separator.
void computeMainLoop()
Compute the main time loop of the kspaceFirstOrder3D.
The header file defining the output stream container.
void setProcessorAffinity()
Set processor affinity.
RealMatrix & getRhoY()
Get density matrix in y direction.
void computeVelocity()
Compute new values of acoustic velocity.
size_t getVelocitySourceMany() const
Get number of time series in the velocity sources.
static Parameters & getInstance()
Get instance of the singleton class.
size_t getAbsorbingFlag() const
Is the simulation absrobing or lossless?
RealMatrix & getUySgy()
Get velocity matrix on staggered grid in y direction.
void computeC2RFft1DY(RealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the y dimension.
static void importWisdom()
Import wisdom from the file.
float getAbsorbTauScalar() const
Get absorb tau coefficient for homogeneous medium.
OutputMessage kOutFmtStoringSensorData
Output message.
void loadElapsedTimeFromOutputFile()
Reads the header of the output file and sets the cumulative elapsed time from the first log...
RealMatrix & GetVelocityXSourceInput()
Get Velocity source input data in x direction.
RealMatrix & getUxShifted()
Get velocity shifted on normal grid in x direction.
void createR2CFftPlan3D(RealMatrix &inMatrix)
Create FFTW plan for 3D Real-to-Complex.
ComplexMatrix & getDdxKShiftPos()
Get positive Fourier shift in x.
ComplexMatrix & getXShiftNegR()
Get negative shift for non-staggered velocity in x.
ErrorMessage kErrFmtOutputDimensionsMismatch
KSpaceFirstOrder3DSolver error message.
void addPressureSource()
Add in pressure source.
RealMatrix & getPmlXSgx()
Get PML on staggered grid x.
void printStatistics()
Print progress statistics.
RealMatrix & getTemp1Real3D()
Get first real 3D temporary matrix.
OutputMessage kOutFmtMemoryAllocation
Output message.
void closeStreams()
Close all streams.
ErrorMessage kErrFmtCheckpointDimensionsMismatch
KSpaceFirstOrder3DSolver error message.
void checkpointStreams()
Checkpoint streams.
The class for 64b unsigned integers (indices). It is used for linear and cuboid corners masks to get ...
void computePressureTermsNonlinear(RealMatrix &densitySum, RealMatrix &nonlinearTerm, RealMatrix &velocityGradientSum)
Calculate three temporary sums in the new pressure formula before taking the FFT, nonlinear absorbing...
ErrorMessage kErrFmtBadOutputFileFormat
KSpaceFirstOrder3DSolver error message.
void computeInitialVelocityHomogeneousNonuniform()
Compute acoustic velocity for initial pressure problem, homogenous medium, nonuniform grid...
void sumPressureTermsNonlinear(const RealMatrix &absorbTauTerm, const RealMatrix &absorbEtaTerm, const RealMatrix &nonlinearTerm)
Sum sub-terms to calculate new pressure, after FFTs, nonlinear case.
std::string getGitHash() const
Get git hash of the code.
RealMatrix & getDxudxnSgx()
Non uniform grid acoustic velocity on staggered grid x.
OutputMessage kOutFmtSSE41
Print version output message.
void createR2CFftPlan1DZ(RealMatrix &inMatrix)
Create an FFTW plan for 1D Real-to-Complex in the z dimension.
void sumPressureTermsNonlinearLossless()
Sum sub-terms for new pressure, linear lossless case.
FftwComplexMatrix & getTempFftwShift()
Get temporary matrix for fft shift.
void writeOutputDataInfo()
Write statistics and header into the output file.
bool isCheckpointEnabled() const
Is checkpoint enabled?
KSpaceFirstOrder3DSolver()
Constructor.
OutputMessage kOutFmtGnuCompiler
Print version output message.
RealMatrix & getDuzdz()
Get velocity gradient on in z direction.
void InitializeFftwPlans()
Initialize FFTW plans.
bool isOpen() const
Is the file opened?
void preProcessing()
Compute pre-processing phase.
OutputMessage kOutFmtLicense
Print version output message.
OutputMessage kOutFmtNoDone
Output message - finish line without done.
void addMatrices()
Populate the container based on the simulation type.
OutputMessage kOutFmtDone
Output message - Done with two spaces.
ComplexMatrix & getZShiftNegR()
Get negative shift for non-staggered velocity in z.
MatrixName kPressureFinalName
p_final variable name
void computeR2CFft1DY(RealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the y dimension.
OutputMessage kOutFmtAVX
Print version output message.
void generateInitialDenisty()
Calculate dt ./ rho0 for nonuniform grids.
ComplexMatrix & getDdzKShiftNeg()
Get negative Fourier shift in z.
DimensionSizes getReducedDimensionSizes() const
Get reduced dimension sizes of the simulation (complex classes).
void computeShiftedVelocity()
compute shifted velocity for –u_non_staggered flag.
bool isTimeToCheckpoint()
Is time to checkpoint (save actual state on disk).