1#include "VariationalApproximateProjection.h"
 
    2#include "SummationDensity.h"
 
    4#include "Algorithm/Function2Pt.h"
 
    5#include "Algorithm/Functional.h"
 
    6#include "Algorithm/Arithmetic.h"
 
    7#include "Algorithm/Reduction.h"
 
   13    template<typename TDataType>
 
   14    VariationalApproximateProjection<TDataType>::VariationalApproximateProjection()
 
   15        : ParticleApproximation<TDataType>()
 
   16        , mAirPressure(Real(0))
 
   20        mDensityCalculator = std::make_shared<SummationDensity<TDataType>>();
 
   22        this->varRestDensity()->connect(mDensityCalculator->varRestDensity());
 
   23        this->inSmoothingLength()->connect(mDensityCalculator->inSmoothingLength());
 
   24        this->inSamplingDistance()->connect(mDensityCalculator->inSamplingDistance());
 
   26        this->inPosition()->connect(mDensityCalculator->inPosition());
 
   27        this->inNeighborIds()->connect(mDensityCalculator->inNeighborIds());
 
   30    template<typename TDataType>
 
   31    VariationalApproximateProjection<TDataType>::~VariationalApproximateProjection()
 
   57    __device__ inline float kernWeight(const float r, const float h)
 
   59        const float q = r / h;
 
   60        if (q > 1.0f) return 0.0f;
 
   62            const float d = 1.0f - q;
 
   64//          return 45.0f / ((float)M_PI * hh*h) *d*d;
 
   65            return (1.0-pow(q, 4.0f));
 
   66//          return (1.0 - q)*(1.0 - q)*h*h;
 
   70    __device__ inline float kernWR(const float r, const float h)
 
   72        float w = kernWeight(r, h);
 
   73        const float q = r / h;
 
   81    __device__ inline float kernWRR(const float r, const float h)
 
   83        float w = kernWeight(r, h);
 
   84        const float q = r / h;
 
   87            return w / (0.16f*h*h);
 
   93    template <typename Real, typename Coord>
 
   94    __global__ void VAP_ComputeAlpha
 
   97        DArray<Coord> position,
 
   98        DArray<Attribute> attribute,
 
   99        DArrayList<int> neighbors,
 
  100        Real smoothingLength)
 
  102        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  103        if (pId >= position.size()) return;
 
  104        if (!attribute[pId].isDynamic()) return;
 
  106        Coord pos_i = position[pId];
 
  109        List<int>& list_i = neighbors[pId];
 
  110        int nbSize = list_i.size();
 
  111        for (int ne = 0; ne < nbSize; ne++)
 
  114            Real r = (pos_i - position[j]).norm();;
 
  118                Real a_ij = kernWeight(r, smoothingLength);
 
  123        alpha[pId] = alpha_i;
 
  126    template <typename Real>
 
  127    __global__ void VAP_CorrectAlpha
 
  132        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  133        if (pId >= alpha.size()) return;
 
  135        Real alpha_i = alpha[pId];
 
  136        if (alpha_i < maxAlpha)
 
  140        alpha[pId] = alpha_i;
 
  143    template <typename Real, typename Coord>
 
  144    __global__ void VAP_ComputeDiagonalElement
 
  146        DArray<Real> AiiFluid,
 
  147        DArray<Real> AiiTotal,
 
  149        DArray<Coord> position,
 
  150        DArray<Attribute> attribute,
 
  151        DArrayList<int> neighbors,
 
  155        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  156        if (pId >= position.size()) return;
 
  157        if (!attribute[pId].isDynamic()) return;
 
  159        Real invAlpha = 1.0f / alpha[pId];
 
  162        Real diaA_total = 0.0f;
 
  163        Real diaA_fluid = 0.0f;
 
  164        Coord pos_i = position[pId];
 
  166        List<int>& list_i = neighbors[pId];
 
  167        int nbSize = list_i.size();
 
  168        for (int ne = 0; ne < nbSize; ne++)
 
  171            Real r = (pos_i - position[j]).norm();
 
  173            Attribute att_j = attribute[j];
 
  176                Real wrr_ij = invAlpha*kernWRR(r, smoothingLength);
 
  177                if (att_j.isDynamic())
 
  179                    diaA_total += wrr_ij;
 
  180                    diaA_fluid += wrr_ij;
 
  181                    atomicAdd(&AiiFluid[j], wrr_ij);
 
  182                    atomicAdd(&AiiTotal[j], wrr_ij);
 
  186                    diaA_total += 2.0f*wrr_ij;
 
  191        atomicAdd(&AiiFluid[pId], diaA_fluid);
 
  192        atomicAdd(&AiiTotal[pId], diaA_total);
 
  195    template <typename Real, typename Coord>
 
  196    __global__ void VAP_ComputeDiagonalElement
 
  200        DArray<Coord> position,
 
  201        DArray<Attribute> attribute,
 
  202        DArrayList<int> neighbors,
 
  203        Real smoothingLength)
 
  205        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  206        if (pId >= position.size()) return;
 
  207        if (!attribute[pId].isDynamic()) return;
 
  209        Coord pos_i = position[pId];
 
  210        Real invAlpha_i = 1.0f / alpha[pId];
 
  213        List<int>& list_i = neighbors[pId];
 
  214        int nbSize = list_i.size();
 
  215        for (int ne = 0; ne < nbSize; ne++)
 
  218            Real r = (pos_i - position[j]).norm();
 
  220            if (r > EPSILON && attribute[j].isDynamic())
 
  222                Real wrr_ij = invAlpha_i*kernWRR(r, smoothingLength);
 
  224                atomicAdd(&diaA[j], wrr_ij);
 
  228        atomicAdd(&diaA[pId], A_i);
 
  231    template <typename Real, typename Coord>
 
  232    __global__ void VAP_DetectSurface
 
  235        DArray<bool> bSurface,
 
  236        DArray<Real> AiiFluid,
 
  237        DArray<Real> AiiTotal,
 
  238        DArray<Coord> position,
 
  239        DArray<Attribute> attribute,
 
  240        DArrayList<int> neighbors,
 
  241        Real smoothingLength,
 
  245        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  246        if (pId >= position.size()) return;
 
  247        if (!attribute[pId].isDynamic()) return;
 
  249        Real total_weight = 0.0f;
 
  250        Coord div_i = Coord(0);
 
  252        SmoothKernel<Real> kernSmooth;
 
  254        Coord pos_i = position[pId];
 
  255        bool bNearWall = false;
 
  257        List<int>& list_i = neighbors[pId];
 
  258        int nbSize = list_i.size();
 
  259        for (int ne = 0; ne < nbSize; ne++)
 
  262            Real r = (pos_i - position[j]).norm();
 
  264            if (r > EPSILON && attribute[j].isDynamic())
 
  266                float weight = -kernSmooth.Gradient(r, smoothingLength);
 
  267                total_weight += weight;
 
  268                div_i += (position[j] - pos_i)*(weight / r);
 
  271            if (!attribute[j].isDynamic())
 
  277        total_weight = total_weight < EPSILON ? 1.0f : total_weight;
 
  278        Real absDiv = div_i.norm() / total_weight;
 
  280        bool bSurface_i = false;
 
  281        Real diagF_i = AiiFluid[pId];
 
  282        Real diagT_i = AiiTotal[pId];
 
  284        Real diagS_i = diagT_i - diagF_i;
 
  286        //A hack, further improvements should be done to impose the exact solid boundary condition
 
  287        Real threshold = 1.5f;
 
  288        if (bNearWall && diagT_i < threshold * maxA)
 
  291            aii = threshold * maxA - (diagT_i - diagF_i);
 
  294        if (!bNearWall && diagF_i < maxA)
 
  299        bSurface[pId] = bSurface_i;
 
  303    template <typename Real, typename Coord>
 
  304    __global__ void VAP_ComputeDivergence
 
  306        DArray<Real> divergence,
 
  308        DArray<Real> density,
 
  309        DArray<Coord> position,
 
  310        DArray<Coord> velocity,
 
  311        DArray<bool> bSurface,
 
  312        DArray<Coord> normals,
 
  313        DArray<Attribute> attribute,
 
  314        DArrayList<int> neighbors,
 
  318        Real smoothingLength,
 
  321        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  322        if (pId >= position.size()) return;
 
  323        if (!attribute[pId].isDynamic()) return;
 
  325        Coord pos_i = position[pId];
 
  326        Coord vel_i = velocity[pId];
 
  328        Real invAlpha_i = 1.0f / alpha[pId];
 
  330        List<int>& list_i = neighbors[pId];
 
  331        int nbSize = list_i.size();
 
  332        for (int ne = 0; ne < nbSize; ne++)
 
  335            Real r = (pos_i - position[j]).norm();
 
  337            if (r > EPSILON && attribute[j].isFluid())
 
  339                Real wr_ij = kernWR(r, smoothingLength);
 
  340                Coord g = -invAlpha_i*(pos_i - position[j])*wr_ij*(1.0f / r);
 
  342                if (attribute[j].isDynamic())
 
  344                    Real div_ij = 0.5f*(vel_i - velocity[j]).dot(g)*restDensity / dt;   //dv_ij = 1 / alpha_i * (v_i-v_j).*(x_i-x_j) / r * (w / r);
 
  345                    atomicAdd(&divergence[pId], div_ij);
 
  346                    atomicAdd(&divergence[j], div_ij);
 
  350                    //float div_ij = dot(2.0f*vel_i, g)*const_vc_state.restDensity / dt;
 
  351                    Coord normal_j = normals[j];
 
  353                    Coord dVel = vel_i - velocity[j];
 
  354                    Real magNVel = dVel.dot(normal_j);
 
  355                    Coord nVel = magNVel*normal_j;
 
  356                    Coord tVel = dVel - nVel;
 
  358                    //float div_ij = dot(2.0f*dot(vel_i - velArr[j], normal_i)*normal_i, g)*const_vc_state.restDensity / dt;
 
  359                    //printf("Boundary dVel: %f %f %f\n", div_ij, pos_i.x, pos_i.y);
 
  360                    //printf("Normal: %f %f %f; Position: %f %f %f \n", normal_i.x, normal_i.y, normal_i.z, posArr[j].x, posArr[j].y, posArr[j].z);
 
  361                    if (magNVel < -EPSILON)
 
  363                        Real div_ij = g.dot(2.0f*(nVel + tangential*tVel))*restDensity / dt;
 
  364                        //                      printf("Boundary div: %f \n", div_ij);
 
  365                        atomicAdd(&divergence[pId], div_ij);
 
  369                        Real div_ij = g.dot(2.0f*(separation*nVel + tangential*tVel))*restDensity / dt;
 
  370                        atomicAdd(&divergence[pId], div_ij);
 
  376        //      if (rhoArr[pId] > const_vc_state.restDensity)
 
  378        //          atomicAdd(&divArr[pId], 1000.0f/const_vc_state.smoothingLength*(rhoArr[pId] - const_vc_state.restDensity) / (const_vc_state.restDensity * dt));
 
  382    template <typename Real, typename Coord>
 
  383    __global__ void VAP_CompensateSource
 
  385        DArray<Real> divergence,
 
  386        DArray<Real> density,
 
  387        DArray<Attribute> attribute,
 
  388        DArray<Coord> position,
 
  392        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  393        if (pId >= density.size()) return;
 
  394        if (!attribute[pId].isDynamic()) return;
 
  396        Coord pos_i = position[pId];
 
  397        if (density[pId] > restDensity)
 
  399            Real ratio = (density[pId] - restDensity) / restDensity;
 
  400            atomicAdd(&divergence[pId], 5*restDensity * ratio / (dt * dt));
 
  405    template <typename Real, typename Coord>
 
  406    __global__ void VAP_ComputeAx
 
  408        DArray<Real> residual,
 
  409        DArray<Real> pressure,
 
  410        DArray<Real> aiiSymArr,
 
  412        DArray<Coord> position,
 
  413        DArray<Attribute> attribute,
 
  414        DArrayList<int> neighbors,
 
  415        Real smoothingLength)
 
  417        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  418        if (pId >= position.size()) return;
 
  419        if (!attribute[pId].isDynamic()) return;
 
  421        Coord pos_i = position[pId];
 
  422        Real invAlpha_i = 1.0f / alpha[pId];
 
  424        atomicAdd(&residual[pId], aiiSymArr[pId] * pressure[pId]);
 
  425        Real con1 = 1.0f;// PARAMS.mass / PARAMS.restDensity / PARAMS.restDensity;
 
  427        List<int>& list_i = neighbors[pId];
 
  428        int nbSize = list_i.size();
 
  429        for (int ne = 0; ne < nbSize; ne++)
 
  432            Real r = (pos_i - position[j]).norm();
 
  434            if (r > EPSILON && attribute[j].isDynamic())
 
  436                Real wrr_ij = kernWRR(r, smoothingLength);
 
  437                Real a_ij = -invAlpha_i*wrr_ij;
 
  438                //              residual += con1*a_ij*preArr[j];
 
  439                atomicAdd(&residual[pId], con1*a_ij*pressure[j]);
 
  440                atomicAdd(&residual[j], con1*a_ij*pressure[pId]);
 
  445    template <typename Real, typename Coord>
 
  446    __global__ void VAP_UpdateVelocity1rd
 
  450        DArray<bool> bSurface,
 
  451        DArray<Coord> posArr,
 
  452        DArray<Coord> velArr,
 
  453        DArray<Attribute> attArr,
 
  454        DArrayList<int> neighbors,
 
  456        Real smoothingLength,
 
  460        int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  461        if (pId >= posArr.size()) return;
 
  463        if (attArr[pId].isDynamic())
 
  465            Coord pos_i = posArr[pId];
 
  466            Coord b = Coord(0.0f);
 
  467            //glm::mat3 A_i = glm::mat3(0.0f);
 
  468            float p_i = preArr[pId];
 
  469            bool bNearWall = false;
 
  471            float total_weight = 0.0f;
 
  472            List<int>& list_i = neighbors[pId];
 
  473            int nbSize = list_i.size();
 
  477            float invAii = 1.0f / aiiArr[pId];
 
  478            Coord vel_i = velArr[pId];
 
  479            Coord dv_i = Coord(0.0f);
 
  480            float scale = 0.1f*dt / restDensity;
 
  481            for (int ne = 0; ne < nbSize; ne++)
 
  484                Real r = (pos_i - posArr[j]).norm();
 
  486                Attribute att_j = attArr[j];
 
  489                    Real weight = -invAii * kernWR(r, smoothingLength);
 
  490                    Coord dnij = -scale * (pos_i - posArr[j])*weight*(1.0f / r);
 
  491                    Coord corrected = dnij;// Vec2Float(A_i*Float2Vec(dnij));
 
  494                    Coord dvij = (preArr[j] - preArr[pId])*corrected;
 
  495                    Coord dvjj = (preArr[j] +/* const_vc_state.pAir*/0) * corrected;
 
  497                    //Calculate asymmetric pressure force
 
  498                    if (att_j.isDynamic())
 
  500                        if (bSurface[pId] && !bNearWall)
 
  511                        Real weight = 1.0f*invAii*kernWRR(r, smoothingLength);
 
  512                        Coord nij = (pos_i - posArr[j]);
 
  513                        dv_i += weight * (velArr[j] - vel_i).dot(nij)*nij;
 
  516                    //Stabilize particles under compression state.
 
  517                    if (preArr[j] + preArr[pId] > 0.0f)
 
  520                        Coord dvij = (preArr[pId])*dnij;
 
  522                        if (att_j.isDynamic())
 
  524                            atomicAdd(&velArr[j].x, -dvij.x);
 
  525                            atomicAdd(&velArr[j].y, -dvij.y);
 
  526                            atomicAdd(&velArr[j].z, -dvij.z);
 
  528                        atomicAdd(&velArr[pId].x, dvij.x);
 
  529                        atomicAdd(&velArr[pId].y, dvij.y);
 
  530                        atomicAdd(&velArr[pId].z, dvij.z);
 
  540//  template <typename Real, typename Coord>
 
  541//  __global__ void VAP_UpdateVelocityBoundaryCorrected(
 
  542//      DArray<Real> pressure,
 
  543//      DArray<Real> alpha,
 
  544//      DArray<bool> bSurface,
 
  545//      DArray<Coord> position,
 
  546//      DArray<Coord> velocity,
 
  547//      DArray<Coord> normal,
 
  548//      DArray<Attribute> attribute,
 
  549//      DArrayList<int> neighbors,
 
  554//      Real smoothingLength,
 
  557//      int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  558//      if (pId >= position.size()) return;
 
  560//      if (attribute[pId].isDynamic())
 
  562//          Coord pos_i = position[pId];
 
  563//          Real p_i = pressure[pId];
 
  567//          Real invAlpha = 1.0f / alpha[pId];
 
  568//          Coord vel_i = velocity[pId];
 
  570//          Real scale = 1.0f*dt / restDensity;
 
  572//          List<int>& list_i = neighbors[pId];
 
  573//          int nbSize = list_i.size();
 
  574//          for (int ne = 0; ne < nbSize; ne++)
 
  576//              int j = list_i[ne];
 
  577//              Real r = (pos_i - position[j]).norm();
 
  579//              Attribute att_j = attribute[j];
 
  582//                  Real weight = -invAlpha*kernWR(r, smoothingLength);
 
  583//                  Coord dnij = (pos_i - position[j])*(1.0f / r);
 
  584//                  Coord corrected = dnij;
 
  585//                  if (corrected.norm() > EPSILON)
 
  587//                      corrected = corrected.normalize();
 
  589//                  corrected = -scale*weight*corrected;
 
  591//                  Coord dvij = (pressure[j] - pressure[pId])*corrected;
 
  592//                  Coord dvjj = (pressure[j] + airPressure) * corrected;
 
  593//                  Coord dvij_sym = 0.5f*(pressure[pId] + pressure[j])*corrected;
 
  595//                  //Calculate asymmetric pressure force
 
  596//                  if (att_j.isDynamic())
 
  609//                          Coord dvii = -(pressure[pId] + airPressure) * corrected;
 
  610//                          atomicAdd(&velocity[j][0], ceo*dvii[0]);
 
  611//                          atomicAdd(&velocity[j][1], ceo*dvii[1]);
 
  612//                          atomicAdd(&velocity[j][2], ceo*dvii[2]);
 
  616//                          atomicAdd(&velocity[j][0], ceo*dvij[0]);
 
  617//                          atomicAdd(&velocity[j][1], ceo*dvij[1]);
 
  618//                          atomicAdd(&velocity[j][2], ceo*dvij[2]);
 
  623//                      Coord dvii = 2.0f*(pressure[pId]) * corrected;
 
  629//                      float weight = 2.0f*invAlpha*kernWeight(r, smoothingLength);
 
  630//                      Coord nij = (pos_i - position[j]);
 
  631//                      if (nij.norm() > EPSILON)
 
  633//                          nij = nij.normalize();
 
  636//                          nij = Coord(1.0f, 0.0f, 0.0f);
 
  638//                      Coord normal_j = normal[j];
 
  639//                      Coord dVel = velocity[j] - vel_i;
 
  640//                      Real magNVel = dVel.dot(normal_j);
 
  641//                      Coord nVel = magNVel*normal_j;
 
  642//                      Coord tVel = dVel - nVel;
 
  643//                      if (magNVel > EPSILON)
 
  645//                          dv_i += weight*nij.dot(nVel + sliding*tVel)*nij;
 
  649//                          dv_i += weight*nij.dot(separation*nVel + sliding*tVel)*nij;
 
  653//                      //                      float weight = 2.0f*invAlpha*kernWRR(r, const_vc_state.smoothingLength);
 
  654//                      //                      float3 nij = (pos_i - posArr[j]);
 
  655//                      //                      //printf("Normal: %f %f %f; Position: %f %f %f \n", normal_i.x, normal_i.y, normal_i.z, posArr[j].x, posArr[j].y, posArr[j].z);
 
  656//                      //                      dv_i += weight*dot(velArr[j] - vel_i, nij)*nij;
 
  664//          atomicAdd(&velocity[pId][0], dv_i[0]);
 
  665//          atomicAdd(&velocity[pId][1], dv_i[1]);
 
  666//          atomicAdd(&velocity[pId][2], dv_i[2]);
 
  670//  template<typename Coord>
 
  671//  __global__ void VC_InitializeNormal(
 
  672//      DArray<Coord> normal)
 
  674//      int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  675//      if (pId >= normal.size()) return;
 
  677//      normal[pId] = Coord(0);
 
  680//  __global__ void VC_InitializeAttribute(
 
  681//      DArray<Attribute> attr)
 
  683//      int pId = threadIdx.x + (blockIdx.x * blockDim.x);
 
  684//      if (pId >= attr.size()) return;
 
  686//      attr[pId].setDynamic();
 
  689//  template<typename TDataType>
 
  690//  bool VelocityConstraint<TDataType>::initializeImpl()
 
  692//      int num = this->inPosition()->size();
 
  694//      m_alpha.resize(num);
 
  696//      m_AiiFluid.resize(num);
 
  697//      m_AiiTotal.resize(num);
 
  698//      m_pressure.resize(num);
 
  699//      m_divergence.resize(num);
 
  700//      m_bSurface.resize(num);
 
  706//      m_pressure.resize(num);
 
  708//      m_reduce = Reduction<float>::Create(num);
 
  709//      m_arithmetic = Arithmetic<float>::Create(num);
 
  712//      uint pDims = cudaGridSize(num, BLOCK_SIZE);
 
  715//      VC_ComputeAlpha << <pDims, BLOCK_SIZE >> > (
 
  717//          this->inPosition()->getData(),
 
  718//          this->inAttribute()->getData(),
 
  719//          this->inNeighborIds()->getData(),
 
  720//          this->inSmoothingLength()->getData());
 
  722//      m_maxAlpha = m_reduce->maximum(m_alpha.begin(), m_alpha.size());
 
  724//      VC_CorrectAlpha << <pDims, BLOCK_SIZE >> > (
 
  728//      m_AiiFluid.reset();
 
  729//      VC_ComputeDiagonalElement << <pDims, BLOCK_SIZE >> > (
 
  732//          this->inPosition()->getData(),
 
  733//          this->inAttribute()->getData(),
 
  734//          this->inNeighborIds()->getData(),
 
  735//          this->inSmoothingLength()->getData());
 
  737//      m_maxA = m_reduce->maximum(m_AiiFluid.begin(), m_AiiFluid.size());
 
  739//      std::cout << "Max alpha: " << m_maxAlpha << std::endl;
 
  740//      std::cout << "Max A: " << m_maxA << std::endl;
 
  742// //       m_normal.resize(num);
 
  743// //       m_attribute.resize(num);
 
  746// //           VC_InitializeNormal,
 
  747// //           m_normal.getData());
 
  750// //           VC_InitializeAttribute,
 
  751// //           m_attribute.getData());
 
  756    template<typename TDataType>
 
  757    void VariationalApproximateProjection<TDataType>::compute()
 
  759        int num = this->inPosition()->size();
 
  761        if (num != mAlpha.size())
 
  765            mAiiFluid.resize(num);
 
  766            mAiiTotal.resize(num);
 
  767            mPressure.resize(num);
 
  768            mDivergence.resize(num);
 
  769            mIsSurface.resize(num);
 
  775            mPressure.resize(num);
 
  777//          m_normal.resize(num);
 
  778            //m_attribute.resize(num);
 
  781//              VC_InitializeNormal,
 
  782//              m_normal.getData());
 
  785//              VC_InitializeAttribute,
 
  786//              m_attribute.getData());
 
  788            m_reduce = Reduction<float>::Create(num);
 
  789            m_arithmetic = Arithmetic<float>::Create(num);
 
  791            uint pDims = cudaGridSize(num, BLOCK_SIZE);
 
  794            VAP_ComputeAlpha << <pDims, BLOCK_SIZE >> > (
 
  796                this->inPosition()->getData(),
 
  797                this->inAttribute()->getData(),
 
  798                this->inNeighborIds()->getData(),
 
  799                this->inSmoothingLength()->getData());
 
  801            mAlphaMax = m_reduce->maximum(mAlpha.begin(), mAlpha.size());
 
  803            VAP_CorrectAlpha << <pDims, BLOCK_SIZE >> > (
 
  808            VAP_ComputeDiagonalElement << <pDims, BLOCK_SIZE >> > (
 
  811                this->inPosition()->getData(),
 
  812                this->inAttribute()->getData(),
 
  813                this->inNeighborIds()->getData(),
 
  814                this->inSmoothingLength()->getData());
 
  816            mAMax = m_reduce->maximum(mAiiFluid.begin(), mAiiFluid.size());
 
  818            std::cout << "Max alpha: " << mAlphaMax << std::endl;
 
  819            std::cout << "Max A: " << mAMax << std::endl;
 
  822        Real dt = this->inTimeStep()->getValue();
 
  824        uint pDims = cudaGridSize(this->inPosition()->size(), BLOCK_SIZE);
 
  826        //compute alpha_i = sigma w_j and A_i = sigma w_ij / r_ij / r_ij
 
  828        VAP_ComputeAlpha << <pDims, BLOCK_SIZE >> > (
 
  830            this->inPosition()->getData(),
 
  831            this->inAttribute()->getData(),
 
  832            this->inNeighborIds()->getData(), 
 
  833            this->inSmoothingLength()->getValue());
 
  834        VAP_CorrectAlpha << <pDims, BLOCK_SIZE >> > (
 
  838        //compute the diagonal elements of the coefficient matrix
 
  841        VAP_ComputeDiagonalElement << <pDims, BLOCK_SIZE >> > (
 
  845            this->inPosition()->getData(),
 
  846            this->inAttribute()->getData(),
 
  847            this->inNeighborIds()->getData(),
 
  848            this->inSmoothingLength()->getData());
 
  852        VAP_DetectSurface << <pDims, BLOCK_SIZE >> > (
 
  857            this->inPosition()->getData(),
 
  858            this->inAttribute()->getData(),
 
  859            this->inNeighborIds()->getData(),
 
  860            this->inSmoothingLength()->getData(),
 
  867        //compute the source term
 
  868        mDensityCalculator->compute();
 
  870        VAP_ComputeDivergence << <pDims, BLOCK_SIZE >> > (
 
  873            mDensityCalculator->outDensity()->getData(),
 
  874            this->inPosition()->getData(),
 
  875            this->inVelocity()->getData(), 
 
  877            this->inNormal()->getData(), 
 
  878            this->inAttribute()->getData(),
 
  879            this->inNeighborIds()->getData(),
 
  882            this->varRestDensity()->getData(),
 
  883            this->inSmoothingLength()->getData(),
 
  886        VAP_CompensateSource << <pDims, BLOCK_SIZE >> > (
 
  888            mDensityCalculator->outDensity()->getData(),
 
  889            this->inAttribute()->getData(),
 
  890            this->inPosition()->getData(),
 
  891            this->varRestDensity()->getData(),
 
  894        //solve the linear system of equations with a conjugate gradient method.
 
  896        VAP_ComputeAx << <pDims, BLOCK_SIZE >> > (
 
  901            this->inPosition()->getData(),
 
  902            this->inAttribute()->getData(),
 
  903            this->inNeighborIds()->getData(),
 
  904            this->inSmoothingLength()->getData());
 
  907        Function2Pt::subtract(m_r, mDivergence, m_y);
 
  909        Real rr = m_arithmetic->Dot(m_r, m_r);
 
  910        Real err = sqrt(rr / m_r.size());
 
  912        while (itor < 1000 && err > 1.0f)
 
  915            //VC_ComputeAx << <pDims, BLOCK_SIZE >> > (*yArr, *pArr, *aiiArr, *alphaArr, *posArr, *attArr, *neighborArr);
 
  916            VAP_ComputeAx << <pDims, BLOCK_SIZE >> > (
 
  921                this->inPosition()->getData(),
 
  922                this->inAttribute()->getData(),
 
  923                this->inNeighborIds()->getData(),
 
  924                this->inSmoothingLength()->getValue());
 
  926            float alpha = rr / m_arithmetic->Dot(m_p, m_y);
 
  927            Function2Pt::saxpy(mPressure, m_p, mPressure, alpha);
 
  928            Function2Pt::saxpy(m_r, m_y, m_r, -alpha);
 
  932            rr = m_arithmetic->Dot(m_r, m_r);
 
  934            Real beta = rr / rr_old;
 
  935            Function2Pt::saxpy(m_p, m_p, m_r, beta);
 
  937            err = sqrt(rr / m_r.size());
 
  942        //update the each particle's velocity
 
  943//      VC_UpdateVelocityBoundaryCorrected << <pDims, BLOCK_SIZE >> > (
 
  947//          this->inPosition()->getData(),
 
  948//          this->inVelocity()->getData(),
 
  949//          this->inNormal()->getData(),
 
  950//          this->inAttribute()->getData(),
 
  951//          this->inNeighborIds()->getData(),
 
  956//          this->inSmoothingLength()->getData(),
 
  959        VAP_UpdateVelocity1rd << <pDims, BLOCK_SIZE >> > (
 
  963            this->inPosition()->getData(),
 
  964            this->inVelocity()->getData(),
 
  965            this->inAttribute()->getData(),
 
  966            this->inNeighborIds()->getData(),
 
  967            this->varRestDensity()->getValue(),
 
  968            this->inSmoothingLength()->getValue(),
 
  972    DEFINE_CLASS(VariationalApproximateProjection);