2#ifndef STORMM_CORE_KERNEL_MANAGER_H
3#define STORMM_CORE_KERNEL_MANAGER_H
9# include <cuda_runtime.h>
13#include "Constants/behavior.h"
14#include "DataTypes/stormm_vector_types.h"
15#include "Math/reduction.h"
16#include "Numerics/split_fixed_precision.h"
17#include "Potential/energy_enumerators.h"
18#include "Structure/structure_enumerators.h"
19#include "Synthesis/atomgraph_synthesis.h"
20#include "Synthesis/synthesis_enumerators.h"
21#include "Topology/atomgraph_enumerators.h"
22#include "Trajectory/trajectory_enumerators.h"
23#include "gpu_details.h"
24#include "kernel_format.h"
29using constants::PrecisionModel;
30using energy::ClashResponse;
31using energy::EvaluateForce;
32using energy::EvaluateEnergy;
33using energy::getEnumerationName;
34using energy::NeighborListKind;
35using energy::QMapMethod;
36using energy::TinyBoxPresence;
37using energy::ValenceKernelSize;
38using numerics::AccumulationMethod;
39using structure::GridDetail;
40using structure::RMSDTask;
41using structure::VirtualSiteActivity;
42using synthesis::AtomGraphSynthesis;
43using synthesis::NbwuKind;
44using synthesis::VwuGoal;
45using stmath::ReductionGoal;
46using stmath::ReductionStage;
47using synthesis::VwuGoal;
48using topology::ImplicitSolventModel;
49using topology::UnitCellType;
50using trajectory::IntegrationStage;
84 AccumulationMethod acc_meth, VwuGoal purpose,
85 ClashResponse collision_handling)
const;
108 NeighborListKind neighbor_layout, EvaluateEnergy eval_nrg,
109 AccumulationMethod acc_meth,
110 ClashResponse collision_handling)
const;
121 IntegrationStage process)
const;
129 EvaluateEnergy eval_nrg, AccumulationMethod acc_meth,
130 ImplicitSolventModel igb, ClashResponse collision_handling)
const;
136 ImplicitSolventModel igb)
const;
142 ImplicitSolventModel igb)
const;
174 PrecisionModel acc_prec,
bool overflow,
size_t cg_tmat,
185 ReductionStage process)
const;
217 NeighborListKind grid_configuration, TinyBoxPresence has_tiny_box,
218 EvaluateForce eval_frc, EvaluateEnergy eval_nrg,
219 ClashResponse mitigation)
const;
240 int stage_number,
int gpos_bits = 0,
int chain_count = 0)
const;
// Data members (the enclosing class header is not visible in this listing).
// NOTE(review): the "_dp" / "_sp" suffixes presumably denote double- and single-precision
// kernel variants -- confirm against the constructor in the implementation file.

// Size setting (a ValenceKernelSize enumerator) associated with valence kernels.
249 ValenceKernelSize valence_kernel_width;
// Integer block multipliers associated with the non-bonded kernels.
257 int nonbond_block_multiplier_dp;
258 int nonbond_block_multiplier_sp;
// Integer block multipliers associated with the Born radii kernels.
264 int gbradii_block_multiplier_dp;
265 int gbradii_block_multiplier_sp;
// Integer block multipliers associated with the Born derivative kernels.
271 int gbderiv_block_multiplier_dp;
272 int gbderiv_block_multiplier_sp;
// Lists of block multipliers for density mapping kernels.  NOTE(review): "gen" and "sac"
// presumably correspond to the general-purpose and shared-accumulation QMap kernels, and the
// vector index presumably tracks interpolation order -- TODO confirm.
280 std::vector<int> gen_qmap_block_multiplier_dp;
281 std::vector<int> gen_qmap_block_multiplier_sp;
282 std::vector<int> sac_qmap_block_multiplier_dp;
283 std::vector<int> sac_qmap_block_multiplier_sp;
// A single block multiplier for reduction kernels (no precision-specific variants declared).
289 int reduction_block_multiplier;
// Size setting (a ValenceKernelSize enumerator) associated with virtual site kernels.
294 ValenceKernelSize virtual_site_kernel_width;
// Integer block multipliers associated with the RMSD kernels.
300 int rmsd_block_multiplier_dp;
301 int rmsd_block_multiplier_sp;
// Catalog entry routine for a valence kernel (declaration only; the body is not visible here).
// The first six parameters match those accepted by getValenceKernelDims(); kwidth additionally
// supplies a ValenceKernelSize, and kernel_name defaults to an empty string.
// NOTE(review): presumably stores the kernel's launch parameters under a key derived from
// these settings -- confirm in the implementation file.
333 void catalogValenceKernel(PrecisionModel prec, EvaluateForce eval_force, EvaluateEnergy eval_nrg,
334 AccumulationMethod acc_meth, VwuGoal purpose,
335 ClashResponse collision_handling, ValenceKernelSize kwidth,
336 const std::string &kernel_name = std::string(
""));
// Overload taking a neighbor list precision and layout (declaration only).  Its settings
// parameters match those accepted by getPmeValenceKernelDims(); kernel_name defaults to an
// empty string.
// NOTE(review): presumably catalogs the PME-compatible valence kernels -- confirm in the
// implementation file.
338 void catalogValenceKernel(PrecisionModel prec, PrecisionModel neighbor_prec,
339 NeighborListKind neighbor_layout, EvaluateEnergy eval_nrg,
340 AccumulationMethod acc_meth, ClashResponse collision_handling,
341 const std::string &kernel_name = std::string(
""));
// Catalog entry routine for an integration kernel (declaration only).  Takes the same prec,
// acc_meth, and process settings as getIntegrationKernelDims(), plus a ValenceKernelSize
// (kwidth).  kernel_name defaults to an empty string.
// NOTE(review): the storage behavior is not visible here -- confirm in the implementation.
349 void catalogIntegrationKernel(PrecisionModel prec, AccumulationMethod acc_meth,
350 ValenceKernelSize kwidth, IntegrationStage process,
351 const std::string &kernel_name = std::string(
""));
// Catalog entry routine for a non-bonded kernel (declaration only).  The settings parameters
// match those accepted by getNonbondedKernelDims(); kernel_name defaults to an empty string.
// NOTE(review): the storage behavior is not visible here -- confirm in the implementation.
359 void catalogNonbondedKernel(PrecisionModel prec, NbwuKind kind, EvaluateForce eval_force,
360 EvaluateEnergy eval_nrg, AccumulationMethod acc_meth,
361 ImplicitSolventModel igb, ClashResponse collision_handling,
362 const std::string &kernel_name = std::string(
""));
// Catalog entry routine for a Born radii computation kernel (declaration only).  The settings
// parameters match those accepted by getBornRadiiKernelDims(); kernel_name defaults to an
// empty string.
// NOTE(review): the storage behavior is not visible here -- confirm in the implementation.
367 void catalogBornRadiiKernel(PrecisionModel prec, NbwuKind kind, AccumulationMethod acc_meth,
368 ImplicitSolventModel igb,
369 const std::string &kernel_name = std::string(
""));
// Catalog entry routine for a Born derivative computation kernel (declaration only).  The
// settings parameters match those accepted by getBornDerivativeKernelDims(); kernel_name
// defaults to an empty string.
// NOTE(review): the storage behavior is not visible here -- confirm in the implementation.
374 void catalogBornDerivativeKernel(PrecisionModel prec, NbwuKind kind, AccumulationMethod acc_meth,
375 ImplicitSolventModel igb,
376 const std::string &kernel_name = std::string(
""));
// Catalog entry routine for a general QMap (density mapping) kernel (declaration only).
// Takes the prec, cg_tmat, and order settings also accepted by getDensityMappingKernelDims().
// Unlike most sibling catalog routines, kernel_name has no default and must be supplied.
// NOTE(review): the meaning of cg_tmat (a size_t) is not shown here; presumably a type
// identifier for the coordinate grid -- TODO confirm.
390 void catalogGeneralQMapKernel(PrecisionModel prec,
size_t cg_tmat,
int order,
391 const std::string &kernel_name);
// Catalog entry routine for a "ShrAcc" QMap kernel (declaration only).  Takes separate
// calculation and accumulation precision models, an overflow flag, and the cg_tmat / order
// settings.  kernel_name has no default and must be supplied.
// NOTE(review): "ShrAcc" presumably abbreviates shared accumulation, and overflow presumably
// selects a variant with overflow accumulators -- confirm in the implementation.
405 void catalogShrAccQMapKernel(PrecisionModel calc_prec, PrecisionModel acc_prec,
406 bool overflow,
size_t cg_tmat,
int order,
407 const std::string &kernel_name);
// Catalog entry routine for a reduction kernel (declaration only).  Takes the prec, purpose,
// and process settings also accepted by getReductionKernelDims(), plus an integer subdivision.
// kernel_name defaults to an empty string.
// NOTE(review): how subdivision affects the stored launch parameters is not visible here --
// confirm in the implementation.
415 void catalogReductionKernel(PrecisionModel prec, ReductionGoal purpose, ReductionStage process,
416 int subdivision,
const std::string &kernel_name = std::string(
""));
// Catalog entry routine for a virtual site kernel (declaration only).  Takes the prec and
// purpose settings also accepted by getVirtualSiteKernelDims(), plus a ValenceKernelSize
// (kwidth).  kernel_name defaults to an empty string.
// NOTE(review): the storage behavior is not visible here -- confirm in the implementation.
428 void catalogVirtualSiteKernel(PrecisionModel prec, VirtualSiteActivity purpose,
429 ValenceKernelSize kwidth,
430 const std::string &kernel_name = std::string(
""));
// Catalog entry routine for an RMSD calculation kernel (declaration only).  The prec and order
// parameters match those accepted by getRMSDKernelDims(); kernel_name defaults to an empty
// string.
// NOTE(review): the storage behavior is not visible here -- confirm in the implementation.
437 void catalogRMSDKernel(PrecisionModel prec, RMSDTask order,
438 const std::string &kernel_name = std::string(
""));
// Catalog entry routine for a PME pair interactions kernel (declaration only).  The settings
// parameters match those accepted by getPMEPairsKernelDims(); kernel_name defaults to an
// empty string.
// NOTE(review): the storage behavior is not visible here -- confirm in the implementation.
457 void catalogPMEPairsKernel(PrecisionModel coord_prec, PrecisionModel calc_prec,
458 NeighborListKind grid_configuration, TinyBoxPresence has_tiny_box,
459 EvaluateForce eval_frc, EvaluateEnergy eval_nrg,
460 ClashResponse mitigation,
461 const std::string &kernel_name = std::string(
""));
// Catalog entry routine for a particle migration kernel (declaration only).  Takes the
// coord_prec, grid_configuration, stage_number, and gpos_bits settings also accepted by
// getMigrationKernelDims(); it does not take that accessor's chain_count argument.  Unlike the
// accessor, gpos_bits has no default here.  kernel_name defaults to an empty string.
// NOTE(review): the storage behavior is not visible here -- confirm in the implementation.
472 void catalogMigrationKernel(PrecisionModel coord_prec, NeighborListKind grid_configuration,
473 int stage_number,
int gpos_bits,
474 const std::string &kernel_name = std::string(
""));
// Free function returning an integer block multiplier for non-bonded kernels, given the GPU
// specifications, the unit cell type, a precision model, and an implicit solvent model.
// NOTE(review): presumably the number of thread blocks to launch per streaming
// multiprocessor -- confirm in the implementation file.
483int nonbondedBlockMultiplier(
const GpuDetails &gpu, UnitCellType unit_cell, PrecisionModel prec,
484 ImplicitSolventModel igb);
// Free function returning an integer block multiplier for Born radii kernels, given the GPU
// specifications and a precision model.
// NOTE(review): the selection logic is not visible here -- confirm in the implementation.
490int gbRadiiBlockMultiplier(
const GpuDetails &gpu, PrecisionModel prec);
// Free function returning an integer block multiplier for Born derivative kernels, given the
// GPU specifications and a precision model.
// NOTE(review): the selection logic is not visible here -- confirm in the implementation.
497int gbDerivativeBlockMultiplier(
const GpuDetails &gpu, PrecisionModel prec);
// Free function returning a list of integer block multipliers for density mapping kernels,
// given the GPU specifications, a precision model, and the mapping approach (QMapMethod).
// NOTE(review): the meaning of each vector element is not shown here; presumably one entry
// per interpolation order -- TODO confirm.
505std::vector<int> densityMappingBlockMultiplier(
const GpuDetails &gpu, PrecisionModel prec,
506 QMapMethod approach);
// Free function returning an integer block multiplier for reduction kernels.  Takes no
// arguments, so the result cannot depend on the GPU or precision model.
509int reductionBlockMultiplier();
// Free function returning an integer block multiplier for virtual site kernels, given only a
// precision model.
515int virtualSiteBlockMultiplier(PrecisionModel prec);
// Free function returning an integer block multiplier for RMSD kernels, given only a
// precision model.
521int rmsdBlockMultiplier(PrecisionModel prec);
// Free function returning an integer block multiplier for PME pair interaction kernels, given
// the GPU specifications plus separate coordinate and calculation precision models.
// NOTE(review): the selection logic is not visible here -- confirm in the implementation.
529int pmePairsBlockMultiplier(
const GpuDetails &gpu, PrecisionModel coord_prec,
530 PrecisionModel calc_prec);
// Free function returning an integer block multiplier for particle migration kernels.  Takes
// no arguments, so the result cannot depend on the GPU or precision model.
534int migrationBlockMultiplier();
// Free function returning a string derived from a precision model and a ValenceKernelSize.
// NOTE(review): presumably a suffix appended to valence kernel keys to encode the kernel
// width -- confirm in the implementation file.
541std::string valenceKernelWidthExtension(PrecisionModel prec, ValenceKernelSize kwidth);
// Free function returning a string key for a valence kernel.  Its parameter list matches the
// settings taken by the first catalogValenceKernel() overload (without kernel_name).
// NOTE(review): presumably the key under which the kernel's launch parameters are stored and
// later looked up -- confirm in the implementation file.
574std::string valenceKernelKey(PrecisionModel prec, EvaluateForce eval_force,
575 EvaluateEnergy eval_nrg, AccumulationMethod acc_meth,
576 VwuGoal purpose, ClashResponse collision_handling,
577 ValenceKernelSize kwidth);
// Overload returning a string key from the neighbor-list-aware settings (the same parameter
// list as getPmeValenceKernelDims()).
// NOTE(review): presumably the lookup key for PME-compatible valence kernels -- confirm in the
// implementation file.
579std::string valenceKernelKey(PrecisionModel prec, PrecisionModel neighbor_prec,
580 NeighborListKind neighbor_layout, EvaluateEnergy eval_nrg,
581 AccumulationMethod acc_meth, ClashResponse collision_handling);
// Free function returning a string key for an integration kernel.  Its parameter list matches
// the settings taken by catalogIntegrationKernel() (without kernel_name).
// NOTE(review): the key format is not visible here -- confirm in the implementation.
596std::string integrationKernelKey(PrecisionModel prec, AccumulationMethod acc_meth,
597 ValenceKernelSize kwidth, IntegrationStage process);
// Free function returning a string key for a non-bonded kernel.  Its parameter list matches
// the settings taken by catalogNonbondedKernel() (without kernel_name).
// NOTE(review): the key format is not visible here -- confirm in the implementation.
624std::string nonbondedKernelKey(PrecisionModel prec, NbwuKind kind, EvaluateForce eval_force,
625 EvaluateEnergy eval_nrg, AccumulationMethod acc_meth,
626 ImplicitSolventModel igb, ClashResponse collision_handling);
// Free function returning a string built from the settings shared by the Born radii and Born
// derivative kernel keys (same parameter list as both of those functions).
// NOTE(review): presumably the common suffix that bornRadiiKernelKey() and
// bornDerivativeKernelKey() append to their prefixes -- confirm in the implementation.
631std::string appendBornKernelKey(PrecisionModel prec, NbwuKind kind, AccumulationMethod acc_meth,
632 ImplicitSolventModel igb);
// Free function returning a string key for a Born radii computation kernel.  Its parameter
// list matches the settings taken by catalogBornRadiiKernel() (without kernel_name).
// NOTE(review): the key format is not visible here -- confirm in the implementation.
638std::string bornRadiiKernelKey(PrecisionModel prec, NbwuKind kind, AccumulationMethod acc_meth,
639 ImplicitSolventModel igb);
// Free function returning a string key for a Born derivative computation kernel.  Its
// parameter list matches the settings taken by catalogBornDerivativeKernel() (without
// kernel_name).
// NOTE(review): the key format is not visible here -- confirm in the implementation.
645std::string bornDerivativeKernelKey(PrecisionModel prec, NbwuKind kind,
646 AccumulationMethod acc_meth, ImplicitSolventModel igb);
// Free function returning a string built from a precision model, the cg_tmat value, and an
// integer order.
// NOTE(review): presumably the common suffix shared by the general and ShrAcc QMap kernel
// keys -- confirm in the implementation file.
660std::string appendQMapKernelKey(PrecisionModel prec,
size_t cg_tmat,
int order);
// Free function returning a string key for a general QMap (density mapping) kernel.  Its
// parameter list matches the settings taken by catalogGeneralQMapKernel() (without
// kernel_name).
// NOTE(review): the key format is not visible here -- confirm in the implementation.
666std::string generalQMapKernelKey(PrecisionModel prec,
size_t cg_tmat,
int order);
// Free function returning a string key for a "ShrAcc" QMap kernel.  Its parameter list matches
// the settings taken by catalogShrAccQMapKernel() (without kernel_name).
// NOTE(review): the key format, and how the overflow flag is encoded in it, are not visible
// here -- confirm in the implementation.
679std::string shrAccQMapKernelKey(PrecisionModel calc_prec, PrecisionModel acc_prec,
680 bool overflow,
size_t cg_tmat,
int order);
// Free function returning a string key for a reduction kernel.  Takes the same prec, purpose,
// and process arguments as getReductionKernelDims(); the subdivision argument accepted by
// catalogReductionKernel() is not part of this key's inputs.
692std::string reductionKernelKey(PrecisionModel prec, ReductionGoal purpose, ReductionStage process);
// Free function returning a string key for a virtual site kernel, from a precision model, the
// virtual site activity, and a ValenceKernelSize.  Note that the activity parameter is named
// "process" here but "purpose" in catalogVirtualSiteKernel() and getVirtualSiteKernelDims().
705std::string virtualSiteKernelKey(PrecisionModel prec, VirtualSiteActivity process,
706 ValenceKernelSize kwidth);
// Free function returning a string key for an RMSD calculation kernel.  Takes the same prec
// and order arguments as getRMSDKernelDims().
717std::string rmsdKernelKey(PrecisionModel prec, RMSDTask order);
// Free function returning a string key for a PME pair interactions kernel.  Its parameter list
// matches that of getPMEPairsKernelDims().
// NOTE(review): the key format is not visible here -- confirm in the implementation.
746std::string pmePairsKernelKey(PrecisionModel coord_prec, PrecisionModel calc_prec,
747 NeighborListKind grid_configuration, TinyBoxPresence has_tiny_box,
748 EvaluateForce eval_frc, EvaluateEnergy eval_nrg,
749 ClashResponse mitigation);
// Free function returning a string key for a particle migration kernel.  Its parameter list
// matches the settings taken by catalogMigrationKernel() (without kernel_name); the
// chain_count argument of getMigrationKernelDims() is not part of this key's inputs.
771std::string migrationKernelKey(PrecisionModel coord_prec, NeighborListKind grid_configuration,
772 int stage_number,
int gpos_bits);
int2 getPMEPairsKernelDims(PrecisionModel coord_prec, PrecisionModel calc_prec, NeighborListKind grid_configuration, TinyBoxPresence has_tiny_box, EvaluateForce eval_frc, EvaluateEnergy eval_nrg, ClashResponse mitigation) const
Get the block and thread counts for a PME pair interactions kernel evaluating the purview of each nie...
Definition core_kernel_manager.cpp:997
int2 getNonbondedKernelDims(PrecisionModel prec, NbwuKind kind, EvaluateForce eval_force, EvaluateEnergy eval_nrg, AccumulationMethod acc_meth, ImplicitSolventModel igb, ClashResponse collision_handling) const
Get the block and thread counts for a non-bonded kernel. Parameter descriptions for this function fol...
Definition core_kernel_manager.cpp:874
int2 getReductionKernelDims(PrecisionModel prec, ReductionGoal purpose, ReductionStage process) const
Get the block and thread counts for a reduction kernel.
Definition core_kernel_manager.cpp:965
int2 getDensityMappingKernelDims(QMapMethod approach, PrecisionModel prec, size_t cg_tmat, int order) const
Get the launch parameters for a general-purpose particle-mesh density mapping kernel....
Definition core_kernel_manager.cpp:944
int2 getPmeValenceKernelDims(PrecisionModel prec, PrecisionModel neighbor_prec, NeighborListKind neighbor_layout, EvaluateEnergy eval_nrg, AccumulationMethod acc_meth, ClashResponse collision_handling) const
Get the block and thread counts for a PME-compatible valence kernel. All such kernels will take abstr...
Definition core_kernel_manager.cpp:845
int2 getBornDerivativeKernelDims(PrecisionModel prec, NbwuKind kind, AccumulationMethod acc_meth, ImplicitSolventModel igb) const
Get the block and thread counts for a Born derivative computation kernel. Parameter descriptions for ...
Definition core_kernel_manager.cpp:905
int2 getIntegrationKernelDims(PrecisionModel prec, AccumulationMethod acc_meth, IntegrationStage process) const
Get the block and thread counts for a stand-alone velocity constraint kernel.
Definition core_kernel_manager.cpp:862
int2 getBornRadiiKernelDims(PrecisionModel prec, NbwuKind kind, AccumulationMethod acc_meth, ImplicitSolventModel igb) const
Get the block and thread counts for a Born radii computation kernel. Parameter descriptions for this ...
Definition core_kernel_manager.cpp:890
CoreKlManager(const GpuDetails &gpu_in, const AtomGraphSynthesis *poly_ag)
The constructor will fill in values as if this were a single-threaded CPU "launch....
Definition core_kernel_manager.cpp:54
int2 getMigrationKernelDims(PrecisionModel coord_prec, NeighborListKind grid_configuration, int stage_number, int gpos_bits=0, int chain_count=0) const
Get the block and thread counts for a particle migration kernel needed to update particle positions a...
Definition core_kernel_manager.cpp:1014
int2 getValenceKernelDims(PrecisionModel prec, EvaluateForce eval_force, EvaluateEnergy eval_nrg, AccumulationMethod acc_meth, VwuGoal purpose, ClashResponse collision_handling) const
Get the block and thread counts for a valence kernel.
Definition core_kernel_manager.cpp:830
int getArchBlockMultiplier() const
Get the architecture-specific block multiplier. This will run a minimum number of blocks per streamin...
int2 getVirtualSiteKernelDims(PrecisionModel prec, VirtualSiteActivity purpose) const
Get the block and thread counts for a virtual site placement or force transmission kernel.
Definition core_kernel_manager.cpp:976
int2 getRMSDKernelDims(PrecisionModel prec, RMSDTask order) const
Get the launch parameters for an RMSD calculation kernel.
Definition core_kernel_manager.cpp:987
Pertinent aspects of one particular GPU. Condensing the data for each GPU in this manner helps to ens...
Definition gpu_details.h:27
KernelManager(const GpuDetails &gpu_in=null_gpu)
The constructor for this base class takes the GPU specifications.
Definition kernel_format.cpp:89
A collection of one or more AtomGraph objects, with similar components arranged in contiguous arrays ...
Definition atomgraph_synthesis.h:55
Definition stormm_vector_types.h:22