Collective Variables Module - Developer Documentation
Loading...
Searching...
No Matches
colvar_rotation_derivative.h
1#ifndef COLVAR_ROTATION_DERIVATIVE
2#define COLVAR_ROTATION_DERIVATIVE
3
4#include "colvar_gpu_support.h"
5#include "colvartypes.h"
6#include <type_traits>
7#include <cstring>
8#include <array>
9
10#ifndef _noalias
11#if defined(__INTEL_COMPILER) || (defined(__PGI) && !defined(__NVCOMPILER))
12#define _noalias restrict
13#elif defined(__GNUC__) || defined(__INTEL_LLVM_COMPILER) || defined(__NVCOMPILER)
14#define _noalias __restrict
15#else
16#define _noalias
17#endif
18#endif
19
21enum class rotation_derivative_dldq {
23 use_dl = 1 << 0,
25 use_dq = 1 << 1
26};
27
28inline COLVARS_HOST_DEVICE constexpr rotation_derivative_dldq operator|(rotation_derivative_dldq Lhs, rotation_derivative_dldq Rhs) {
29 return static_cast<rotation_derivative_dldq>(
30 static_cast<std::underlying_type<rotation_derivative_dldq>::type>(Lhs) |
31 static_cast<std::underlying_type<rotation_derivative_dldq>::type>(Rhs));
32}
33
34inline COLVARS_HOST_DEVICE constexpr bool operator&(rotation_derivative_dldq Lhs, rotation_derivative_dldq Rhs)
35{
36 return (static_cast<std::underlying_type<rotation_derivative_dldq>::type>(Lhs) &
37 static_cast<std::underlying_type<rotation_derivative_dldq>::type>(Rhs));
38}
39
41// template <typename T1, typename T2, bool soa = false>
46 // const std::vector<cvm::real> &m_pos1;
47 cvm::ag_vector_real_t::const_iterator pos1x;
48 cvm::ag_vector_real_t::const_iterator pos1y;
49 cvm::ag_vector_real_t::const_iterator pos1z;
51 // const std::vector<cvm::real> &m_pos2;
52 cvm::ag_vector_real_t::const_iterator pos2x;
53 cvm::ag_vector_real_t::const_iterator pos2y;
54 cvm::ag_vector_real_t::const_iterator pos2z;
61 cvm::real tmp_Q0Q0_L[4][4][4];
73 const cvm::rotation &rot,
74 const cvm::ag_vector_real_t &pos1,
75 const cvm::ag_vector_real_t &pos2,
76 const size_t num_atoms_pos1,
77 const size_t num_atoms_pos2):
78 m_rot(rot),
79 pos1x(pos1.cbegin()),
80 pos1y(pos1x + num_atoms_pos1),
81 pos1z(pos1y + num_atoms_pos1),
82 pos2x(pos2.cbegin()),
83 pos2y(pos2x + num_atoms_pos2),
84 pos2z(pos2y + num_atoms_pos2),
85 m_num_atoms_pos1(num_atoms_pos1),
86 m_num_atoms_pos2(num_atoms_pos2) {}
93 void prepare_derivative(rotation_derivative_dldq require_dl_dq) {
94 if (require_dl_dq & rotation_derivative_dldq::use_dl) {
95 const auto &Q0 = m_rot.S_eigvec[0];
96 tmp_Q0Q0[0][0] = Q0[0] * Q0[0];
97 tmp_Q0Q0[0][1] = Q0[0] * Q0[1];
98 tmp_Q0Q0[0][2] = Q0[0] * Q0[2];
99 tmp_Q0Q0[0][3] = Q0[0] * Q0[3];
100 tmp_Q0Q0[1][0] = Q0[1] * Q0[0];
101 tmp_Q0Q0[1][1] = Q0[1] * Q0[1];
102 tmp_Q0Q0[1][2] = Q0[1] * Q0[2];
103 tmp_Q0Q0[1][3] = Q0[1] * Q0[3];
104 tmp_Q0Q0[2][0] = Q0[2] * Q0[0];
105 tmp_Q0Q0[2][1] = Q0[2] * Q0[1];
106 tmp_Q0Q0[2][2] = Q0[2] * Q0[2];
107 tmp_Q0Q0[2][3] = Q0[2] * Q0[3];
108 tmp_Q0Q0[3][0] = Q0[3] * Q0[0];
109 tmp_Q0Q0[3][1] = Q0[3] * Q0[1];
110 tmp_Q0Q0[3][2] = Q0[3] * Q0[2];
111 tmp_Q0Q0[3][3] = Q0[3] * Q0[3];
112 }
113 if (require_dl_dq & rotation_derivative_dldq::use_dq) {
114 const auto &Q0 = m_rot.S_eigvec[0];
115 const auto &Q1 = m_rot.S_eigvec[1];
116 const auto &Q2 = m_rot.S_eigvec[2];
117 const auto &Q3 = m_rot.S_eigvec[3];
118 cvm::real const L0 = m_rot.S_eigval[0];
119 cvm::real const L1 = m_rot.S_eigval[1];
120 cvm::real const L2 = m_rot.S_eigval[2];
121 cvm::real const L3 = m_rot.S_eigval[3];
122
123 tmp_Q0Q0_L[0][0][0] = (Q1[0] * Q0[0]) / (L0-L1) * Q1[0] +
124 (Q2[0] * Q0[0]) / (L0-L2) * Q2[0] +
125 (Q3[0] * Q0[0]) / (L0-L3) * Q3[0];
126 tmp_Q0Q0_L[0][0][1] = (Q1[0] * Q0[1]) / (L0-L1) * Q1[0] +
127 (Q2[0] * Q0[1]) / (L0-L2) * Q2[0] +
128 (Q3[0] * Q0[1]) / (L0-L3) * Q3[0];
129 tmp_Q0Q0_L[0][0][2] = (Q1[0] * Q0[2]) / (L0-L1) * Q1[0] +
130 (Q2[0] * Q0[2]) / (L0-L2) * Q2[0] +
131 (Q3[0] * Q0[2]) / (L0-L3) * Q3[0];
132 tmp_Q0Q0_L[0][0][3] = (Q1[0] * Q0[3]) / (L0-L1) * Q1[0] +
133 (Q2[0] * Q0[3]) / (L0-L2) * Q2[0] +
134 (Q3[0] * Q0[3]) / (L0-L3) * Q3[0];
135
136 tmp_Q0Q0_L[0][1][0] = (Q1[1] * Q0[0]) / (L0-L1) * Q1[0] +
137 (Q2[1] * Q0[0]) / (L0-L2) * Q2[0] +
138 (Q3[1] * Q0[0]) / (L0-L3) * Q3[0];
139 tmp_Q0Q0_L[0][1][1] = (Q1[1] * Q0[1]) / (L0-L1) * Q1[0] +
140 (Q2[1] * Q0[1]) / (L0-L2) * Q2[0] +
141 (Q3[1] * Q0[1]) / (L0-L3) * Q3[0];
142 tmp_Q0Q0_L[0][1][2] = (Q1[1] * Q0[2]) / (L0-L1) * Q1[0] +
143 (Q2[1] * Q0[2]) / (L0-L2) * Q2[0] +
144 (Q3[1] * Q0[2]) / (L0-L3) * Q3[0];
145 tmp_Q0Q0_L[0][1][3] = (Q1[1] * Q0[3]) / (L0-L1) * Q1[0] +
146 (Q2[1] * Q0[3]) / (L0-L2) * Q2[0] +
147 (Q3[1] * Q0[3]) / (L0-L3) * Q3[0];
148
149 tmp_Q0Q0_L[0][2][0] = (Q1[2] * Q0[0]) / (L0-L1) * Q1[0] +
150 (Q2[2] * Q0[0]) / (L0-L2) * Q2[0] +
151 (Q3[2] * Q0[0]) / (L0-L3) * Q3[0];
152 tmp_Q0Q0_L[0][2][1] = (Q1[2] * Q0[1]) / (L0-L1) * Q1[0] +
153 (Q2[2] * Q0[1]) / (L0-L2) * Q2[0] +
154 (Q3[2] * Q0[1]) / (L0-L3) * Q3[0];
155 tmp_Q0Q0_L[0][2][2] = (Q1[2] * Q0[2]) / (L0-L1) * Q1[0] +
156 (Q2[2] * Q0[2]) / (L0-L2) * Q2[0] +
157 (Q3[2] * Q0[2]) / (L0-L3) * Q3[0];
158 tmp_Q0Q0_L[0][2][3] = (Q1[2] * Q0[3]) / (L0-L1) * Q1[0] +
159 (Q2[2] * Q0[3]) / (L0-L2) * Q2[0] +
160 (Q3[2] * Q0[3]) / (L0-L3) * Q3[0];
161
162 tmp_Q0Q0_L[0][3][0] = (Q1[3] * Q0[0]) / (L0-L1) * Q1[0] +
163 (Q2[3] * Q0[0]) / (L0-L2) * Q2[0] +
164 (Q3[3] * Q0[0]) / (L0-L3) * Q3[0];
165 tmp_Q0Q0_L[0][3][1] = (Q1[3] * Q0[1]) / (L0-L1) * Q1[0] +
166 (Q2[3] * Q0[1]) / (L0-L2) * Q2[0] +
167 (Q3[3] * Q0[1]) / (L0-L3) * Q3[0];
168 tmp_Q0Q0_L[0][3][2] = (Q1[3] * Q0[2]) / (L0-L1) * Q1[0] +
169 (Q2[3] * Q0[2]) / (L0-L2) * Q2[0] +
170 (Q3[3] * Q0[2]) / (L0-L3) * Q3[0];
171 tmp_Q0Q0_L[0][3][3] = (Q1[3] * Q0[3]) / (L0-L1) * Q1[0] +
172 (Q2[3] * Q0[3]) / (L0-L2) * Q2[0] +
173 (Q3[3] * Q0[3]) / (L0-L3) * Q3[0];
174
175 tmp_Q0Q0_L[1][0][0] = (Q1[0] * Q0[0]) / (L0-L1) * Q1[1] +
176 (Q2[0] * Q0[0]) / (L0-L2) * Q2[1] +
177 (Q3[0] * Q0[0]) / (L0-L3) * Q3[1];
178 tmp_Q0Q0_L[1][0][1] = (Q1[0] * Q0[1]) / (L0-L1) * Q1[1] +
179 (Q2[0] * Q0[1]) / (L0-L2) * Q2[1] +
180 (Q3[0] * Q0[1]) / (L0-L3) * Q3[1];
181 tmp_Q0Q0_L[1][0][2] = (Q1[0] * Q0[2]) / (L0-L1) * Q1[1] +
182 (Q2[0] * Q0[2]) / (L0-L2) * Q2[1] +
183 (Q3[0] * Q0[2]) / (L0-L3) * Q3[1];
184 tmp_Q0Q0_L[1][0][3] = (Q1[0] * Q0[3]) / (L0-L1) * Q1[1] +
185 (Q2[0] * Q0[3]) / (L0-L2) * Q2[1] +
186 (Q3[0] * Q0[3]) / (L0-L3) * Q3[1];
187
188 tmp_Q0Q0_L[1][1][0] = (Q1[1] * Q0[0]) / (L0-L1) * Q1[1] +
189 (Q2[1] * Q0[0]) / (L0-L2) * Q2[1] +
190 (Q3[1] * Q0[0]) / (L0-L3) * Q3[1];
191 tmp_Q0Q0_L[1][1][1] = (Q1[1] * Q0[1]) / (L0-L1) * Q1[1] +
192 (Q2[1] * Q0[1]) / (L0-L2) * Q2[1] +
193 (Q3[1] * Q0[1]) / (L0-L3) * Q3[1];
194 tmp_Q0Q0_L[1][1][2] = (Q1[1] * Q0[2]) / (L0-L1) * Q1[1] +
195 (Q2[1] * Q0[2]) / (L0-L2) * Q2[1] +
196 (Q3[1] * Q0[2]) / (L0-L3) * Q3[1];
197 tmp_Q0Q0_L[1][1][3] = (Q1[1] * Q0[3]) / (L0-L1) * Q1[1] +
198 (Q2[1] * Q0[3]) / (L0-L2) * Q2[1] +
199 (Q3[1] * Q0[3]) / (L0-L3) * Q3[1];
200
201 tmp_Q0Q0_L[1][2][0] = (Q1[2] * Q0[0]) / (L0-L1) * Q1[1] +
202 (Q2[2] * Q0[0]) / (L0-L2) * Q2[1] +
203 (Q3[2] * Q0[0]) / (L0-L3) * Q3[1];
204 tmp_Q0Q0_L[1][2][1] = (Q1[2] * Q0[1]) / (L0-L1) * Q1[1] +
205 (Q2[2] * Q0[1]) / (L0-L2) * Q2[1] +
206 (Q3[2] * Q0[1]) / (L0-L3) * Q3[1];
207 tmp_Q0Q0_L[1][2][2] = (Q1[2] * Q0[2]) / (L0-L1) * Q1[1] +
208 (Q2[2] * Q0[2]) / (L0-L2) * Q2[1] +
209 (Q3[2] * Q0[2]) / (L0-L3) * Q3[1];
210 tmp_Q0Q0_L[1][2][3] = (Q1[2] * Q0[3]) / (L0-L1) * Q1[1] +
211 (Q2[2] * Q0[3]) / (L0-L2) * Q2[1] +
212 (Q3[2] * Q0[3]) / (L0-L3) * Q3[1];
213
214 tmp_Q0Q0_L[1][3][0] = (Q1[3] * Q0[0]) / (L0-L1) * Q1[1] +
215 (Q2[3] * Q0[0]) / (L0-L2) * Q2[1] +
216 (Q3[3] * Q0[0]) / (L0-L3) * Q3[1];
217 tmp_Q0Q0_L[1][3][1] = (Q1[3] * Q0[1]) / (L0-L1) * Q1[1] +
218 (Q2[3] * Q0[1]) / (L0-L2) * Q2[1] +
219 (Q3[3] * Q0[1]) / (L0-L3) * Q3[1];
220 tmp_Q0Q0_L[1][3][2] = (Q1[3] * Q0[2]) / (L0-L1) * Q1[1] +
221 (Q2[3] * Q0[2]) / (L0-L2) * Q2[1] +
222 (Q3[3] * Q0[2]) / (L0-L3) * Q3[1];
223 tmp_Q0Q0_L[1][3][3] = (Q1[3] * Q0[3]) / (L0-L1) * Q1[1] +
224 (Q2[3] * Q0[3]) / (L0-L2) * Q2[1] +
225 (Q3[3] * Q0[3]) / (L0-L3) * Q3[1];
226
227 tmp_Q0Q0_L[2][0][0] = (Q1[0] * Q0[0]) / (L0-L1) * Q1[2] +
228 (Q2[0] * Q0[0]) / (L0-L2) * Q2[2] +
229 (Q3[0] * Q0[0]) / (L0-L3) * Q3[2];
230 tmp_Q0Q0_L[2][0][1] = (Q1[0] * Q0[1]) / (L0-L1) * Q1[2] +
231 (Q2[0] * Q0[1]) / (L0-L2) * Q2[2] +
232 (Q3[0] * Q0[1]) / (L0-L3) * Q3[2];
233 tmp_Q0Q0_L[2][0][2] = (Q1[0] * Q0[2]) / (L0-L1) * Q1[2] +
234 (Q2[0] * Q0[2]) / (L0-L2) * Q2[2] +
235 (Q3[0] * Q0[2]) / (L0-L3) * Q3[2];
236 tmp_Q0Q0_L[2][0][3] = (Q1[0] * Q0[3]) / (L0-L1) * Q1[2] +
237 (Q2[0] * Q0[3]) / (L0-L2) * Q2[2] +
238 (Q3[0] * Q0[3]) / (L0-L3) * Q3[2];
239
240 tmp_Q0Q0_L[2][1][0] = (Q1[1] * Q0[0]) / (L0-L1) * Q1[2] +
241 (Q2[1] * Q0[0]) / (L0-L2) * Q2[2] +
242 (Q3[1] * Q0[0]) / (L0-L3) * Q3[2];
243 tmp_Q0Q0_L[2][1][1] = (Q1[1] * Q0[1]) / (L0-L1) * Q1[2] +
244 (Q2[1] * Q0[1]) / (L0-L2) * Q2[2] +
245 (Q3[1] * Q0[1]) / (L0-L3) * Q3[2];
246 tmp_Q0Q0_L[2][1][2] = (Q1[1] * Q0[2]) / (L0-L1) * Q1[2] +
247 (Q2[1] * Q0[2]) / (L0-L2) * Q2[2] +
248 (Q3[1] * Q0[2]) / (L0-L3) * Q3[2];
249 tmp_Q0Q0_L[2][1][3] = (Q1[1] * Q0[3]) / (L0-L1) * Q1[2] +
250 (Q2[1] * Q0[3]) / (L0-L2) * Q2[2] +
251 (Q3[1] * Q0[3]) / (L0-L3) * Q3[2];
252
253 tmp_Q0Q0_L[2][2][0] = (Q1[2] * Q0[0]) / (L0-L1) * Q1[2] +
254 (Q2[2] * Q0[0]) / (L0-L2) * Q2[2] +
255 (Q3[2] * Q0[0]) / (L0-L3) * Q3[2];
256 tmp_Q0Q0_L[2][2][1] = (Q1[2] * Q0[1]) / (L0-L1) * Q1[2] +
257 (Q2[2] * Q0[1]) / (L0-L2) * Q2[2] +
258 (Q3[2] * Q0[1]) / (L0-L3) * Q3[2];
259 tmp_Q0Q0_L[2][2][2] = (Q1[2] * Q0[2]) / (L0-L1) * Q1[2] +
260 (Q2[2] * Q0[2]) / (L0-L2) * Q2[2] +
261 (Q3[2] * Q0[2]) / (L0-L3) * Q3[2];
262 tmp_Q0Q0_L[2][2][3] = (Q1[2] * Q0[3]) / (L0-L1) * Q1[2] +
263 (Q2[2] * Q0[3]) / (L0-L2) * Q2[2] +
264 (Q3[2] * Q0[3]) / (L0-L3) * Q3[2];
265
266 tmp_Q0Q0_L[2][3][0] = (Q1[3] * Q0[0]) / (L0-L1) * Q1[2] +
267 (Q2[3] * Q0[0]) / (L0-L2) * Q2[2] +
268 (Q3[3] * Q0[0]) / (L0-L3) * Q3[2];
269 tmp_Q0Q0_L[2][3][1] = (Q1[3] * Q0[1]) / (L0-L1) * Q1[2] +
270 (Q2[3] * Q0[1]) / (L0-L2) * Q2[2] +
271 (Q3[3] * Q0[1]) / (L0-L3) * Q3[2];
272 tmp_Q0Q0_L[2][3][2] = (Q1[3] * Q0[2]) / (L0-L1) * Q1[2] +
273 (Q2[3] * Q0[2]) / (L0-L2) * Q2[2] +
274 (Q3[3] * Q0[2]) / (L0-L3) * Q3[2];
275 tmp_Q0Q0_L[2][3][3] = (Q1[3] * Q0[3]) / (L0-L1) * Q1[2] +
276 (Q2[3] * Q0[3]) / (L0-L2) * Q2[2] +
277 (Q3[3] * Q0[3]) / (L0-L3) * Q3[2];
278
279 tmp_Q0Q0_L[3][0][0] = (Q1[0] * Q0[0]) / (L0-L1) * Q1[3] +
280 (Q2[0] * Q0[0]) / (L0-L2) * Q2[3] +
281 (Q3[0] * Q0[0]) / (L0-L3) * Q3[3];
282 tmp_Q0Q0_L[3][0][1] = (Q1[0] * Q0[1]) / (L0-L1) * Q1[3] +
283 (Q2[0] * Q0[1]) / (L0-L2) * Q2[3] +
284 (Q3[0] * Q0[1]) / (L0-L3) * Q3[3];
285 tmp_Q0Q0_L[3][0][2] = (Q1[0] * Q0[2]) / (L0-L1) * Q1[3] +
286 (Q2[0] * Q0[2]) / (L0-L2) * Q2[3] +
287 (Q3[0] * Q0[2]) / (L0-L3) * Q3[3];
288 tmp_Q0Q0_L[3][0][3] = (Q1[0] * Q0[3]) / (L0-L1) * Q1[3] +
289 (Q2[0] * Q0[3]) / (L0-L2) * Q2[3] +
290 (Q3[0] * Q0[3]) / (L0-L3) * Q3[3];
291
292 tmp_Q0Q0_L[3][1][0] = (Q1[1] * Q0[0]) / (L0-L1) * Q1[3] +
293 (Q2[1] * Q0[0]) / (L0-L2) * Q2[3] +
294 (Q3[1] * Q0[0]) / (L0-L3) * Q3[3];
295 tmp_Q0Q0_L[3][1][1] = (Q1[1] * Q0[1]) / (L0-L1) * Q1[3] +
296 (Q2[1] * Q0[1]) / (L0-L2) * Q2[3] +
297 (Q3[1] * Q0[1]) / (L0-L3) * Q3[3];
298 tmp_Q0Q0_L[3][1][2] = (Q1[1] * Q0[2]) / (L0-L1) * Q1[3] +
299 (Q2[1] * Q0[2]) / (L0-L2) * Q2[3] +
300 (Q3[1] * Q0[2]) / (L0-L3) * Q3[3];
301 tmp_Q0Q0_L[3][1][3] = (Q1[1] * Q0[3]) / (L0-L1) * Q1[3] +
302 (Q2[1] * Q0[3]) / (L0-L2) * Q2[3] +
303 (Q3[1] * Q0[3]) / (L0-L3) * Q3[3];
304
305 tmp_Q0Q0_L[3][2][0] = (Q1[2] * Q0[0]) / (L0-L1) * Q1[3] +
306 (Q2[2] * Q0[0]) / (L0-L2) * Q2[3] +
307 (Q3[2] * Q0[0]) / (L0-L3) * Q3[3];
308 tmp_Q0Q0_L[3][2][1] = (Q1[2] * Q0[1]) / (L0-L1) * Q1[3] +
309 (Q2[2] * Q0[1]) / (L0-L2) * Q2[3] +
310 (Q3[2] * Q0[1]) / (L0-L3) * Q3[3];
311 tmp_Q0Q0_L[3][2][2] = (Q1[2] * Q0[2]) / (L0-L1) * Q1[3] +
312 (Q2[2] * Q0[2]) / (L0-L2) * Q2[3] +
313 (Q3[2] * Q0[2]) / (L0-L3) * Q3[3];
314 tmp_Q0Q0_L[3][2][3] = (Q1[2] * Q0[3]) / (L0-L1) * Q1[3] +
315 (Q2[2] * Q0[3]) / (L0-L2) * Q2[3] +
316 (Q3[2] * Q0[3]) / (L0-L3) * Q3[3];
317
318 tmp_Q0Q0_L[3][3][0] = (Q1[3] * Q0[0]) / (L0-L1) * Q1[3] +
319 (Q2[3] * Q0[0]) / (L0-L2) * Q2[3] +
320 (Q3[3] * Q0[0]) / (L0-L3) * Q3[3];
321 tmp_Q0Q0_L[3][3][1] = (Q1[3] * Q0[1]) / (L0-L1) * Q1[3] +
322 (Q2[3] * Q0[1]) / (L0-L2) * Q2[3] +
323 (Q3[3] * Q0[1]) / (L0-L3) * Q3[3];
324 tmp_Q0Q0_L[3][3][2] = (Q1[3] * Q0[2]) / (L0-L1) * Q1[3] +
325 (Q2[3] * Q0[2]) / (L0-L2) * Q2[3] +
326 (Q3[3] * Q0[2]) / (L0-L3) * Q3[3];
327 tmp_Q0Q0_L[3][3][3] = (Q1[3] * Q0[3]) / (L0-L1) * Q1[3] +
328 (Q2[3] * Q0[3]) / (L0-L2) * Q2[3] +
329 (Q3[3] * Q0[3]) / (L0-L3) * Q3[3];
330 }
331 }
339 template <bool use_dl, bool use_dq, bool use_ds>
341 const cvm::rvector (&ds)[4][4],
342 cvm::rvector* _noalias const dl0_out,
343 std::array<cvm::rvector, 4>* _noalias const dq0_out,
344 std::array<std::array<cvm::rvector, 4>, 4>* _noalias const ds_out) const {
345 if (use_ds) {
346 // this code path is for debug_gradients, so not necessary to unroll the loop
347 *ds_out = std::array<std::array<cvm::rvector, 4>, 4>();
348 for (int i = 0; i < 4; ++i) {
349 for (int j = 0; j < 4; ++j) {
350 (*ds_out)[i][j] = ds[i][j];
351 }
352 }
353 }
354 if (use_dl) {
355 /* manually loop unrolling of the following loop:
356 dl0_1.reset();
357 for (size_t i = 0; i < 4; i++) {
358 for (size_t j = 0; j < 4; j++) {
359 dl0_1 += Q0[i] * ds_1[i][j] * Q0[j];
360 }
361 }
362 */
363 *dl0_out = tmp_Q0Q0[0][0] * ds[0][0] +
364 tmp_Q0Q0[0][1] * ds[0][1] +
365 tmp_Q0Q0[0][2] * ds[0][2] +
366 tmp_Q0Q0[0][3] * ds[0][3] +
367 tmp_Q0Q0[1][0] * ds[1][0] +
368 tmp_Q0Q0[1][1] * ds[1][1] +
369 tmp_Q0Q0[1][2] * ds[1][2] +
370 tmp_Q0Q0[1][3] * ds[1][3] +
371 tmp_Q0Q0[2][0] * ds[2][0] +
372 tmp_Q0Q0[2][1] * ds[2][1] +
373 tmp_Q0Q0[2][2] * ds[2][2] +
374 tmp_Q0Q0[2][3] * ds[2][3] +
375 tmp_Q0Q0[3][0] * ds[3][0] +
376 tmp_Q0Q0[3][1] * ds[3][1] +
377 tmp_Q0Q0[3][2] * ds[3][2] +
378 tmp_Q0Q0[3][3] * ds[3][3];
379 }
380 if (use_dq) {
381 // we can skip this check if a fixed-size array is used
382 // if (dq0_out->size() != 4) dq0_out->resize(4);
383 /* manually loop unrolling of the following loop:
384 dq0_1.reset();
385 for (size_t p = 0; p < 4; p++) {
386 for (size_t i = 0; i < 4; i++) {
387 for (size_t j = 0; j < 4; j++) {
388 dq0_1[p] +=
389 (Q1[i] * ds_1[i][j] * Q0[j]) / (L0-L1) * Q1[p] +
390 (Q2[i] * ds_1[i][j] * Q0[j]) / (L0-L2) * Q2[p] +
391 (Q3[i] * ds_1[i][j] * Q0[j]) / (L0-L3) * Q3[p];
392 }
393 }
394 }
395 */
396 (*dq0_out)[0] = tmp_Q0Q0_L[0][0][0] * ds[0][0] +
397 tmp_Q0Q0_L[0][0][1] * ds[0][1] +
398 tmp_Q0Q0_L[0][0][2] * ds[0][2] +
399 tmp_Q0Q0_L[0][0][3] * ds[0][3] +
400 tmp_Q0Q0_L[0][1][0] * ds[1][0] +
401 tmp_Q0Q0_L[0][1][1] * ds[1][1] +
402 tmp_Q0Q0_L[0][1][2] * ds[1][2] +
403 tmp_Q0Q0_L[0][1][3] * ds[1][3] +
404 tmp_Q0Q0_L[0][2][0] * ds[2][0] +
405 tmp_Q0Q0_L[0][2][1] * ds[2][1] +
406 tmp_Q0Q0_L[0][2][2] * ds[2][2] +
407 tmp_Q0Q0_L[0][2][3] * ds[2][3] +
408 tmp_Q0Q0_L[0][3][0] * ds[3][0] +
409 tmp_Q0Q0_L[0][3][1] * ds[3][1] +
410 tmp_Q0Q0_L[0][3][2] * ds[3][2] +
411 tmp_Q0Q0_L[0][3][3] * ds[3][3];
412
413 (*dq0_out)[1] = tmp_Q0Q0_L[1][0][0] * ds[0][0] +
414 tmp_Q0Q0_L[1][0][1] * ds[0][1] +
415 tmp_Q0Q0_L[1][0][2] * ds[0][2] +
416 tmp_Q0Q0_L[1][0][3] * ds[0][3] +
417 tmp_Q0Q0_L[1][1][0] * ds[1][0] +
418 tmp_Q0Q0_L[1][1][1] * ds[1][1] +
419 tmp_Q0Q0_L[1][1][2] * ds[1][2] +
420 tmp_Q0Q0_L[1][1][3] * ds[1][3] +
421 tmp_Q0Q0_L[1][2][0] * ds[2][0] +
422 tmp_Q0Q0_L[1][2][1] * ds[2][1] +
423 tmp_Q0Q0_L[1][2][2] * ds[2][2] +
424 tmp_Q0Q0_L[1][2][3] * ds[2][3] +
425 tmp_Q0Q0_L[1][3][0] * ds[3][0] +
426 tmp_Q0Q0_L[1][3][1] * ds[3][1] +
427 tmp_Q0Q0_L[1][3][2] * ds[3][2] +
428 tmp_Q0Q0_L[1][3][3] * ds[3][3];
429
430 (*dq0_out)[2] = tmp_Q0Q0_L[2][0][0] * ds[0][0] +
431 tmp_Q0Q0_L[2][0][1] * ds[0][1] +
432 tmp_Q0Q0_L[2][0][2] * ds[0][2] +
433 tmp_Q0Q0_L[2][0][3] * ds[0][3] +
434 tmp_Q0Q0_L[2][1][0] * ds[1][0] +
435 tmp_Q0Q0_L[2][1][1] * ds[1][1] +
436 tmp_Q0Q0_L[2][1][2] * ds[1][2] +
437 tmp_Q0Q0_L[2][1][3] * ds[1][3] +
438 tmp_Q0Q0_L[2][2][0] * ds[2][0] +
439 tmp_Q0Q0_L[2][2][1] * ds[2][1] +
440 tmp_Q0Q0_L[2][2][2] * ds[2][2] +
441 tmp_Q0Q0_L[2][2][3] * ds[2][3] +
442 tmp_Q0Q0_L[2][3][0] * ds[3][0] +
443 tmp_Q0Q0_L[2][3][1] * ds[3][1] +
444 tmp_Q0Q0_L[2][3][2] * ds[3][2] +
445 tmp_Q0Q0_L[2][3][3] * ds[3][3];
446
447 (*dq0_out)[3] = tmp_Q0Q0_L[3][0][0] * ds[0][0] +
448 tmp_Q0Q0_L[3][0][1] * ds[0][1] +
449 tmp_Q0Q0_L[3][0][2] * ds[0][2] +
450 tmp_Q0Q0_L[3][0][3] * ds[0][3] +
451 tmp_Q0Q0_L[3][1][0] * ds[1][0] +
452 tmp_Q0Q0_L[3][1][1] * ds[1][1] +
453 tmp_Q0Q0_L[3][1][2] * ds[1][2] +
454 tmp_Q0Q0_L[3][1][3] * ds[1][3] +
455 tmp_Q0Q0_L[3][2][0] * ds[2][0] +
456 tmp_Q0Q0_L[3][2][1] * ds[2][1] +
457 tmp_Q0Q0_L[3][2][2] * ds[2][2] +
458 tmp_Q0Q0_L[3][2][3] * ds[2][3] +
459 tmp_Q0Q0_L[3][3][0] * ds[3][0] +
460 tmp_Q0Q0_L[3][3][1] * ds[3][1] +
461 tmp_Q0Q0_L[3][3][2] * ds[3][2] +
462 tmp_Q0Q0_L[3][3][3] * ds[3][3];
463 }
464 }
475 template <bool use_dl, bool use_dq, bool use_ds>
477 size_t ia, cvm::rvector* _noalias const dl0_1_out = nullptr,
478 std::array<cvm::rvector, 4>* _noalias const dq0_1_out = nullptr,
479 std::array<std::array<cvm::rvector, 4>, 4>* _noalias const ds_1_out = nullptr) const {
480 // if (dl0_1_out == nullptr && dq0_1_out == nullptr) return;
481 const cvm::real a2x = *(pos2x + ia);
482 const cvm::real a2y = *(pos2y + ia);
483 const cvm::real a2z = *(pos2z + ia);
484 const cvm::rvector ds_1[4][4] = {
485 {{ a2x, a2y, a2z}, { 0.0, a2z, -a2y}, {-a2z, 0.0, a2x}, { a2y, -a2x, 0.0}},
486 {{ 0.0, a2z, -a2y}, { a2x, -a2y, -a2z}, { a2y, a2x, 0.0}, { a2z, 0.0, a2x}},
487 {{-a2z, 0.0, a2x}, { a2y, a2x, 0.0}, {-a2x, a2y, -a2z}, { 0.0, a2z, a2y}},
488 {{ a2y, -a2x, 0.0}, { a2z, 0.0, a2x}, { 0.0, a2z, a2y}, {-a2x, -a2y, a2z}}};
489 calc_derivative_impl<use_dl, use_dq, use_ds>(ds_1, dl0_1_out, dq0_1_out, ds_1_out);
490 }
501 template <bool use_dl, bool use_dq, bool use_ds>
503 size_t ia, cvm::rvector* _noalias const dl0_2_out = nullptr,
504 std::array<cvm::rvector, 4>* _noalias const dq0_2_out = nullptr,
505 std::array<std::array<cvm::rvector, 4>, 4>* _noalias const ds_2_out = nullptr) const {
506 // if (dl0_2_out == nullptr && dq0_2_out == nullptr) return;
507 const cvm::real a1x = *(pos1x + ia);
508 const cvm::real a1y = *(pos1y + ia);
509 const cvm::real a1z = *(pos1z + ia);
510 const cvm::rvector ds_2[4][4] = {
511 {{ a1x, a1y, a1z}, { 0.0, -a1z, a1y}, { a1z, 0.0, -a1x}, {-a1y, a1x, 0.0}},
512 {{ 0.0, -a1z, a1y}, { a1x, -a1y, -a1z}, { a1y, a1x, 0.0}, { a1z, 0.0, a1x}},
513 {{ a1z, 0.0, -a1x}, { a1y, a1x, 0.0}, {-a1x, a1y, -a1z}, { 0.0, a1z, a1y}},
514 {{-a1y, a1x, 0.0}, { a1z, 0.0, a1x}, { 0.0, a1z, a1y}, {-a1x, -a1y, a1z}}};
515 calc_derivative_impl<use_dl, use_dq, use_ds>(ds_2, dl0_2_out, dq0_2_out, ds_2_out);
516 }
517
534 template <int i>
536 static_assert((i < 4) && (i >= 0), "i must be in [0, 3] in project_force_to_C_from_dxdqi.");
537 cvm::rmatrix result;
538 result.xx = f_on_q * ( tmp_Q0Q0_L[i][0][0] + tmp_Q0Q0_L[i][1][1] - tmp_Q0Q0_L[i][2][2] - tmp_Q0Q0_L[i][3][3] );
539 result.xy = f_on_q * ( tmp_Q0Q0_L[i][0][3] + tmp_Q0Q0_L[i][1][2] + tmp_Q0Q0_L[i][2][1] + tmp_Q0Q0_L[i][3][0] );
540 result.xz = f_on_q * (-tmp_Q0Q0_L[i][0][2] + tmp_Q0Q0_L[i][1][3] - tmp_Q0Q0_L[i][2][0] + tmp_Q0Q0_L[i][3][1] );
541 result.yx = f_on_q * (-tmp_Q0Q0_L[i][0][3] + tmp_Q0Q0_L[i][1][2] + tmp_Q0Q0_L[i][2][1] - tmp_Q0Q0_L[i][3][0] );
542 result.yy = f_on_q * ( tmp_Q0Q0_L[i][0][0] - tmp_Q0Q0_L[i][1][1] + tmp_Q0Q0_L[i][2][2] - tmp_Q0Q0_L[i][3][3] );
543 result.yz = f_on_q * ( tmp_Q0Q0_L[i][0][1] + tmp_Q0Q0_L[i][1][0] + tmp_Q0Q0_L[i][2][3] + tmp_Q0Q0_L[i][3][2] );
544 result.zx = f_on_q * ( tmp_Q0Q0_L[i][0][2] + tmp_Q0Q0_L[i][1][3] + tmp_Q0Q0_L[i][2][0] + tmp_Q0Q0_L[i][3][1] );
545 result.zy = f_on_q * (-tmp_Q0Q0_L[i][0][1] - tmp_Q0Q0_L[i][1][0] + tmp_Q0Q0_L[i][2][3] + tmp_Q0Q0_L[i][3][2] );
546 result.zz = f_on_q * ( tmp_Q0Q0_L[i][0][0] - tmp_Q0Q0_L[i][1][1] - tmp_Q0Q0_L[i][2][2] + tmp_Q0Q0_L[i][3][3] );
547 return result;
548 }
549
567 template <typename dim4_array_t>
568 inline cvm::rmatrix project_force_to_C_from_dxdq(const dim4_array_t& sum_dxdq) const {
569 cvm::rmatrix result;
570 result += project_force_to_C_from_dxdqi<0>(sum_dxdq[0]);
571 result += project_force_to_C_from_dxdqi<1>(sum_dxdq[1]);
572 result += project_force_to_C_from_dxdqi<2>(sum_dxdq[2]);
573 result += project_force_to_C_from_dxdqi<3>(sum_dxdq[3]);
574 return result;
575 }
576
587 inline cvm::rvector project_force_to_group1(size_t ia, const cvm::rmatrix& dxdC) const {
588 const cvm::real a2x = *(pos2x + ia);
589 const cvm::real a2y = *(pos2y + ia);
590 const cvm::real a2z = *(pos2z + ia);
591 const cvm::rvector result{
592 dxdC.xx * a2x + dxdC.xy * a2y + dxdC.xz * a2z,
593 dxdC.yx * a2x + dxdC.yy * a2y + dxdC.yz * a2z,
594 dxdC.zx * a2x + dxdC.zy * a2y + dxdC.zz * a2z};
595 return result;
596 }
597
608 inline cvm::rvector project_force_to_group2(size_t ia, const cvm::rmatrix& dxdC) const {
609 const cvm::real a1x = *(pos1x + ia);
610 const cvm::real a1y = *(pos1y + ia);
611 const cvm::real a1z = *(pos1z + ia);
612 const cvm::rvector result{
613 dxdC.xx * a1x + dxdC.yx * a1y + dxdC.zx * a1z,
614 dxdC.xy * a1x + dxdC.yy * a1y + dxdC.zy * a1z,
615 dxdC.xz * a1x + dxdC.yz * a1y + dxdC.zz * a1z};
616 return result;
617 }
618};
619
620#if defined(COLVARS_CUDA) || defined(COLVARS_HIP)
621namespace colvars_gpu {
627 const cvm::real* d_pos1y;
628 const cvm::real* d_pos1z;
631 const cvm::real* d_pos2y;
632 const cvm::real* d_pos2z;
639 cvm::real* tmp_Q0Q0_L;
652 int init(
653 const colvars_gpu::rotation_gpu* rot,
654 const cvm::real* d_pos1,
655 const cvm::real* d_pos2,
656 const size_t num_atoms_pos1,
657 const size_t num_atoms_pos2);
674 rotation_derivative_dldq require_dl_dq,
675 cudaGraph_t& graph,
676 std::unordered_map<std::string, cudaGraphNode_t>& nodes_map);
677
684 template <bool use_dl, bool use_dq/*, bool use_ds*/>
685 inline COLVARS_DEVICE void calc_derivative_impl(
686 const cvm::rvector (&ds)[4][4],
687 cvm::rvector* _noalias const dl0_out,
688 cvm::rvector* _noalias const dq0_out) const {
689 if (use_dl) {
690 *dl0_out = tmp_Q0Q0[0*4+0] * ds[0][0] +
691 tmp_Q0Q0[0*4+1] * ds[0][1] +
692 tmp_Q0Q0[0*4+2] * ds[0][2] +
693 tmp_Q0Q0[0*4+3] * ds[0][3] +
694 tmp_Q0Q0[1*4+0] * ds[1][0] +
695 tmp_Q0Q0[1*4+1] * ds[1][1] +
696 tmp_Q0Q0[1*4+2] * ds[1][2] +
697 tmp_Q0Q0[1*4+3] * ds[1][3] +
698 tmp_Q0Q0[2*4+0] * ds[2][0] +
699 tmp_Q0Q0[2*4+1] * ds[2][1] +
700 tmp_Q0Q0[2*4+2] * ds[2][2] +
701 tmp_Q0Q0[2*4+3] * ds[2][3] +
702 tmp_Q0Q0[3*4+0] * ds[3][0] +
703 tmp_Q0Q0[3*4+1] * ds[3][1] +
704 tmp_Q0Q0[3*4+2] * ds[3][2] +
705 tmp_Q0Q0[3*4+3] * ds[3][3];
706 }
707 if (use_dq) {
708 dq0_out[0] = tmp_Q0Q0_L[0*16+0*4+0] * ds[0][0] +
709 tmp_Q0Q0_L[0*16+0*4+1] * ds[0][1] +
710 tmp_Q0Q0_L[0*16+0*4+2] * ds[0][2] +
711 tmp_Q0Q0_L[0*16+0*4+3] * ds[0][3] +
712 tmp_Q0Q0_L[0*16+1*4+0] * ds[1][0] +
713 tmp_Q0Q0_L[0*16+1*4+1] * ds[1][1] +
714 tmp_Q0Q0_L[0*16+1*4+2] * ds[1][2] +
715 tmp_Q0Q0_L[0*16+1*4+3] * ds[1][3] +
716 tmp_Q0Q0_L[0*16+2*4+0] * ds[2][0] +
717 tmp_Q0Q0_L[0*16+2*4+1] * ds[2][1] +
718 tmp_Q0Q0_L[0*16+2*4+2] * ds[2][2] +
719 tmp_Q0Q0_L[0*16+2*4+3] * ds[2][3] +
720 tmp_Q0Q0_L[0*16+3*4+0] * ds[3][0] +
721 tmp_Q0Q0_L[0*16+3*4+1] * ds[3][1] +
722 tmp_Q0Q0_L[0*16+3*4+2] * ds[3][2] +
723 tmp_Q0Q0_L[0*16+3*4+3] * ds[3][3];
724
725 dq0_out[1] = tmp_Q0Q0_L[1*16+0*4+0] * ds[0][0] +
726 tmp_Q0Q0_L[1*16+0*4+1] * ds[0][1] +
727 tmp_Q0Q0_L[1*16+0*4+2] * ds[0][2] +
728 tmp_Q0Q0_L[1*16+0*4+3] * ds[0][3] +
729 tmp_Q0Q0_L[1*16+1*4+0] * ds[1][0] +
730 tmp_Q0Q0_L[1*16+1*4+1] * ds[1][1] +
731 tmp_Q0Q0_L[1*16+1*4+2] * ds[1][2] +
732 tmp_Q0Q0_L[1*16+1*4+3] * ds[1][3] +
733 tmp_Q0Q0_L[1*16+2*4+0] * ds[2][0] +
734 tmp_Q0Q0_L[1*16+2*4+1] * ds[2][1] +
735 tmp_Q0Q0_L[1*16+2*4+2] * ds[2][2] +
736 tmp_Q0Q0_L[1*16+2*4+3] * ds[2][3] +
737 tmp_Q0Q0_L[1*16+3*4+0] * ds[3][0] +
738 tmp_Q0Q0_L[1*16+3*4+1] * ds[3][1] +
739 tmp_Q0Q0_L[1*16+3*4+2] * ds[3][2] +
740 tmp_Q0Q0_L[1*16+3*4+3] * ds[3][3];
741
742 dq0_out[2] = tmp_Q0Q0_L[2*16+0*4+0] * ds[0][0] +
743 tmp_Q0Q0_L[2*16+0*4+1] * ds[0][1] +
744 tmp_Q0Q0_L[2*16+0*4+2] * ds[0][2] +
745 tmp_Q0Q0_L[2*16+0*4+3] * ds[0][3] +
746 tmp_Q0Q0_L[2*16+1*4+0] * ds[1][0] +
747 tmp_Q0Q0_L[2*16+1*4+1] * ds[1][1] +
748 tmp_Q0Q0_L[2*16+1*4+2] * ds[1][2] +
749 tmp_Q0Q0_L[2*16+1*4+3] * ds[1][3] +
750 tmp_Q0Q0_L[2*16+2*4+0] * ds[2][0] +
751 tmp_Q0Q0_L[2*16+2*4+1] * ds[2][1] +
752 tmp_Q0Q0_L[2*16+2*4+2] * ds[2][2] +
753 tmp_Q0Q0_L[2*16+2*4+3] * ds[2][3] +
754 tmp_Q0Q0_L[2*16+3*4+0] * ds[3][0] +
755 tmp_Q0Q0_L[2*16+3*4+1] * ds[3][1] +
756 tmp_Q0Q0_L[2*16+3*4+2] * ds[3][2] +
757 tmp_Q0Q0_L[2*16+3*4+3] * ds[3][3];
758
759 dq0_out[3] = tmp_Q0Q0_L[3*16+0*4+0] * ds[0][0] +
760 tmp_Q0Q0_L[3*16+0*4+1] * ds[0][1] +
761 tmp_Q0Q0_L[3*16+0*4+2] * ds[0][2] +
762 tmp_Q0Q0_L[3*16+0*4+3] * ds[0][3] +
763 tmp_Q0Q0_L[3*16+1*4+0] * ds[1][0] +
764 tmp_Q0Q0_L[3*16+1*4+1] * ds[1][1] +
765 tmp_Q0Q0_L[3*16+1*4+2] * ds[1][2] +
766 tmp_Q0Q0_L[3*16+1*4+3] * ds[1][3] +
767 tmp_Q0Q0_L[3*16+2*4+0] * ds[2][0] +
768 tmp_Q0Q0_L[3*16+2*4+1] * ds[2][1] +
769 tmp_Q0Q0_L[3*16+2*4+2] * ds[2][2] +
770 tmp_Q0Q0_L[3*16+2*4+3] * ds[2][3] +
771 tmp_Q0Q0_L[3*16+3*4+0] * ds[3][0] +
772 tmp_Q0Q0_L[3*16+3*4+1] * ds[3][1] +
773 tmp_Q0Q0_L[3*16+3*4+2] * ds[3][2] +
774 tmp_Q0Q0_L[3*16+3*4+3] * ds[3][3];
775 }
776 }
785 template <bool use_dl, bool use_dq/*, bool use_ds*/>
786 inline COLVARS_DEVICE void calc_derivative_wrt_group1(
787 int ia,
788 cvm::rvector* _noalias const dl0_1_out,
789 cvm::rvector* _noalias const dq0_1_out) const {
790 const cvm::real a2x = d_pos2x[ia];
791 const cvm::real a2y = d_pos2y[ia];
792 const cvm::real a2z = d_pos2z[ia];
793 const cvm::rvector ds_1[4][4] = {
794 {{ a2x, a2y, a2z}, { 0.0, a2z, -a2y}, {-a2z, 0.0, a2x}, { a2y, -a2x, 0.0}},
795 {{ 0.0, a2z, -a2y}, { a2x, -a2y, -a2z}, { a2y, a2x, 0.0}, { a2z, 0.0, a2x}},
796 {{-a2z, 0.0, a2x}, { a2y, a2x, 0.0}, {-a2x, a2y, -a2z}, { 0.0, a2z, a2y}},
797 {{ a2y, -a2x, 0.0}, { a2z, 0.0, a2x}, { 0.0, a2z, a2y}, {-a2x, -a2y, a2z}}};
798 calc_derivative_impl<use_dl, use_dq>(ds_1, dl0_1_out, dq0_1_out);
799 }
808 template <bool use_dl, bool use_dq/*, bool use_ds*/>
809 inline COLVARS_DEVICE void calc_derivative_wrt_group2(
810 int ia,
811 cvm::rvector* _noalias const dl0_2_out,
812 cvm::rvector* _noalias const dq0_2_out) const {
813 const cvm::real a1x = d_pos1x[ia];
814 const cvm::real a1y = d_pos1y[ia];
815 const cvm::real a1z = d_pos1z[ia];
816 const cvm::rvector ds_2[4][4] = {
817 {{ a1x, a1y, a1z}, { 0.0, -a1z, a1y}, { a1z, 0.0, -a1x}, {-a1y, a1x, 0.0}},
818 {{ 0.0, -a1z, a1y}, { a1x, -a1y, -a1z}, { a1y, a1x, 0.0}, { a1z, 0.0, a1x}},
819 {{ a1z, 0.0, -a1x}, { a1y, a1x, 0.0}, {-a1x, a1y, -a1z}, { 0.0, a1z, a1y}},
820 {{-a1y, a1x, 0.0}, { a1z, 0.0, a1x}, { 0.0, a1z, a1y}, {-a1x, -a1y, a1z}}};
821 calc_derivative_impl<use_dl, use_dq>(ds_2, dl0_2_out, dq0_2_out);
822 }
823
840 template <int i>
841 inline COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdqi(cvm::real f_on_q) const {
842 static_assert((i < 4) && (i >= 0), "i must be in [0, 3] in project_force_to_C_from_dxdqi.");
843 return project_force_to_C_from_dxdqi(i, f_on_q);
844 }
845
846 inline COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdqi(int i, cvm::real f_on_q) const {
847 cvm::rmatrix result;
848 result.xx = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+0] + tmp_Q0Q0_L[i*16+1*4+1] - tmp_Q0Q0_L[i*16+2*4+2] - tmp_Q0Q0_L[i*16+3*4+3] );
849 result.xy = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+3] + tmp_Q0Q0_L[i*16+1*4+2] + tmp_Q0Q0_L[i*16+2*4+1] + tmp_Q0Q0_L[i*16+3*4+0] );
850 result.xz = f_on_q * (-tmp_Q0Q0_L[i*16+0*4+2] + tmp_Q0Q0_L[i*16+1*4+3] - tmp_Q0Q0_L[i*16+2*4+0] + tmp_Q0Q0_L[i*16+3*4+1] );
851 result.yx = f_on_q * (-tmp_Q0Q0_L[i*16+0*4+3] + tmp_Q0Q0_L[i*16+1*4+2] + tmp_Q0Q0_L[i*16+2*4+1] - tmp_Q0Q0_L[i*16+3*4+0] );
852 result.yy = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+0] - tmp_Q0Q0_L[i*16+1*4+1] + tmp_Q0Q0_L[i*16+2*4+2] - tmp_Q0Q0_L[i*16+3*4+3] );
853 result.yz = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+1] + tmp_Q0Q0_L[i*16+1*4+0] + tmp_Q0Q0_L[i*16+2*4+3] + tmp_Q0Q0_L[i*16+3*4+2] );
854 result.zx = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+2] + tmp_Q0Q0_L[i*16+1*4+3] + tmp_Q0Q0_L[i*16+2*4+0] + tmp_Q0Q0_L[i*16+3*4+1] );
855 result.zy = f_on_q * (-tmp_Q0Q0_L[i*16+0*4+1] - tmp_Q0Q0_L[i*16+1*4+0] + tmp_Q0Q0_L[i*16+2*4+3] + tmp_Q0Q0_L[i*16+3*4+2] );
856 result.zz = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+0] - tmp_Q0Q0_L[i*16+1*4+1] - tmp_Q0Q0_L[i*16+2*4+2] + tmp_Q0Q0_L[i*16+3*4+3] );
857 return result;
858 }
859
877 template <typename dim4_array_t>
878 inline COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdq(const dim4_array_t& sum_dxdq) const {
879 cvm::rmatrix result;
880 result += project_force_to_C_from_dxdqi<0>(sum_dxdq[0]);
881 result += project_force_to_C_from_dxdqi<1>(sum_dxdq[1]);
882 result += project_force_to_C_from_dxdqi<2>(sum_dxdq[2]);
883 result += project_force_to_C_from_dxdqi<3>(sum_dxdq[3]);
884 return result;
885 }
886
897 inline COLVARS_DEVICE cvm::rvector project_force_to_group1(size_t ia, const cvm::rmatrix& dxdC) const {
898 const cvm::real a2x = *(d_pos2x + ia);
899 const cvm::real a2y = *(d_pos2y + ia);
900 const cvm::real a2z = *(d_pos2z + ia);
901 const cvm::rvector result{
902 dxdC.xx * a2x + dxdC.xy * a2y + dxdC.xz * a2z,
903 dxdC.yx * a2x + dxdC.yy * a2y + dxdC.yz * a2z,
904 dxdC.zx * a2x + dxdC.zy * a2y + dxdC.zz * a2z};
905 return result;
906 }
907
918 inline COLVARS_DEVICE cvm::rvector project_force_to_group2(size_t ia, const cvm::rmatrix& dxdC) const {
919 const cvm::real a1x = *(d_pos1x + ia);
920 const cvm::real a1y = *(d_pos1y + ia);
921 const cvm::real a1z = *(d_pos1z + ia);
922 const cvm::rvector result{
923 dxdC.xx * a1x + dxdC.yx * a1y + dxdC.zx * a1z,
924 dxdC.xy * a1x + dxdC.yy * a1y + dxdC.zy * a1z,
925 dxdC.xz * a1x + dxdC.yz * a1y + dxdC.zz * a1z};
926 return result;
927 }
928};
929}
930#elif defined(COLVARS_SYCL)
931#endif // defined(COLVARS_CUDA) || defined(COLVARS_HIP)
932
933#endif // COLVAR_ROTATION_DERIVATIVE
2-dimensional array of real numbers with three components along each dimension (works with colvarmodu...
Definition: colvartypes.h:901
A rotation between two sets of coordinates (for the moment a wrapper for colvarmodule::quaternion)
Definition: colvartypes.h:1366
cvm::real S_eigval[4]
Eigenvalues of S.
Definition: colvartypes.h:1375
cvm::real S_eigvec[4][4]
Eigenvectors of S.
Definition: colvartypes.h:1378
vector of real numbers with three components
Definition: colvartypes.h:726
double real
Defining an abstract real number allows to switch precision.
Definition: colvarmodule.h:98
Definition: colvartypes.h:1578
Definition: colvar_rotation_derivative.h:622
const cvm::real * d_pos2x
Reference to the atom positions of group 2.
Definition: colvar_rotation_derivative.h:630
COLVARS_DEVICE void calc_derivative_impl(const cvm::rvector(&ds)[4][4], cvm::rvector *_noalias const dl0_out, cvm::rvector *_noalias const dq0_out) const
Actual implementation of the derivative calculation.
Definition: colvar_rotation_derivative.h:685
int add_prepare_derivative_nodes(rotation_derivative_dldq require_dl_dq, cudaGraph_t &graph, std::unordered_map< std::string, cudaGraphNode_t > &nodes_map)
Add a "prepare_rotation_derivative" node to the CUDA graph.
Definition: colvar_rotation_derivative.cpp:48
size_t m_num_atoms_pos2
Number of atoms in group1 (used in SOA)
Definition: colvar_rotation_derivative.h:636
COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdqi(cvm::real f_on_q) const
Project the force on (or the gradient of ) to the force on the correlation matrix ,...
Definition: colvar_rotation_derivative.h:841
const cvm::real * d_pos1x
Reference to the atom positions of group 1.
Definition: colvar_rotation_derivative.h:626
int init(const colvars_gpu::rotation_gpu *rot, const cvm::real *d_pos1, const cvm::real *d_pos2, const size_t num_atoms_pos1, const size_t num_atoms_pos2)
Initialization of the rotation_derivative_gpu class.
Definition: colvar_rotation_derivative.cpp:18
COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdq(const dim4_array_t &sum_dxdq) const
Project the force on (or the gradient of ) to the force on the correlation matrix ,...
Definition: colvar_rotation_derivative.h:878
COLVARS_DEVICE void calc_derivative_wrt_group2(int ia, cvm::rvector *_noalias const dl0_2_out, cvm::rvector *_noalias const dq0_2_out) const
Calculate the derivatives of S, the leading eigenvalue L and the leading eigenvector Q with respect t...
Definition: colvar_rotation_derivative.h:809
COLVARS_DEVICE cvm::rvector project_force_to_group1(size_t ia, const cvm::rmatrix &dxdC) const
Project the force on the correlation matrix to .
Definition: colvar_rotation_derivative.h:897
size_t m_num_atoms_pos1
Number of atoms in group1 (used in SOA)
Definition: colvar_rotation_derivative.h:634
COLVARS_DEVICE void calc_derivative_wrt_group1(int ia, cvm::rvector *_noalias const dl0_1_out, cvm::rvector *_noalias const dq0_1_out) const
Calculate the derivatives of S, the leading eigenvalue L and the leading eigenvector Q with respect t...
Definition: colvar_rotation_derivative.h:786
const colvars_gpu::rotation_gpu * m_rot
Reference to the rotation.
Definition: colvar_rotation_derivative.h:624
rotation_derivative_gpu()
Constructor.
Definition: colvar_rotation_derivative.cpp:9
cvm::real * tmp_Q0Q0
GPU temporary variable that will be updated if prepare_derivative called.
Definition: colvar_rotation_derivative.h:638
COLVARS_DEVICE cvm::rvector project_force_to_group2(size_t ia, const cvm::rmatrix &dxdC) const
Project the force on the correlation matrix to .
Definition: colvar_rotation_derivative.h:918
Helper class for calculating the derivative of rotation.
Definition: colvar_rotation_derivative.h:42
cvm::ag_vector_real_t::const_iterator pos2x
Reference to the atom positions of group 2.
Definition: colvar_rotation_derivative.h:52
rotation_derivative(const cvm::rotation &rot, const cvm::ag_vector_real_t &pos1, const cvm::ag_vector_real_t &pos2, const size_t num_atoms_pos1, const size_t num_atoms_pos2)
Constructor of the cvm::rotation::derivative class for SOA.
Definition: colvar_rotation_derivative.h:72
cvm::rmatrix project_force_to_C_from_dxdqi(cvm::real f_on_q) const
Project the force on (or the gradient of ) to the force on the correlation matrix ,...
Definition: colvar_rotation_derivative.h:535
void prepare_derivative(rotation_derivative_dldq require_dl_dq)
This function must be called before calc_derivative_wrt_group1 and calc_derivative_wrt_group2 in orde...
Definition: colvar_rotation_derivative.h:93
void calc_derivative_wrt_group1(size_t ia, cvm::rvector *_noalias const dl0_1_out=nullptr, std::array< cvm::rvector, 4 > *_noalias const dq0_1_out=nullptr, std::array< std::array< cvm::rvector, 4 >, 4 > *_noalias const ds_1_out=nullptr) const
Calculate the derivatives of S, the leading eigenvalue L and the leading eigenvector Q with respect t...
Definition: colvar_rotation_derivative.h:476
size_t m_num_atoms_pos1
Number of atoms in group1 (used in SOA)
Definition: colvar_rotation_derivative.h:56
void calc_derivative_impl(const cvm::rvector(&ds)[4][4], cvm::rvector *_noalias const dl0_out, std::array< cvm::rvector, 4 > *_noalias const dq0_out, std::array< std::array< cvm::rvector, 4 >, 4 > *_noalias const ds_out) const
Actual implementation of the derivative calculation.
Definition: colvar_rotation_derivative.h:340
cvm::real tmp_Q0Q0[4][4]
Temporary variable that will be updated if prepare_derivative called.
Definition: colvar_rotation_derivative.h:60
cvm::ag_vector_real_t::const_iterator pos1x
Reference to the atom positions of group 1.
Definition: colvar_rotation_derivative.h:47
cvm::rvector project_force_to_group2(size_t ia, const cvm::rmatrix &dxdC) const
Project the force on the correlation matrix to .
Definition: colvar_rotation_derivative.h:608
void calc_derivative_wrt_group2(size_t ia, cvm::rvector *_noalias const dl0_2_out=nullptr, std::array< cvm::rvector, 4 > *_noalias const dq0_2_out=nullptr, std::array< std::array< cvm::rvector, 4 >, 4 > *_noalias const ds_2_out=nullptr) const
Calculate the derivatives of S, the leading eigenvalue L and the leading eigenvector Q with respect t...
Definition: colvar_rotation_derivative.h:502
const cvm::rotation & m_rot
Reference to the rotation.
Definition: colvar_rotation_derivative.h:44
cvm::rmatrix project_force_to_C_from_dxdq(const dim4_array_t &sum_dxdq) const
Project the force on (or the gradient of ) to the force on the correlation matrix ,...
Definition: colvar_rotation_derivative.h:568
size_t m_num_atoms_pos2
Number of atoms in group1 (used in SOA)
Definition: colvar_rotation_derivative.h:58
cvm::rvector project_force_to_group1(size_t ia, const cvm::rmatrix &dxdC) const
Project the force on the correlation matrix to .
Definition: colvar_rotation_derivative.h:587