Collective Variables Module - Developer Documentation
Loading...
Searching...
No Matches
colvar_rotation_derivative.h
1#ifndef COLVAR_ROTATION_DERIVATIVE
2#define COLVAR_ROTATION_DERIVATIVE
3
4#include "colvar_gpu_support.h"
5#include "colvartypes.h"
6#include <type_traits>
7#include <cstring>
8#include <array>
9
10#ifndef _noalias
11#if defined(__INTEL_COMPILER) || (defined(__PGI) && !defined(__NVCOMPILER))
12#define _noalias restrict
13#elif defined(__GNUC__) || defined(__INTEL_LLVM_COMPILER) || defined(__NVCOMPILER)
14#define _noalias __restrict
15#else
16#define _noalias
17#endif
18#endif
19
21enum class rotation_derivative_dldq {
23 use_dl = 1 << 0,
25 use_dq = 1 << 1
26};
27
28inline COLVARS_HOST_DEVICE constexpr rotation_derivative_dldq operator|(rotation_derivative_dldq Lhs, rotation_derivative_dldq Rhs) {
29 return static_cast<rotation_derivative_dldq>(
30 static_cast<std::underlying_type<rotation_derivative_dldq>::type>(Lhs) |
31 static_cast<std::underlying_type<rotation_derivative_dldq>::type>(Rhs));
32}
33
34inline COLVARS_HOST_DEVICE constexpr bool operator&(rotation_derivative_dldq Lhs, rotation_derivative_dldq Rhs)
35{
36 return (static_cast<std::underlying_type<rotation_derivative_dldq>::type>(Lhs) &
37 static_cast<std::underlying_type<rotation_derivative_dldq>::type>(Rhs));
38}
39
41// template <typename T1, typename T2, bool soa = false>
46 // const std::vector<cvm::real> &m_pos1;
47 cvm::ag_vector_real_t::const_iterator pos1x;
48 cvm::ag_vector_real_t::const_iterator pos1y;
49 cvm::ag_vector_real_t::const_iterator pos1z;
51 // const std::vector<cvm::real> &m_pos2;
52 cvm::ag_vector_real_t::const_iterator pos2x;
53 cvm::ag_vector_real_t::const_iterator pos2y;
54 cvm::ag_vector_real_t::const_iterator pos2z;
61 cvm::real tmp_Q0Q0_L[4][4][4];
73 const cvm::rotation &rot,
74 const cvm::ag_vector_real_t &pos1,
75 const cvm::ag_vector_real_t &pos2,
76 const size_t num_atoms_pos1,
77 const size_t num_atoms_pos2):
78 m_rot(rot),
79 pos1x(pos1.cbegin()),
80 pos1y(pos1x + num_atoms_pos1),
81 pos1z(pos1y + num_atoms_pos1),
82 pos2x(pos2.cbegin()),
83 pos2y(pos2x + num_atoms_pos2),
84 pos2z(pos2y + num_atoms_pos2),
85 m_num_atoms_pos1(num_atoms_pos1),
86 m_num_atoms_pos2(num_atoms_pos2) {}
93 void prepare_derivative(rotation_derivative_dldq require_dl_dq) {
94 if (require_dl_dq & rotation_derivative_dldq::use_dl) {
95 const auto &Q0 = m_rot.S_eigvec[0];
96 tmp_Q0Q0[0][0] = Q0[0] * Q0[0];
97 tmp_Q0Q0[0][1] = Q0[0] * Q0[1];
98 tmp_Q0Q0[0][2] = Q0[0] * Q0[2];
99 tmp_Q0Q0[0][3] = Q0[0] * Q0[3];
100 tmp_Q0Q0[1][0] = Q0[1] * Q0[0];
101 tmp_Q0Q0[1][1] = Q0[1] * Q0[1];
102 tmp_Q0Q0[1][2] = Q0[1] * Q0[2];
103 tmp_Q0Q0[1][3] = Q0[1] * Q0[3];
104 tmp_Q0Q0[2][0] = Q0[2] * Q0[0];
105 tmp_Q0Q0[2][1] = Q0[2] * Q0[1];
106 tmp_Q0Q0[2][2] = Q0[2] * Q0[2];
107 tmp_Q0Q0[2][3] = Q0[2] * Q0[3];
108 tmp_Q0Q0[3][0] = Q0[3] * Q0[0];
109 tmp_Q0Q0[3][1] = Q0[3] * Q0[1];
110 tmp_Q0Q0[3][2] = Q0[3] * Q0[2];
111 tmp_Q0Q0[3][3] = Q0[3] * Q0[3];
112 }
113 if (require_dl_dq & rotation_derivative_dldq::use_dq) {
114 const auto &Q0 = m_rot.S_eigvec[0];
115 const auto &Q1 = m_rot.S_eigvec[1];
116 const auto &Q2 = m_rot.S_eigvec[2];
117 const auto &Q3 = m_rot.S_eigvec[3];
118 cvm::real const L0 = m_rot.S_eigval[0];
119 cvm::real const L1 = m_rot.S_eigval[1];
120 cvm::real const L2 = m_rot.S_eigval[2];
121 cvm::real const L3 = m_rot.S_eigval[3];
122
123 tmp_Q0Q0_L[0][0][0] = (Q1[0] * Q0[0]) / (L0-L1) * Q1[0] +
124 (Q2[0] * Q0[0]) / (L0-L2) * Q2[0] +
125 (Q3[0] * Q0[0]) / (L0-L3) * Q3[0];
126 tmp_Q0Q0_L[0][0][1] = (Q1[0] * Q0[1]) / (L0-L1) * Q1[0] +
127 (Q2[0] * Q0[1]) / (L0-L2) * Q2[0] +
128 (Q3[0] * Q0[1]) / (L0-L3) * Q3[0];
129 tmp_Q0Q0_L[0][0][2] = (Q1[0] * Q0[2]) / (L0-L1) * Q1[0] +
130 (Q2[0] * Q0[2]) / (L0-L2) * Q2[0] +
131 (Q3[0] * Q0[2]) / (L0-L3) * Q3[0];
132 tmp_Q0Q0_L[0][0][3] = (Q1[0] * Q0[3]) / (L0-L1) * Q1[0] +
133 (Q2[0] * Q0[3]) / (L0-L2) * Q2[0] +
134 (Q3[0] * Q0[3]) / (L0-L3) * Q3[0];
135
136 tmp_Q0Q0_L[0][1][0] = (Q1[1] * Q0[0]) / (L0-L1) * Q1[0] +
137 (Q2[1] * Q0[0]) / (L0-L2) * Q2[0] +
138 (Q3[1] * Q0[0]) / (L0-L3) * Q3[0];
139 tmp_Q0Q0_L[0][1][1] = (Q1[1] * Q0[1]) / (L0-L1) * Q1[0] +
140 (Q2[1] * Q0[1]) / (L0-L2) * Q2[0] +
141 (Q3[1] * Q0[1]) / (L0-L3) * Q3[0];
142 tmp_Q0Q0_L[0][1][2] = (Q1[1] * Q0[2]) / (L0-L1) * Q1[0] +
143 (Q2[1] * Q0[2]) / (L0-L2) * Q2[0] +
144 (Q3[1] * Q0[2]) / (L0-L3) * Q3[0];
145 tmp_Q0Q0_L[0][1][3] = (Q1[1] * Q0[3]) / (L0-L1) * Q1[0] +
146 (Q2[1] * Q0[3]) / (L0-L2) * Q2[0] +
147 (Q3[1] * Q0[3]) / (L0-L3) * Q3[0];
148
149 tmp_Q0Q0_L[0][2][0] = (Q1[2] * Q0[0]) / (L0-L1) * Q1[0] +
150 (Q2[2] * Q0[0]) / (L0-L2) * Q2[0] +
151 (Q3[2] * Q0[0]) / (L0-L3) * Q3[0];
152 tmp_Q0Q0_L[0][2][1] = (Q1[2] * Q0[1]) / (L0-L1) * Q1[0] +
153 (Q2[2] * Q0[1]) / (L0-L2) * Q2[0] +
154 (Q3[2] * Q0[1]) / (L0-L3) * Q3[0];
155 tmp_Q0Q0_L[0][2][2] = (Q1[2] * Q0[2]) / (L0-L1) * Q1[0] +
156 (Q2[2] * Q0[2]) / (L0-L2) * Q2[0] +
157 (Q3[2] * Q0[2]) / (L0-L3) * Q3[0];
158 tmp_Q0Q0_L[0][2][3] = (Q1[2] * Q0[3]) / (L0-L1) * Q1[0] +
159 (Q2[2] * Q0[3]) / (L0-L2) * Q2[0] +
160 (Q3[2] * Q0[3]) / (L0-L3) * Q3[0];
161
162 tmp_Q0Q0_L[0][3][0] = (Q1[3] * Q0[0]) / (L0-L1) * Q1[0] +
163 (Q2[3] * Q0[0]) / (L0-L2) * Q2[0] +
164 (Q3[3] * Q0[0]) / (L0-L3) * Q3[0];
165 tmp_Q0Q0_L[0][3][1] = (Q1[3] * Q0[1]) / (L0-L1) * Q1[0] +
166 (Q2[3] * Q0[1]) / (L0-L2) * Q2[0] +
167 (Q3[3] * Q0[1]) / (L0-L3) * Q3[0];
168 tmp_Q0Q0_L[0][3][2] = (Q1[3] * Q0[2]) / (L0-L1) * Q1[0] +
169 (Q2[3] * Q0[2]) / (L0-L2) * Q2[0] +
170 (Q3[3] * Q0[2]) / (L0-L3) * Q3[0];
171 tmp_Q0Q0_L[0][3][3] = (Q1[3] * Q0[3]) / (L0-L1) * Q1[0] +
172 (Q2[3] * Q0[3]) / (L0-L2) * Q2[0] +
173 (Q3[3] * Q0[3]) / (L0-L3) * Q3[0];
174
175 tmp_Q0Q0_L[1][0][0] = (Q1[0] * Q0[0]) / (L0-L1) * Q1[1] +
176 (Q2[0] * Q0[0]) / (L0-L2) * Q2[1] +
177 (Q3[0] * Q0[0]) / (L0-L3) * Q3[1];
178 tmp_Q0Q0_L[1][0][1] = (Q1[0] * Q0[1]) / (L0-L1) * Q1[1] +
179 (Q2[0] * Q0[1]) / (L0-L2) * Q2[1] +
180 (Q3[0] * Q0[1]) / (L0-L3) * Q3[1];
181 tmp_Q0Q0_L[1][0][2] = (Q1[0] * Q0[2]) / (L0-L1) * Q1[1] +
182 (Q2[0] * Q0[2]) / (L0-L2) * Q2[1] +
183 (Q3[0] * Q0[2]) / (L0-L3) * Q3[1];
184 tmp_Q0Q0_L[1][0][3] = (Q1[0] * Q0[3]) / (L0-L1) * Q1[1] +
185 (Q2[0] * Q0[3]) / (L0-L2) * Q2[1] +
186 (Q3[0] * Q0[3]) / (L0-L3) * Q3[1];
187
188 tmp_Q0Q0_L[1][1][0] = (Q1[1] * Q0[0]) / (L0-L1) * Q1[1] +
189 (Q2[1] * Q0[0]) / (L0-L2) * Q2[1] +
190 (Q3[1] * Q0[0]) / (L0-L3) * Q3[1];
191 tmp_Q0Q0_L[1][1][1] = (Q1[1] * Q0[1]) / (L0-L1) * Q1[1] +
192 (Q2[1] * Q0[1]) / (L0-L2) * Q2[1] +
193 (Q3[1] * Q0[1]) / (L0-L3) * Q3[1];
194 tmp_Q0Q0_L[1][1][2] = (Q1[1] * Q0[2]) / (L0-L1) * Q1[1] +
195 (Q2[1] * Q0[2]) / (L0-L2) * Q2[1] +
196 (Q3[1] * Q0[2]) / (L0-L3) * Q3[1];
197 tmp_Q0Q0_L[1][1][3] = (Q1[1] * Q0[3]) / (L0-L1) * Q1[1] +
198 (Q2[1] * Q0[3]) / (L0-L2) * Q2[1] +
199 (Q3[1] * Q0[3]) / (L0-L3) * Q3[1];
200
201 tmp_Q0Q0_L[1][2][0] = (Q1[2] * Q0[0]) / (L0-L1) * Q1[1] +
202 (Q2[2] * Q0[0]) / (L0-L2) * Q2[1] +
203 (Q3[2] * Q0[0]) / (L0-L3) * Q3[1];
204 tmp_Q0Q0_L[1][2][1] = (Q1[2] * Q0[1]) / (L0-L1) * Q1[1] +
205 (Q2[2] * Q0[1]) / (L0-L2) * Q2[1] +
206 (Q3[2] * Q0[1]) / (L0-L3) * Q3[1];
207 tmp_Q0Q0_L[1][2][2] = (Q1[2] * Q0[2]) / (L0-L1) * Q1[1] +
208 (Q2[2] * Q0[2]) / (L0-L2) * Q2[1] +
209 (Q3[2] * Q0[2]) / (L0-L3) * Q3[1];
210 tmp_Q0Q0_L[1][2][3] = (Q1[2] * Q0[3]) / (L0-L1) * Q1[1] +
211 (Q2[2] * Q0[3]) / (L0-L2) * Q2[1] +
212 (Q3[2] * Q0[3]) / (L0-L3) * Q3[1];
213
214 tmp_Q0Q0_L[1][3][0] = (Q1[3] * Q0[0]) / (L0-L1) * Q1[1] +
215 (Q2[3] * Q0[0]) / (L0-L2) * Q2[1] +
216 (Q3[3] * Q0[0]) / (L0-L3) * Q3[1];
217 tmp_Q0Q0_L[1][3][1] = (Q1[3] * Q0[1]) / (L0-L1) * Q1[1] +
218 (Q2[3] * Q0[1]) / (L0-L2) * Q2[1] +
219 (Q3[3] * Q0[1]) / (L0-L3) * Q3[1];
220 tmp_Q0Q0_L[1][3][2] = (Q1[3] * Q0[2]) / (L0-L1) * Q1[1] +
221 (Q2[3] * Q0[2]) / (L0-L2) * Q2[1] +
222 (Q3[3] * Q0[2]) / (L0-L3) * Q3[1];
223 tmp_Q0Q0_L[1][3][3] = (Q1[3] * Q0[3]) / (L0-L1) * Q1[1] +
224 (Q2[3] * Q0[3]) / (L0-L2) * Q2[1] +
225 (Q3[3] * Q0[3]) / (L0-L3) * Q3[1];
226
227 tmp_Q0Q0_L[2][0][0] = (Q1[0] * Q0[0]) / (L0-L1) * Q1[2] +
228 (Q2[0] * Q0[0]) / (L0-L2) * Q2[2] +
229 (Q3[0] * Q0[0]) / (L0-L3) * Q3[2];
230 tmp_Q0Q0_L[2][0][1] = (Q1[0] * Q0[1]) / (L0-L1) * Q1[2] +
231 (Q2[0] * Q0[1]) / (L0-L2) * Q2[2] +
232 (Q3[0] * Q0[1]) / (L0-L3) * Q3[2];
233 tmp_Q0Q0_L[2][0][2] = (Q1[0] * Q0[2]) / (L0-L1) * Q1[2] +
234 (Q2[0] * Q0[2]) / (L0-L2) * Q2[2] +
235 (Q3[0] * Q0[2]) / (L0-L3) * Q3[2];
236 tmp_Q0Q0_L[2][0][3] = (Q1[0] * Q0[3]) / (L0-L1) * Q1[2] +
237 (Q2[0] * Q0[3]) / (L0-L2) * Q2[2] +
238 (Q3[0] * Q0[3]) / (L0-L3) * Q3[2];
239
240 tmp_Q0Q0_L[2][1][0] = (Q1[1] * Q0[0]) / (L0-L1) * Q1[2] +
241 (Q2[1] * Q0[0]) / (L0-L2) * Q2[2] +
242 (Q3[1] * Q0[0]) / (L0-L3) * Q3[2];
243 tmp_Q0Q0_L[2][1][1] = (Q1[1] * Q0[1]) / (L0-L1) * Q1[2] +
244 (Q2[1] * Q0[1]) / (L0-L2) * Q2[2] +
245 (Q3[1] * Q0[1]) / (L0-L3) * Q3[2];
246 tmp_Q0Q0_L[2][1][2] = (Q1[1] * Q0[2]) / (L0-L1) * Q1[2] +
247 (Q2[1] * Q0[2]) / (L0-L2) * Q2[2] +
248 (Q3[1] * Q0[2]) / (L0-L3) * Q3[2];
249 tmp_Q0Q0_L[2][1][3] = (Q1[1] * Q0[3]) / (L0-L1) * Q1[2] +
250 (Q2[1] * Q0[3]) / (L0-L2) * Q2[2] +
251 (Q3[1] * Q0[3]) / (L0-L3) * Q3[2];
252
253 tmp_Q0Q0_L[2][2][0] = (Q1[2] * Q0[0]) / (L0-L1) * Q1[2] +
254 (Q2[2] * Q0[0]) / (L0-L2) * Q2[2] +
255 (Q3[2] * Q0[0]) / (L0-L3) * Q3[2];
256 tmp_Q0Q0_L[2][2][1] = (Q1[2] * Q0[1]) / (L0-L1) * Q1[2] +
257 (Q2[2] * Q0[1]) / (L0-L2) * Q2[2] +
258 (Q3[2] * Q0[1]) / (L0-L3) * Q3[2];
259 tmp_Q0Q0_L[2][2][2] = (Q1[2] * Q0[2]) / (L0-L1) * Q1[2] +
260 (Q2[2] * Q0[2]) / (L0-L2) * Q2[2] +
261 (Q3[2] * Q0[2]) / (L0-L3) * Q3[2];
262 tmp_Q0Q0_L[2][2][3] = (Q1[2] * Q0[3]) / (L0-L1) * Q1[2] +
263 (Q2[2] * Q0[3]) / (L0-L2) * Q2[2] +
264 (Q3[2] * Q0[3]) / (L0-L3) * Q3[2];
265
266 tmp_Q0Q0_L[2][3][0] = (Q1[3] * Q0[0]) / (L0-L1) * Q1[2] +
267 (Q2[3] * Q0[0]) / (L0-L2) * Q2[2] +
268 (Q3[3] * Q0[0]) / (L0-L3) * Q3[2];
269 tmp_Q0Q0_L[2][3][1] = (Q1[3] * Q0[1]) / (L0-L1) * Q1[2] +
270 (Q2[3] * Q0[1]) / (L0-L2) * Q2[2] +
271 (Q3[3] * Q0[1]) / (L0-L3) * Q3[2];
272 tmp_Q0Q0_L[2][3][2] = (Q1[3] * Q0[2]) / (L0-L1) * Q1[2] +
273 (Q2[3] * Q0[2]) / (L0-L2) * Q2[2] +
274 (Q3[3] * Q0[2]) / (L0-L3) * Q3[2];
275 tmp_Q0Q0_L[2][3][3] = (Q1[3] * Q0[3]) / (L0-L1) * Q1[2] +
276 (Q2[3] * Q0[3]) / (L0-L2) * Q2[2] +
277 (Q3[3] * Q0[3]) / (L0-L3) * Q3[2];
278
279 tmp_Q0Q0_L[3][0][0] = (Q1[0] * Q0[0]) / (L0-L1) * Q1[3] +
280 (Q2[0] * Q0[0]) / (L0-L2) * Q2[3] +
281 (Q3[0] * Q0[0]) / (L0-L3) * Q3[3];
282 tmp_Q0Q0_L[3][0][1] = (Q1[0] * Q0[1]) / (L0-L1) * Q1[3] +
283 (Q2[0] * Q0[1]) / (L0-L2) * Q2[3] +
284 (Q3[0] * Q0[1]) / (L0-L3) * Q3[3];
285 tmp_Q0Q0_L[3][0][2] = (Q1[0] * Q0[2]) / (L0-L1) * Q1[3] +
286 (Q2[0] * Q0[2]) / (L0-L2) * Q2[3] +
287 (Q3[0] * Q0[2]) / (L0-L3) * Q3[3];
288 tmp_Q0Q0_L[3][0][3] = (Q1[0] * Q0[3]) / (L0-L1) * Q1[3] +
289 (Q2[0] * Q0[3]) / (L0-L2) * Q2[3] +
290 (Q3[0] * Q0[3]) / (L0-L3) * Q3[3];
291
292 tmp_Q0Q0_L[3][1][0] = (Q1[1] * Q0[0]) / (L0-L1) * Q1[3] +
293 (Q2[1] * Q0[0]) / (L0-L2) * Q2[3] +
294 (Q3[1] * Q0[0]) / (L0-L3) * Q3[3];
295 tmp_Q0Q0_L[3][1][1] = (Q1[1] * Q0[1]) / (L0-L1) * Q1[3] +
296 (Q2[1] * Q0[1]) / (L0-L2) * Q2[3] +
297 (Q3[1] * Q0[1]) / (L0-L3) * Q3[3];
298 tmp_Q0Q0_L[3][1][2] = (Q1[1] * Q0[2]) / (L0-L1) * Q1[3] +
299 (Q2[1] * Q0[2]) / (L0-L2) * Q2[3] +
300 (Q3[1] * Q0[2]) / (L0-L3) * Q3[3];
301 tmp_Q0Q0_L[3][1][3] = (Q1[1] * Q0[3]) / (L0-L1) * Q1[3] +
302 (Q2[1] * Q0[3]) / (L0-L2) * Q2[3] +
303 (Q3[1] * Q0[3]) / (L0-L3) * Q3[3];
304
305 tmp_Q0Q0_L[3][2][0] = (Q1[2] * Q0[0]) / (L0-L1) * Q1[3] +
306 (Q2[2] * Q0[0]) / (L0-L2) * Q2[3] +
307 (Q3[2] * Q0[0]) / (L0-L3) * Q3[3];
308 tmp_Q0Q0_L[3][2][1] = (Q1[2] * Q0[1]) / (L0-L1) * Q1[3] +
309 (Q2[2] * Q0[1]) / (L0-L2) * Q2[3] +
310 (Q3[2] * Q0[1]) / (L0-L3) * Q3[3];
311 tmp_Q0Q0_L[3][2][2] = (Q1[2] * Q0[2]) / (L0-L1) * Q1[3] +
312 (Q2[2] * Q0[2]) / (L0-L2) * Q2[3] +
313 (Q3[2] * Q0[2]) / (L0-L3) * Q3[3];
314 tmp_Q0Q0_L[3][2][3] = (Q1[2] * Q0[3]) / (L0-L1) * Q1[3] +
315 (Q2[2] * Q0[3]) / (L0-L2) * Q2[3] +
316 (Q3[2] * Q0[3]) / (L0-L3) * Q3[3];
317
318 tmp_Q0Q0_L[3][3][0] = (Q1[3] * Q0[0]) / (L0-L1) * Q1[3] +
319 (Q2[3] * Q0[0]) / (L0-L2) * Q2[3] +
320 (Q3[3] * Q0[0]) / (L0-L3) * Q3[3];
321 tmp_Q0Q0_L[3][3][1] = (Q1[3] * Q0[1]) / (L0-L1) * Q1[3] +
322 (Q2[3] * Q0[1]) / (L0-L2) * Q2[3] +
323 (Q3[3] * Q0[1]) / (L0-L3) * Q3[3];
324 tmp_Q0Q0_L[3][3][2] = (Q1[3] * Q0[2]) / (L0-L1) * Q1[3] +
325 (Q2[3] * Q0[2]) / (L0-L2) * Q2[3] +
326 (Q3[3] * Q0[2]) / (L0-L3) * Q3[3];
327 tmp_Q0Q0_L[3][3][3] = (Q1[3] * Q0[3]) / (L0-L1) * Q1[3] +
328 (Q2[3] * Q0[3]) / (L0-L2) * Q2[3] +
329 (Q3[3] * Q0[3]) / (L0-L3) * Q3[3];
330 }
331 }
339 template <bool use_dl, bool use_dq, bool use_ds>
341 const cvm::rvector (&ds)[4][4],
342 cvm::rvector* _noalias const dl0_out,
343 std::array<cvm::rvector, 4>* _noalias const dq0_out,
344 std::array<std::array<cvm::rvector, 4>, 4>* _noalias const ds_out) const {
345 if (use_ds) {
346 // this code path is for debug_gradients, so not necessary to unroll the loop
347 *ds_out = std::array<std::array<cvm::rvector, 4>, 4>();
348 for (int i = 0; i < 4; ++i) {
349 for (int j = 0; j < 4; ++j) {
350 (*ds_out)[i][j] = ds[i][j];
351 }
352 }
353 }
354 if (use_dl) {
355 /* manually loop unrolling of the following loop:
356 dl0_1.reset();
357 for (size_t i = 0; i < 4; i++) {
358 for (size_t j = 0; j < 4; j++) {
359 dl0_1 += Q0[i] * ds_1[i][j] * Q0[j];
360 }
361 }
362 */
363 *dl0_out = tmp_Q0Q0[0][0] * ds[0][0] +
364 tmp_Q0Q0[0][1] * ds[0][1] +
365 tmp_Q0Q0[0][2] * ds[0][2] +
366 tmp_Q0Q0[0][3] * ds[0][3] +
367 tmp_Q0Q0[1][0] * ds[1][0] +
368 tmp_Q0Q0[1][1] * ds[1][1] +
369 tmp_Q0Q0[1][2] * ds[1][2] +
370 tmp_Q0Q0[1][3] * ds[1][3] +
371 tmp_Q0Q0[2][0] * ds[2][0] +
372 tmp_Q0Q0[2][1] * ds[2][1] +
373 tmp_Q0Q0[2][2] * ds[2][2] +
374 tmp_Q0Q0[2][3] * ds[2][3] +
375 tmp_Q0Q0[3][0] * ds[3][0] +
376 tmp_Q0Q0[3][1] * ds[3][1] +
377 tmp_Q0Q0[3][2] * ds[3][2] +
378 tmp_Q0Q0[3][3] * ds[3][3];
379 }
380 if (use_dq) {
381 // we can skip this check if a fixed-size array is used
382 // if (dq0_out->size() != 4) dq0_out->resize(4);
383 /* manually loop unrolling of the following loop:
384 dq0_1.reset();
385 for (size_t p = 0; p < 4; p++) {
386 for (size_t i = 0; i < 4; i++) {
387 for (size_t j = 0; j < 4; j++) {
388 dq0_1[p] +=
389 (Q1[i] * ds_1[i][j] * Q0[j]) / (L0-L1) * Q1[p] +
390 (Q2[i] * ds_1[i][j] * Q0[j]) / (L0-L2) * Q2[p] +
391 (Q3[i] * ds_1[i][j] * Q0[j]) / (L0-L3) * Q3[p];
392 }
393 }
394 }
395 */
396 (*dq0_out)[0] = tmp_Q0Q0_L[0][0][0] * ds[0][0] +
397 tmp_Q0Q0_L[0][0][1] * ds[0][1] +
398 tmp_Q0Q0_L[0][0][2] * ds[0][2] +
399 tmp_Q0Q0_L[0][0][3] * ds[0][3] +
400 tmp_Q0Q0_L[0][1][0] * ds[1][0] +
401 tmp_Q0Q0_L[0][1][1] * ds[1][1] +
402 tmp_Q0Q0_L[0][1][2] * ds[1][2] +
403 tmp_Q0Q0_L[0][1][3] * ds[1][3] +
404 tmp_Q0Q0_L[0][2][0] * ds[2][0] +
405 tmp_Q0Q0_L[0][2][1] * ds[2][1] +
406 tmp_Q0Q0_L[0][2][2] * ds[2][2] +
407 tmp_Q0Q0_L[0][2][3] * ds[2][3] +
408 tmp_Q0Q0_L[0][3][0] * ds[3][0] +
409 tmp_Q0Q0_L[0][3][1] * ds[3][1] +
410 tmp_Q0Q0_L[0][3][2] * ds[3][2] +
411 tmp_Q0Q0_L[0][3][3] * ds[3][3];
412
413 (*dq0_out)[1] = tmp_Q0Q0_L[1][0][0] * ds[0][0] +
414 tmp_Q0Q0_L[1][0][1] * ds[0][1] +
415 tmp_Q0Q0_L[1][0][2] * ds[0][2] +
416 tmp_Q0Q0_L[1][0][3] * ds[0][3] +
417 tmp_Q0Q0_L[1][1][0] * ds[1][0] +
418 tmp_Q0Q0_L[1][1][1] * ds[1][1] +
419 tmp_Q0Q0_L[1][1][2] * ds[1][2] +
420 tmp_Q0Q0_L[1][1][3] * ds[1][3] +
421 tmp_Q0Q0_L[1][2][0] * ds[2][0] +
422 tmp_Q0Q0_L[1][2][1] * ds[2][1] +
423 tmp_Q0Q0_L[1][2][2] * ds[2][2] +
424 tmp_Q0Q0_L[1][2][3] * ds[2][3] +
425 tmp_Q0Q0_L[1][3][0] * ds[3][0] +
426 tmp_Q0Q0_L[1][3][1] * ds[3][1] +
427 tmp_Q0Q0_L[1][3][2] * ds[3][2] +
428 tmp_Q0Q0_L[1][3][3] * ds[3][3];
429
430 (*dq0_out)[2] = tmp_Q0Q0_L[2][0][0] * ds[0][0] +
431 tmp_Q0Q0_L[2][0][1] * ds[0][1] +
432 tmp_Q0Q0_L[2][0][2] * ds[0][2] +
433 tmp_Q0Q0_L[2][0][3] * ds[0][3] +
434 tmp_Q0Q0_L[2][1][0] * ds[1][0] +
435 tmp_Q0Q0_L[2][1][1] * ds[1][1] +
436 tmp_Q0Q0_L[2][1][2] * ds[1][2] +
437 tmp_Q0Q0_L[2][1][3] * ds[1][3] +
438 tmp_Q0Q0_L[2][2][0] * ds[2][0] +
439 tmp_Q0Q0_L[2][2][1] * ds[2][1] +
440 tmp_Q0Q0_L[2][2][2] * ds[2][2] +
441 tmp_Q0Q0_L[2][2][3] * ds[2][3] +
442 tmp_Q0Q0_L[2][3][0] * ds[3][0] +
443 tmp_Q0Q0_L[2][3][1] * ds[3][1] +
444 tmp_Q0Q0_L[2][3][2] * ds[3][2] +
445 tmp_Q0Q0_L[2][3][3] * ds[3][3];
446
447 (*dq0_out)[3] = tmp_Q0Q0_L[3][0][0] * ds[0][0] +
448 tmp_Q0Q0_L[3][0][1] * ds[0][1] +
449 tmp_Q0Q0_L[3][0][2] * ds[0][2] +
450 tmp_Q0Q0_L[3][0][3] * ds[0][3] +
451 tmp_Q0Q0_L[3][1][0] * ds[1][0] +
452 tmp_Q0Q0_L[3][1][1] * ds[1][1] +
453 tmp_Q0Q0_L[3][1][2] * ds[1][2] +
454 tmp_Q0Q0_L[3][1][3] * ds[1][3] +
455 tmp_Q0Q0_L[3][2][0] * ds[2][0] +
456 tmp_Q0Q0_L[3][2][1] * ds[2][1] +
457 tmp_Q0Q0_L[3][2][2] * ds[2][2] +
458 tmp_Q0Q0_L[3][2][3] * ds[2][3] +
459 tmp_Q0Q0_L[3][3][0] * ds[3][0] +
460 tmp_Q0Q0_L[3][3][1] * ds[3][1] +
461 tmp_Q0Q0_L[3][3][2] * ds[3][2] +
462 tmp_Q0Q0_L[3][3][3] * ds[3][3];
463 }
464 }
475 template <bool use_dl, bool use_dq, bool use_ds>
477 size_t ia, cvm::rvector* _noalias const dl0_1_out = nullptr,
478 std::array<cvm::rvector, 4>* _noalias const dq0_1_out = nullptr,
479 std::array<std::array<cvm::rvector, 4>, 4>* _noalias const ds_1_out = nullptr) const {
480 // if (dl0_1_out == nullptr && dq0_1_out == nullptr) return;
481 const cvm::real a2x = *(pos2x + ia);
482 const cvm::real a2y = *(pos2y + ia);
483 const cvm::real a2z = *(pos2z + ia);
484 const cvm::rvector ds_1[4][4] = {
485 {{ a2x, a2y, a2z}, { 0.0, a2z, -a2y}, {-a2z, 0.0, a2x}, { a2y, -a2x, 0.0}},
486 {{ 0.0, a2z, -a2y}, { a2x, -a2y, -a2z}, { a2y, a2x, 0.0}, { a2z, 0.0, a2x}},
487 {{-a2z, 0.0, a2x}, { a2y, a2x, 0.0}, {-a2x, a2y, -a2z}, { 0.0, a2z, a2y}},
488 {{ a2y, -a2x, 0.0}, { a2z, 0.0, a2x}, { 0.0, a2z, a2y}, {-a2x, -a2y, a2z}}};
489 calc_derivative_impl<use_dl, use_dq, use_ds>(ds_1, dl0_1_out, dq0_1_out, ds_1_out);
490 }
501 template <bool use_dl, bool use_dq, bool use_ds>
503 size_t ia, cvm::rvector* _noalias const dl0_2_out = nullptr,
504 std::array<cvm::rvector, 4>* _noalias const dq0_2_out = nullptr,
505 std::array<std::array<cvm::rvector, 4>, 4>* _noalias const ds_2_out = nullptr) const {
506 // if (dl0_2_out == nullptr && dq0_2_out == nullptr) return;
507 const cvm::real a1x = *(pos1x + ia);
508 const cvm::real a1y = *(pos1y + ia);
509 const cvm::real a1z = *(pos1z + ia);
510 const cvm::rvector ds_2[4][4] = {
511 {{ a1x, a1y, a1z}, { 0.0, -a1z, a1y}, { a1z, 0.0, -a1x}, {-a1y, a1x, 0.0}},
512 {{ 0.0, -a1z, a1y}, { a1x, -a1y, -a1z}, { a1y, a1x, 0.0}, { a1z, 0.0, a1x}},
513 {{ a1z, 0.0, -a1x}, { a1y, a1x, 0.0}, {-a1x, a1y, -a1z}, { 0.0, a1z, a1y}},
514 {{-a1y, a1x, 0.0}, { a1z, 0.0, a1x}, { 0.0, a1z, a1y}, {-a1x, -a1y, a1z}}};
515 calc_derivative_impl<use_dl, use_dq, use_ds>(ds_2, dl0_2_out, dq0_2_out, ds_2_out);
516 }
517
534 template <int i>
536 static_assert((i < 4) && (i >= 0), "i must be in [0, 3] in project_force_to_C_from_dxdqi.");
537 cvm::rmatrix result;
538 result.xx = f_on_q * ( tmp_Q0Q0_L[i][0][0] + tmp_Q0Q0_L[i][1][1] - tmp_Q0Q0_L[i][2][2] - tmp_Q0Q0_L[i][3][3] );
539 result.xy = f_on_q * ( tmp_Q0Q0_L[i][0][3] + tmp_Q0Q0_L[i][1][2] + tmp_Q0Q0_L[i][2][1] + tmp_Q0Q0_L[i][3][0] );
540 result.xz = f_on_q * (-tmp_Q0Q0_L[i][0][2] + tmp_Q0Q0_L[i][1][3] - tmp_Q0Q0_L[i][2][0] + tmp_Q0Q0_L[i][3][1] );
541 result.yx = f_on_q * (-tmp_Q0Q0_L[i][0][3] + tmp_Q0Q0_L[i][1][2] + tmp_Q0Q0_L[i][2][1] - tmp_Q0Q0_L[i][3][0] );
542 result.yy = f_on_q * ( tmp_Q0Q0_L[i][0][0] - tmp_Q0Q0_L[i][1][1] + tmp_Q0Q0_L[i][2][2] - tmp_Q0Q0_L[i][3][3] );
543 result.yz = f_on_q * ( tmp_Q0Q0_L[i][0][1] + tmp_Q0Q0_L[i][1][0] + tmp_Q0Q0_L[i][2][3] + tmp_Q0Q0_L[i][3][2] );
544 result.zx = f_on_q * ( tmp_Q0Q0_L[i][0][2] + tmp_Q0Q0_L[i][1][3] + tmp_Q0Q0_L[i][2][0] + tmp_Q0Q0_L[i][3][1] );
545 result.zy = f_on_q * (-tmp_Q0Q0_L[i][0][1] - tmp_Q0Q0_L[i][1][0] + tmp_Q0Q0_L[i][2][3] + tmp_Q0Q0_L[i][3][2] );
546 result.zz = f_on_q * ( tmp_Q0Q0_L[i][0][0] - tmp_Q0Q0_L[i][1][1] - tmp_Q0Q0_L[i][2][2] + tmp_Q0Q0_L[i][3][3] );
547 return result;
548 }
549
567 template <typename dim4_array_t>
568 inline cvm::rmatrix project_force_to_C_from_dxdq(const dim4_array_t& sum_dxdq) const {
569 cvm::rmatrix result;
570 result += project_force_to_C_from_dxdqi<0>(sum_dxdq[0]);
571 result += project_force_to_C_from_dxdqi<1>(sum_dxdq[1]);
572 result += project_force_to_C_from_dxdqi<2>(sum_dxdq[2]);
573 result += project_force_to_C_from_dxdqi<3>(sum_dxdq[3]);
574 return result;
575 }
576
587 inline cvm::rvector project_force_to_group1(size_t ia, const cvm::rmatrix& dxdC) const {
588 const cvm::real a2x = *(pos2x + ia);
589 const cvm::real a2y = *(pos2y + ia);
590 const cvm::real a2z = *(pos2z + ia);
591 const cvm::rvector result{
592 dxdC.xx * a2x + dxdC.xy * a2y + dxdC.xz * a2z,
593 dxdC.yx * a2x + dxdC.yy * a2y + dxdC.yz * a2z,
594 dxdC.zx * a2x + dxdC.zy * a2y + dxdC.zz * a2z};
595 return result;
596 }
597
608 inline cvm::rvector project_force_to_group2(size_t ia, const cvm::rmatrix& dxdC) const {
609 const cvm::real a1x = *(pos1x + ia);
610 const cvm::real a1y = *(pos1y + ia);
611 const cvm::real a1z = *(pos1z + ia);
612 const cvm::rvector result{
613 dxdC.xx * a1x + dxdC.yx * a1y + dxdC.zx * a1z,
614 dxdC.xy * a1x + dxdC.yy * a1y + dxdC.zy * a1z,
615 dxdC.xz * a1x + dxdC.yz * a1y + dxdC.zz * a1z};
616 return result;
617 }
618};
619
620#if defined(COLVARS_CUDA) || defined(COLVARS_HIP)
621namespace colvars_gpu {
629 const cvm::real* d_pos1y;
630 const cvm::real* d_pos1z;
633 const cvm::real* d_pos2y;
634 const cvm::real* d_pos2z;
641 cvm::real* tmp_Q0Q0_L;
654 int init(
655 const colvars_gpu::rotation_gpu* rot,
656 const cvm::real* d_pos1,
657 const cvm::real* d_pos2,
658 const size_t num_atoms_pos1,
659 const size_t num_atoms_pos2);
676 rotation_derivative_dldq require_dl_dq,
677 cudaGraph_t& graph,
678 std::unordered_map<std::string, cudaGraphNode_t>& nodes_map);
679
686 template <bool use_dl, bool use_dq/*, bool use_ds*/>
687 inline COLVARS_DEVICE void calc_derivative_impl(
688 const cvm::rvector (&ds)[4][4],
689 cvm::rvector* _noalias const dl0_out,
690 cvm::rvector* _noalias const dq0_out) const {
691 if (use_dl) {
692 *dl0_out = tmp_Q0Q0[0*4+0] * ds[0][0] +
693 tmp_Q0Q0[0*4+1] * ds[0][1] +
694 tmp_Q0Q0[0*4+2] * ds[0][2] +
695 tmp_Q0Q0[0*4+3] * ds[0][3] +
696 tmp_Q0Q0[1*4+0] * ds[1][0] +
697 tmp_Q0Q0[1*4+1] * ds[1][1] +
698 tmp_Q0Q0[1*4+2] * ds[1][2] +
699 tmp_Q0Q0[1*4+3] * ds[1][3] +
700 tmp_Q0Q0[2*4+0] * ds[2][0] +
701 tmp_Q0Q0[2*4+1] * ds[2][1] +
702 tmp_Q0Q0[2*4+2] * ds[2][2] +
703 tmp_Q0Q0[2*4+3] * ds[2][3] +
704 tmp_Q0Q0[3*4+0] * ds[3][0] +
705 tmp_Q0Q0[3*4+1] * ds[3][1] +
706 tmp_Q0Q0[3*4+2] * ds[3][2] +
707 tmp_Q0Q0[3*4+3] * ds[3][3];
708 }
709 if (use_dq) {
710 dq0_out[0] = tmp_Q0Q0_L[0*16+0*4+0] * ds[0][0] +
711 tmp_Q0Q0_L[0*16+0*4+1] * ds[0][1] +
712 tmp_Q0Q0_L[0*16+0*4+2] * ds[0][2] +
713 tmp_Q0Q0_L[0*16+0*4+3] * ds[0][3] +
714 tmp_Q0Q0_L[0*16+1*4+0] * ds[1][0] +
715 tmp_Q0Q0_L[0*16+1*4+1] * ds[1][1] +
716 tmp_Q0Q0_L[0*16+1*4+2] * ds[1][2] +
717 tmp_Q0Q0_L[0*16+1*4+3] * ds[1][3] +
718 tmp_Q0Q0_L[0*16+2*4+0] * ds[2][0] +
719 tmp_Q0Q0_L[0*16+2*4+1] * ds[2][1] +
720 tmp_Q0Q0_L[0*16+2*4+2] * ds[2][2] +
721 tmp_Q0Q0_L[0*16+2*4+3] * ds[2][3] +
722 tmp_Q0Q0_L[0*16+3*4+0] * ds[3][0] +
723 tmp_Q0Q0_L[0*16+3*4+1] * ds[3][1] +
724 tmp_Q0Q0_L[0*16+3*4+2] * ds[3][2] +
725 tmp_Q0Q0_L[0*16+3*4+3] * ds[3][3];
726
727 dq0_out[1] = tmp_Q0Q0_L[1*16+0*4+0] * ds[0][0] +
728 tmp_Q0Q0_L[1*16+0*4+1] * ds[0][1] +
729 tmp_Q0Q0_L[1*16+0*4+2] * ds[0][2] +
730 tmp_Q0Q0_L[1*16+0*4+3] * ds[0][3] +
731 tmp_Q0Q0_L[1*16+1*4+0] * ds[1][0] +
732 tmp_Q0Q0_L[1*16+1*4+1] * ds[1][1] +
733 tmp_Q0Q0_L[1*16+1*4+2] * ds[1][2] +
734 tmp_Q0Q0_L[1*16+1*4+3] * ds[1][3] +
735 tmp_Q0Q0_L[1*16+2*4+0] * ds[2][0] +
736 tmp_Q0Q0_L[1*16+2*4+1] * ds[2][1] +
737 tmp_Q0Q0_L[1*16+2*4+2] * ds[2][2] +
738 tmp_Q0Q0_L[1*16+2*4+3] * ds[2][3] +
739 tmp_Q0Q0_L[1*16+3*4+0] * ds[3][0] +
740 tmp_Q0Q0_L[1*16+3*4+1] * ds[3][1] +
741 tmp_Q0Q0_L[1*16+3*4+2] * ds[3][2] +
742 tmp_Q0Q0_L[1*16+3*4+3] * ds[3][3];
743
744 dq0_out[2] = tmp_Q0Q0_L[2*16+0*4+0] * ds[0][0] +
745 tmp_Q0Q0_L[2*16+0*4+1] * ds[0][1] +
746 tmp_Q0Q0_L[2*16+0*4+2] * ds[0][2] +
747 tmp_Q0Q0_L[2*16+0*4+3] * ds[0][3] +
748 tmp_Q0Q0_L[2*16+1*4+0] * ds[1][0] +
749 tmp_Q0Q0_L[2*16+1*4+1] * ds[1][1] +
750 tmp_Q0Q0_L[2*16+1*4+2] * ds[1][2] +
751 tmp_Q0Q0_L[2*16+1*4+3] * ds[1][3] +
752 tmp_Q0Q0_L[2*16+2*4+0] * ds[2][0] +
753 tmp_Q0Q0_L[2*16+2*4+1] * ds[2][1] +
754 tmp_Q0Q0_L[2*16+2*4+2] * ds[2][2] +
755 tmp_Q0Q0_L[2*16+2*4+3] * ds[2][3] +
756 tmp_Q0Q0_L[2*16+3*4+0] * ds[3][0] +
757 tmp_Q0Q0_L[2*16+3*4+1] * ds[3][1] +
758 tmp_Q0Q0_L[2*16+3*4+2] * ds[3][2] +
759 tmp_Q0Q0_L[2*16+3*4+3] * ds[3][3];
760
761 dq0_out[3] = tmp_Q0Q0_L[3*16+0*4+0] * ds[0][0] +
762 tmp_Q0Q0_L[3*16+0*4+1] * ds[0][1] +
763 tmp_Q0Q0_L[3*16+0*4+2] * ds[0][2] +
764 tmp_Q0Q0_L[3*16+0*4+3] * ds[0][3] +
765 tmp_Q0Q0_L[3*16+1*4+0] * ds[1][0] +
766 tmp_Q0Q0_L[3*16+1*4+1] * ds[1][1] +
767 tmp_Q0Q0_L[3*16+1*4+2] * ds[1][2] +
768 tmp_Q0Q0_L[3*16+1*4+3] * ds[1][3] +
769 tmp_Q0Q0_L[3*16+2*4+0] * ds[2][0] +
770 tmp_Q0Q0_L[3*16+2*4+1] * ds[2][1] +
771 tmp_Q0Q0_L[3*16+2*4+2] * ds[2][2] +
772 tmp_Q0Q0_L[3*16+2*4+3] * ds[2][3] +
773 tmp_Q0Q0_L[3*16+3*4+0] * ds[3][0] +
774 tmp_Q0Q0_L[3*16+3*4+1] * ds[3][1] +
775 tmp_Q0Q0_L[3*16+3*4+2] * ds[3][2] +
776 tmp_Q0Q0_L[3*16+3*4+3] * ds[3][3];
777 }
778 }
787 template <bool use_dl, bool use_dq/*, bool use_ds*/>
788 inline COLVARS_DEVICE void calc_derivative_wrt_group1(
789 int ia,
790 cvm::rvector* _noalias const dl0_1_out,
791 cvm::rvector* _noalias const dq0_1_out) const {
792 const cvm::real a2x = d_pos2x[ia];
793 const cvm::real a2y = d_pos2y[ia];
794 const cvm::real a2z = d_pos2z[ia];
795 const cvm::rvector ds_1[4][4] = {
796 {{ a2x, a2y, a2z}, { 0.0, a2z, -a2y}, {-a2z, 0.0, a2x}, { a2y, -a2x, 0.0}},
797 {{ 0.0, a2z, -a2y}, { a2x, -a2y, -a2z}, { a2y, a2x, 0.0}, { a2z, 0.0, a2x}},
798 {{-a2z, 0.0, a2x}, { a2y, a2x, 0.0}, {-a2x, a2y, -a2z}, { 0.0, a2z, a2y}},
799 {{ a2y, -a2x, 0.0}, { a2z, 0.0, a2x}, { 0.0, a2z, a2y}, {-a2x, -a2y, a2z}}};
800 calc_derivative_impl<use_dl, use_dq>(ds_1, dl0_1_out, dq0_1_out);
801 }
810 template <bool use_dl, bool use_dq/*, bool use_ds*/>
811 inline COLVARS_DEVICE void calc_derivative_wrt_group2(
812 int ia,
813 cvm::rvector* _noalias const dl0_2_out,
814 cvm::rvector* _noalias const dq0_2_out) const {
815 const cvm::real a1x = d_pos1x[ia];
816 const cvm::real a1y = d_pos1y[ia];
817 const cvm::real a1z = d_pos1z[ia];
818 const cvm::rvector ds_2[4][4] = {
819 {{ a1x, a1y, a1z}, { 0.0, -a1z, a1y}, { a1z, 0.0, -a1x}, {-a1y, a1x, 0.0}},
820 {{ 0.0, -a1z, a1y}, { a1x, -a1y, -a1z}, { a1y, a1x, 0.0}, { a1z, 0.0, a1x}},
821 {{ a1z, 0.0, -a1x}, { a1y, a1x, 0.0}, {-a1x, a1y, -a1z}, { 0.0, a1z, a1y}},
822 {{-a1y, a1x, 0.0}, { a1z, 0.0, a1x}, { 0.0, a1z, a1y}, {-a1x, -a1y, a1z}}};
823 calc_derivative_impl<use_dl, use_dq>(ds_2, dl0_2_out, dq0_2_out);
824 }
825
842 template <int i>
843 inline COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdqi(cvm::real f_on_q) const {
844 static_assert((i < 4) && (i >= 0), "i must be in [0, 3] in project_force_to_C_from_dxdqi.");
845 return project_force_to_C_from_dxdqi(i, f_on_q);
846 }
847
848 inline COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdqi(int i, cvm::real f_on_q) const {
849 cvm::rmatrix result;
850 result.xx = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+0] + tmp_Q0Q0_L[i*16+1*4+1] - tmp_Q0Q0_L[i*16+2*4+2] - tmp_Q0Q0_L[i*16+3*4+3] );
851 result.xy = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+3] + tmp_Q0Q0_L[i*16+1*4+2] + tmp_Q0Q0_L[i*16+2*4+1] + tmp_Q0Q0_L[i*16+3*4+0] );
852 result.xz = f_on_q * (-tmp_Q0Q0_L[i*16+0*4+2] + tmp_Q0Q0_L[i*16+1*4+3] - tmp_Q0Q0_L[i*16+2*4+0] + tmp_Q0Q0_L[i*16+3*4+1] );
853 result.yx = f_on_q * (-tmp_Q0Q0_L[i*16+0*4+3] + tmp_Q0Q0_L[i*16+1*4+2] + tmp_Q0Q0_L[i*16+2*4+1] - tmp_Q0Q0_L[i*16+3*4+0] );
854 result.yy = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+0] - tmp_Q0Q0_L[i*16+1*4+1] + tmp_Q0Q0_L[i*16+2*4+2] - tmp_Q0Q0_L[i*16+3*4+3] );
855 result.yz = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+1] + tmp_Q0Q0_L[i*16+1*4+0] + tmp_Q0Q0_L[i*16+2*4+3] + tmp_Q0Q0_L[i*16+3*4+2] );
856 result.zx = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+2] + tmp_Q0Q0_L[i*16+1*4+3] + tmp_Q0Q0_L[i*16+2*4+0] + tmp_Q0Q0_L[i*16+3*4+1] );
857 result.zy = f_on_q * (-tmp_Q0Q0_L[i*16+0*4+1] - tmp_Q0Q0_L[i*16+1*4+0] + tmp_Q0Q0_L[i*16+2*4+3] + tmp_Q0Q0_L[i*16+3*4+2] );
858 result.zz = f_on_q * ( tmp_Q0Q0_L[i*16+0*4+0] - tmp_Q0Q0_L[i*16+1*4+1] - tmp_Q0Q0_L[i*16+2*4+2] + tmp_Q0Q0_L[i*16+3*4+3] );
859 return result;
860 }
861
879 template <typename dim4_array_t>
880 inline COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdq(const dim4_array_t& sum_dxdq) const {
881 cvm::rmatrix result;
882 result += project_force_to_C_from_dxdqi<0>(sum_dxdq[0]);
883 result += project_force_to_C_from_dxdqi<1>(sum_dxdq[1]);
884 result += project_force_to_C_from_dxdqi<2>(sum_dxdq[2]);
885 result += project_force_to_C_from_dxdqi<3>(sum_dxdq[3]);
886 return result;
887 }
888
899 inline COLVARS_DEVICE cvm::rvector project_force_to_group1(size_t ia, const cvm::rmatrix& dxdC) const {
900 const cvm::real a2x = *(d_pos2x + ia);
901 const cvm::real a2y = *(d_pos2y + ia);
902 const cvm::real a2z = *(d_pos2z + ia);
903 const cvm::rvector result{
904 dxdC.xx * a2x + dxdC.xy * a2y + dxdC.xz * a2z,
905 dxdC.yx * a2x + dxdC.yy * a2y + dxdC.yz * a2z,
906 dxdC.zx * a2x + dxdC.zy * a2y + dxdC.zz * a2z};
907 return result;
908 }
909
920 inline COLVARS_DEVICE cvm::rvector project_force_to_group2(size_t ia, const cvm::rmatrix& dxdC) const {
921 const cvm::real a1x = *(d_pos1x + ia);
922 const cvm::real a1y = *(d_pos1y + ia);
923 const cvm::real a1z = *(d_pos1z + ia);
924 const cvm::rvector result{
925 dxdC.xx * a1x + dxdC.yx * a1y + dxdC.zx * a1z,
926 dxdC.xy * a1x + dxdC.yy * a1y + dxdC.zy * a1z,
927 dxdC.xz * a1x + dxdC.yz * a1y + dxdC.zz * a1z};
928 return result;
929 }
930};
931}
932#elif defined(COLVARS_SYCL)
933#endif // defined(COLVARS_CUDA) || defined(COLVARS_HIP)
934
935#endif // COLVAR_ROTATION_DERIVATIVE
2-dimensional array of real numbers with three components along each dimension (works with colvarmodu...
Definition: colvartypes.h:903
A rotation between two sets of coordinates (for the moment a wrapper for colvarmodule::quaternion)
Definition: colvartypes.h:1368
cvm::real S_eigval[4]
Eigenvalues of S.
Definition: colvartypes.h:1377
cvm::real S_eigvec[4][4]
Eigenvectors of S.
Definition: colvartypes.h:1380
vector of real numbers with three components
Definition: colvartypes.h:728
Collective variables module (main class)
Definition: colvarmodule.h:72
double real
Defining an abstract real number allows to switch precision.
Definition: colvarmodule.h:99
Definition: colvartypes.h:1580
Definition: colvar_rotation_derivative.h:622
const cvm::real * d_pos2x
Reference to the atom positions of group 2.
Definition: colvar_rotation_derivative.h:632
COLVARS_DEVICE void calc_derivative_impl(const cvm::rvector(&ds)[4][4], cvm::rvector *_noalias const dl0_out, cvm::rvector *_noalias const dq0_out) const
Actual implementation of the derivative calculation.
Definition: colvar_rotation_derivative.h:687
int add_prepare_derivative_nodes(rotation_derivative_dldq require_dl_dq, cudaGraph_t &graph, std::unordered_map< std::string, cudaGraphNode_t > &nodes_map)
Add a "prepare_rotation_derivative" node to the CUDA graph.
Definition: colvar_rotation_derivative.cpp:48
size_t m_num_atoms_pos2
Number of atoms in group1 (used in SOA)
Definition: colvar_rotation_derivative.h:638
COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdqi(cvm::real f_on_q) const
Project the force on (or the gradient of ) to the force on the correlation matrix ,...
Definition: colvar_rotation_derivative.h:843
const cvm::real * d_pos1x
Reference to the atom positions of group 1.
Definition: colvar_rotation_derivative.h:628
int init(const colvars_gpu::rotation_gpu *rot, const cvm::real *d_pos1, const cvm::real *d_pos2, const size_t num_atoms_pos1, const size_t num_atoms_pos2)
Initialization of the rotation_derivative_gpu class.
Definition: colvar_rotation_derivative.cpp:18
COLVARS_DEVICE cvm::rmatrix project_force_to_C_from_dxdq(const dim4_array_t &sum_dxdq) const
Project the force on (or the gradient of ) to the force on the correlation matrix ,...
Definition: colvar_rotation_derivative.h:880
COLVARS_DEVICE void calc_derivative_wrt_group2(int ia, cvm::rvector *_noalias const dl0_2_out, cvm::rvector *_noalias const dq0_2_out) const
Calculate the derivatives of S, the leading eigenvalue L and the leading eigenvector Q with respect t...
Definition: colvar_rotation_derivative.h:811
COLVARS_DEVICE cvm::rvector project_force_to_group1(size_t ia, const cvm::rmatrix &dxdC) const
Project the force on the correlation matrix to .
Definition: colvar_rotation_derivative.h:899
size_t m_num_atoms_pos1
Number of atoms in group1 (used in SOA)
Definition: colvar_rotation_derivative.h:636
COLVARS_DEVICE void calc_derivative_wrt_group1(int ia, cvm::rvector *_noalias const dl0_1_out, cvm::rvector *_noalias const dq0_1_out) const
Calculate the derivatives of S, the leading eigenvalue L and the leading eigenvector Q with respect t...
Definition: colvar_rotation_derivative.h:788
const colvars_gpu::rotation_gpu * m_rot
Reference to the rotation.
Definition: colvar_rotation_derivative.h:626
cvm::real * tmp_Q0Q0
GPU temporary variable that will be updated if prepare_derivative called.
Definition: colvar_rotation_derivative.h:640
colvarmodule * cvmodule
Pointer to the parent colvarmodule.
Definition: colvar_rotation_derivative.h:624
COLVARS_DEVICE cvm::rvector project_force_to_group2(size_t ia, const cvm::rmatrix &dxdC) const
Project the force on the correlation matrix to .
Definition: colvar_rotation_derivative.h:920
Helper class for calculating the derivative of rotation.
Definition: colvar_rotation_derivative.h:42
cvm::ag_vector_real_t::const_iterator pos2x
Reference to the atom positions of group 2.
Definition: colvar_rotation_derivative.h:52
rotation_derivative(const cvm::rotation &rot, const cvm::ag_vector_real_t &pos1, const cvm::ag_vector_real_t &pos2, const size_t num_atoms_pos1, const size_t num_atoms_pos2)
Constructor of the cvm::rotation::derivative class for SOA.
Definition: colvar_rotation_derivative.h:72
cvm::rmatrix project_force_to_C_from_dxdqi(cvm::real f_on_q) const
Project the force on (or the gradient of ) to the force on the correlation matrix ,...
Definition: colvar_rotation_derivative.h:535
void prepare_derivative(rotation_derivative_dldq require_dl_dq)
This function must be called before calc_derivative_wrt_group1 and calc_derivative_wrt_group2 in orde...
Definition: colvar_rotation_derivative.h:93
void calc_derivative_wrt_group1(size_t ia, cvm::rvector *_noalias const dl0_1_out=nullptr, std::array< cvm::rvector, 4 > *_noalias const dq0_1_out=nullptr, std::array< std::array< cvm::rvector, 4 >, 4 > *_noalias const ds_1_out=nullptr) const
Calculate the derivatives of S, the leading eigenvalue L and the leading eigenvector Q with respect t...
Definition: colvar_rotation_derivative.h:476
size_t m_num_atoms_pos1
Number of atoms in group1 (used in SOA)
Definition: colvar_rotation_derivative.h:56
void calc_derivative_impl(const cvm::rvector(&ds)[4][4], cvm::rvector *_noalias const dl0_out, std::array< cvm::rvector, 4 > *_noalias const dq0_out, std::array< std::array< cvm::rvector, 4 >, 4 > *_noalias const ds_out) const
Actual implementation of the derivative calculation.
Definition: colvar_rotation_derivative.h:340
cvm::real tmp_Q0Q0[4][4]
Temporary variable that will be updated if prepare_derivative called.
Definition: colvar_rotation_derivative.h:60
cvm::ag_vector_real_t::const_iterator pos1x
Reference to the atom positions of group 1.
Definition: colvar_rotation_derivative.h:47
cvm::rvector project_force_to_group2(size_t ia, const cvm::rmatrix &dxdC) const
Project the force on the correlation matrix to .
Definition: colvar_rotation_derivative.h:608
void calc_derivative_wrt_group2(size_t ia, cvm::rvector *_noalias const dl0_2_out=nullptr, std::array< cvm::rvector, 4 > *_noalias const dq0_2_out=nullptr, std::array< std::array< cvm::rvector, 4 >, 4 > *_noalias const ds_2_out=nullptr) const
Calculate the derivatives of S, the leading eigenvalue L and the leading eigenvector Q with respect t...
Definition: colvar_rotation_derivative.h:502
const cvm::rotation & m_rot
Reference to the rotation.
Definition: colvar_rotation_derivative.h:44
cvm::rmatrix project_force_to_C_from_dxdq(const dim4_array_t &sum_dxdq) const
Project the force on (or the gradient of ) to the force on the correlation matrix ,...
Definition: colvar_rotation_derivative.h:568
size_t m_num_atoms_pos2
Number of atoms in group1 (used in SOA)
Definition: colvar_rotation_derivative.h:58
cvm::rvector project_force_to_group1(size_t ia, const cvm::rmatrix &dxdC) const
Project the force on the correlation matrix to .
Definition: colvar_rotation_derivative.h:587