/**
 * @file parallelproj_cuda.h
 *
 * @brief C-linkage declarations of the CUDA Joseph projector wrappers and of the
 *        helper functions that manage float arrays on the visible CUDA devices.
 *
 * Parameter names follow the h_ / d_ prefix convention used in this header
 * (h_array is documented as an array on the host, d_array as the device arrays).
 */

#ifndef __PARALLELPROJ_CUDA_H__
#define __PARALLELPROJ_CUDA_H__

#ifdef __cplusplus
extern "C"
{
#endif

/** @brief 3D non-tof joseph back projector CUDA wrapper
 *
 *  @param h_xstart        array of shape [3*nlors] with the coordinates of the start points of the LORs.
 *                         The start coordinates of the n-th LOR are at h_xstart[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param h_xend          array of shape [3*nlors] with the coordinates of the end points of the LORs.
 *                         The end coordinates of the n-th LOR are at h_xend[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param d_img           pointer to device arrays of shape [n0*n1*n2] containing the 3D image
 *                         used to store the back projections.
 *                         The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k].
 *                         The back projector adds to existing values.
 *  @param h_img_origin    array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel
 *  @param h_voxsize       array [vs0, vs1, vs2] of the voxel sizes
 *  @param h_p             array of length nlors containing the values to be back projected
 *  @param nlors           number of projections (length of the h_p array)
 *  @param h_img_dim       array with dimensions of image [n0,n1,n2]
 *  @param threadsperblock number of threads per block
 */
void joseph3d_back_cuda(const float *h_xstart,
                        const float *h_xend,
                        float **d_img,
                        const float *h_img_origin,
                        const float *h_voxsize,
                        const float *h_p,
                        long long nlors,
                        const int *h_img_dim,
                        int threadsperblock);

/** @brief 3D listmode tof joseph back projector CUDA wrapper
 *
 *  @param h_xstart        array of shape [3*nlors] with the coordinates of the start points of the LORs.
 *                         The start coordinates of the n-th LOR are at h_xstart[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param h_xend          array of shape [3*nlors] with the coordinates of the end points of the LORs.
 *                         The end coordinates of the n-th LOR are at h_xend[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param d_img           pointer to device arrays of shape [n0*n1*n2] containing the 3D image
 *                         used to store the back projections.
 *                         The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k].
 *                         The back projector adds to existing values.
 *  @param h_img_origin    array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel
 *  @param h_voxsize       array [vs0, vs1, vs2] of the voxel sizes
 *  @param h_p             array of length nlors containing the values to be back projected
 *  @param nlors           number of projections (length of the h_p array)
 *  @param h_img_dim       array with dimensions of image [n0,n1,n2]
 *  @param tofbin_width    width of the TOF bins in spatial units (units of xstart and xend)
 *  @param h_sigma_tof     array of length 1 or nlors (depending on lor_dependent_sigma_tof)
 *                         with the TOF resolution (sigma) for each LOR in
 *                         spatial units (units of xstart and xend)
 *  @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset)
 *                         with the offset of the central TOF bin from the
 *                         midpoint of each LOR in spatial units (units of xstart and xend).
 *                         A positive value means a shift towards the end point of the LOR.
 *  @param n_sigmas        number of sigmas to consider for calculation of TOF kernel
 *  @param h_tof_bin       signed integer (short) array with the TOF bin of the events.
 *                         The center of TOF bin 0 is assumed to be at the center of the LOR
 *                         (shifted by the tofcenter_offset).
 *  @param lor_dependent_sigma_tof unsigned char 0 or 1.
 *                         1 means that the TOF sigmas are LOR dependent,
 *                         any other value means that the first value in the sigma_tof
 *                         array is used for all LORs
 *  @param lor_dependent_tofcenter_offset unsigned char 0 or 1.
 *                         1 means that the TOF center offsets are LOR dependent,
 *                         any other value means that the first value in the
 *                         tofcenter_offset array is used for all LORs
 *  @param threadsperblock number of threads per block
 */
void joseph3d_back_tof_lm_cuda(const float *h_xstart,
                               const float *h_xend,
                               float **d_img,
                               const float *h_img_origin,
                               const float *h_voxsize,
                               const float *h_p,
                               long long nlors,
                               const int *h_img_dim,
                               float tofbin_width,
                               const float *h_sigma_tof,
                               const float *h_tofcenter_offset,
                               float n_sigmas,
                               const short *h_tof_bin,
                               unsigned char lor_dependent_sigma_tof,
                               unsigned char lor_dependent_tofcenter_offset,
                               int threadsperblock);

/** @brief 3D sinogram tof joseph back projector CUDA wrapper
 *
 *  @param h_xstart        array of shape [3*nlors] with the coordinates of the start points of the LORs.
 *                         The start coordinates of the n-th LOR are at h_xstart[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param h_xend          array of shape [3*nlors] with the coordinates of the end points of the LORs.
 *                         The end coordinates of the n-th LOR are at h_xend[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param d_img           pointer to device arrays of shape [n0*n1*n2] containing the 3D image
 *                         used to store the back projections.
 *                         The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k].
 *                         The back projector adds to existing values.
 *  @param h_img_origin    array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel
 *  @param h_voxsize       array [vs0, vs1, vs2] of the voxel sizes
 *  @param h_p             array of length nlors*n_tofbins with the values to be back projected.
 *                         The order of the array is (n = n_tofbins, N = nlors)
 *                         [LOR0-TOFBIN-0, LOR0-TOFBIN-1, ... LOR0-TOFBIN-(n-1),
 *                          LOR1-TOFBIN-0, LOR1-TOFBIN-1, ... LOR1-TOFBIN-(n-1),
 *                          ...
 *                          LOR(N-1)-TOFBIN-0, LOR(N-1)-TOFBIN-1, ... LOR(N-1)-TOFBIN-(n-1)]
 *  @param nlors           number of geometrical LORs
 *  @param h_img_dim       array with dimensions of image [n0,n1,n2]
 *  @param tofbin_width    width of the TOF bins in spatial units (units of xstart and xend)
 *  @param h_sigma_tof     array of length 1 or nlors (depending on lor_dependent_sigma_tof)
 *                         with the TOF resolution (sigma) for each LOR in
 *                         spatial units (units of xstart and xend)
 *  @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset)
 *                         with the offset of the central TOF bin from the
 *                         midpoint of each LOR in spatial units (units of xstart and xend).
 *                         A positive value means a shift towards the end point of the LOR.
 *  @param n_sigmas        number of sigmas to consider for calculation of TOF kernel
 *  @param n_tofbins       number of TOF bins
 *  @param lor_dependent_sigma_tof unsigned char 0 or 1.
 *                         1 means that the TOF sigmas are LOR dependent,
 *                         any other value means that the first value in the sigma_tof
 *                         array is used for all LORs
 *  @param lor_dependent_tofcenter_offset unsigned char 0 or 1.
 *                         1 means that the TOF center offsets are LOR dependent,
 *                         any other value means that the first value in the
 *                         tofcenter_offset array is used for all LORs
 *  @param threadsperblock number of threads per block
 */
void joseph3d_back_tof_sino_cuda(const float *h_xstart,
                                 const float *h_xend,
                                 float **d_img,
                                 const float *h_img_origin,
                                 const float *h_voxsize,
                                 const float *h_p,
                                 long long nlors,
                                 const int *h_img_dim,
                                 float tofbin_width,
                                 const float *h_sigma_tof,
                                 const float *h_tofcenter_offset,
                                 float n_sigmas,
                                 short n_tofbins,
                                 unsigned char lor_dependent_sigma_tof,
                                 unsigned char lor_dependent_tofcenter_offset,
                                 int threadsperblock);

/** @brief 3D non-tof joseph forward projector CUDA wrapper
 *
 *  @param h_xstart        array of shape [3*nlors] with the coordinates of the start points of the LORs.
 *                         The start coordinates of the n-th LOR are at h_xstart[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param h_xend          array of shape [3*nlors] with the coordinates of the end points of the LORs.
 *                         The end coordinates of the n-th LOR are at h_xend[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param d_img           pointer to device arrays of shape [n0*n1*n2] containing the 3D image to
 *                         be projected.
 *                         The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k].
 *  @param h_img_origin    array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel
 *  @param h_voxsize       array [vs0, vs1, vs2] of the voxel sizes
 *  @param h_p             array of length nlors (output) used to store the projections
 *  @param nlors           number of projections (length of the h_p array)
 *  @param h_img_dim       array with dimensions of image [n0,n1,n2]
 *  @param threadsperblock number of threads per block
 */
void joseph3d_fwd_cuda(const float *h_xstart,
                       const float *h_xend,
                       float **d_img,
                       const float *h_img_origin,
                       const float *h_voxsize,
                       float *h_p,
                       long long nlors,
                       const int *h_img_dim,
                       int threadsperblock);

/** @brief 3D listmode tof joseph forward projector CUDA wrapper
 *
 *  @param h_xstart        array of shape [3*nlors] with the coordinates of the start points of the LORs.
 *                         The start coordinates of the n-th LOR are at h_xstart[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param h_xend          array of shape [3*nlors] with the coordinates of the end points of the LORs.
 *                         The end coordinates of the n-th LOR are at h_xend[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param d_img           pointer to device arrays of shape [n0*n1*n2] containing the 3D image to
 *                         be projected.
 *                         The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k].
 *  @param h_img_origin    array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel
 *  @param h_voxsize       array [vs0, vs1, vs2] of the voxel sizes
 *  @param h_p             array of length nlors (output) used to store the projections
 *  @param nlors           number of projections (length of the h_p array)
 *  @param h_img_dim       array with dimensions of image [n0,n1,n2]
 *  @param tofbin_width    width of the TOF bins in spatial units (units of xstart and xend)
 *  @param h_sigma_tof     array of length 1 or nlors (depending on lor_dependent_sigma_tof)
 *                         with the TOF resolution (sigma) for each LOR in
 *                         spatial units (units of xstart and xend)
 *  @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset)
 *                         with the offset of the central TOF bin from the
 *                         midpoint of each LOR in spatial units (units of xstart and xend).
 *                         A positive value means a shift towards the end point of the LOR.
 *  @param n_sigmas        number of sigmas to consider for calculation of TOF kernel
 *  @param h_tof_bin       signed integer (short) array with the TOF bin of the events.
 *                         The center of TOF bin 0 is assumed to be at the center of the LOR
 *                         (shifted by the tofcenter_offset).
 *  @param lor_dependent_sigma_tof unsigned char 0 or 1.
 *                         1 means that the TOF sigmas are LOR dependent,
 *                         any other value means that the first value in the sigma_tof
 *                         array is used for all LORs
 *  @param lor_dependent_tofcenter_offset unsigned char 0 or 1.
 *                         1 means that the TOF center offsets are LOR dependent,
 *                         any other value means that the first value in the
 *                         tofcenter_offset array is used for all LORs
 *  @param threadsperblock number of threads per block
 */
void joseph3d_fwd_tof_lm_cuda(const float *h_xstart,
                              const float *h_xend,
                              float **d_img,
                              const float *h_img_origin,
                              const float *h_voxsize,
                              float *h_p,
                              long long nlors,
                              const int *h_img_dim,
                              float tofbin_width,
                              const float *h_sigma_tof,
                              const float *h_tofcenter_offset,
                              float n_sigmas,
                              const short *h_tof_bin,
                              unsigned char lor_dependent_sigma_tof,
                              unsigned char lor_dependent_tofcenter_offset,
                              int threadsperblock);

/** @brief 3D sinogram tof joseph forward projector CUDA wrapper
 *
 *  @param h_xstart        array of shape [3*nlors] with the coordinates of the start points of the LORs.
 *                         The start coordinates of the n-th LOR are at h_xstart[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param h_xend          array of shape [3*nlors] with the coordinates of the end points of the LORs.
 *                         The end coordinates of the n-th LOR are at h_xend[n*3 + i] with i = 0,1,2.
 *                         Units are the ones of voxsize.
 *  @param d_img           pointer to device arrays of shape [n0*n1*n2] containing the 3D image to
 *                         be projected.
 *                         The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k].
 *  @param h_img_origin    array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel
 *  @param h_voxsize       array [vs0, vs1, vs2] of the voxel sizes
 *  @param h_p             array of length nlors*n_tofbins (output) used to store the projections.
 *                         The order of the array is (n = n_tofbins, N = nlors)
 *                         [LOR0-TOFBIN-0, LOR0-TOFBIN-1, ... LOR0-TOFBIN-(n-1),
 *                          LOR1-TOFBIN-0, LOR1-TOFBIN-1, ... LOR1-TOFBIN-(n-1),
 *                          ...
 *                          LOR(N-1)-TOFBIN-0, LOR(N-1)-TOFBIN-1, ... LOR(N-1)-TOFBIN-(n-1)]
 *  @param nlors           number of geometrical LORs
 *  @param h_img_dim       array with dimensions of image [n0,n1,n2]
 *  @param tofbin_width    width of the TOF bins in spatial units (units of xstart and xend)
 *  @param h_sigma_tof     array of length 1 or nlors (depending on lor_dependent_sigma_tof)
 *                         with the TOF resolution (sigma) for each LOR in
 *                         spatial units (units of xstart and xend)
 *  @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset)
 *                         with the offset of the central TOF bin from the
 *                         midpoint of each LOR in spatial units (units of xstart and xend).
 *                         A positive value means a shift towards the end point of the LOR.
 *  @param n_sigmas        number of sigmas to consider for calculation of TOF kernel
 *  @param n_tofbins       number of TOF bins
 *  @param lor_dependent_sigma_tof unsigned char 0 or 1.
 *                         1 means that the TOF sigmas are LOR dependent,
 *                         any other value means that the first value in the sigma_tof
 *                         array is used for all LORs
 *  @param lor_dependent_tofcenter_offset unsigned char 0 or 1.
 *                         1 means that the TOF center offsets are LOR dependent,
 *                         any other value means that the first value in the
 *                         tofcenter_offset array is used for all LORs
 *  @param threadsperblock number of threads per block
 */
void joseph3d_fwd_tof_sino_cuda(const float *h_xstart,
                                const float *h_xend,
                                float **d_img,
                                const float *h_img_origin,
                                const float *h_voxsize,
                                float *h_p,
                                long long nlors,
                                const int *h_img_dim,
                                float tofbin_width,
                                const float *h_sigma_tof,
                                const float *h_tofcenter_offset,
                                float n_sigmas,
                                short n_tofbins,
                                unsigned char lor_dependent_sigma_tof,
                                unsigned char lor_dependent_tofcenter_offset,
                                int threadsperblock);

/** @brief copy a float array to all visible cuda devices
 *
 *  The number of visible cuda devices is determined automatically via the CUDA API
 *
 *  @param h_array array of shape [n] on the host
 *  @param n       number of array elements
 *  @return a pointer to all device arrays
 */
float **copy_float_array_to_all_devices(const float *h_array, long long n);

/** @brief free device array on all visible cuda devices
 *
 *  The number of visible cuda devices is determined automatically via the CUDA API
 *
 *  @param d_array a pointer to all device arrays
 */
void free_float_array_on_all_devices(float **d_array);

/** @brief sum multiple versions of an array on different devices on first device
 *
 *  The number of visible cuda devices is determined automatically via the CUDA API.
 *  This becomes useful when multiple devices back project into separate images.
 *
 *  @param d_array a pointer to all device arrays
 *  @param n       number of array elements
 */
void sum_float_arrays_on_first_device(float **d_array, long long n);

/** @brief copy a float array from a device back to host
 *         (e.g. the summed array held on the first device)
 *
 *  @param d_array a pointer to all device arrays of shape [n]
 *  @param n       number of array elements
 *  @param i_dev   device number
 *                 (NOTE(review): presumably the device to copy from, 0 being the
 *                 first device - confirm against the implementation)
 *  @param h_array array of shape [n] on the host used for output
 */
void get_float_array_from_device(float **d_array, long long n, int i_dev, float *h_array);

/** @brief get the number of visible cuda devices
 *
 *  @return the number of visible cuda devices
 */
int get_cuda_device_count(void); /* (void): a real prototype in C, identical meaning in C++ */

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* __PARALLELPROJ_CUDA_H__ */