-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkernel.cu
More file actions
65 lines (59 loc) · 2 KB
/
kernel.cu
File metadata and controls
65 lines (59 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include <stdio.h>
#include<cuComplex.h>
/* C-linkage interface: the wrappers below are declared extern "C" when this
 * file is compiled as C++, so their symbol names are not mangled. */
#ifdef __cplusplus
extern"C"{
#endif
/* Triple of double-precision components x, y, z. */
typedef struct vector{double x,y,z;}vector;
/* Launches the device kernel. Arguments in order: grid size, block size,
 * input vectors, output buffer (treated as an array of cuDoubleComplex),
 * real part and imaginary part of the complex factor, reference vector,
 * wave multiplier, lambda. Both pointers are dereferenced by device code. */
void kernelCuda(size_t, size_t, vector*, void*, double, double, vector, double, double);
/* Returns the device count obtained from cudaGetDeviceCount. */
int getDeviceCountCuda();
/* Forwards the given device index to cudaSetDevice. */
void setDeviceCuda(int);
/* Forwards the given pointer to cudaFree. */
void freeCuda(void*);
/* Returns the maxThreadsPerBlock field of the queried device properties. */
int getMaxThreadsPerBlockCuda();
/* Requests the given number of bytes from cudaMalloc and returns the pointer. */
void*mallocCuda(size_t);
/* Forwards (pointer, value, byte count) to cudaMemset. */
void memsetCuda(void*, int, size_t);
/* Copies the given number of bytes (dst, src, count) with cudaMemcpyHostToDevice. */
void memcpyHostToDeviceCuda(void*, const void*, size_t);
/* Copies the given number of bytes (dst, src, count) with cudaMemcpyDeviceToHost. */
void memcpyDeviceToHostCuda(void*, const void*, size_t);
#ifdef __cplusplus
}
#endif
// Imaginary unit (0 + 1i) for use in device code.
__device__ const cuDoubleComplex i = { 0, 1 };

/*
 * Subtracts one complex contribution from every element of `output`.
 *
 * For the element `id` owned by the calling thread:
 *     distance   = Euclidean distance between input[id] and avg
 *     output[id] = output[id] - (i / (lambda * distance)) * (factor * e^(i * wave * distance))
 *
 * Parameters:
 *     input   vectors read by the kernel, one per thread
 *     output  complex values updated in place, one per thread
 *     factor  complex multiplier applied to the exponential
 *     avg     reference vector the distance is measured from
 *     wave    multiplier of the distance inside the exponential
 *     lambda  multiplier of the distance in the divisor
 *
 * Launch layout: only the x dimension of the grid and block is used;
 * id = blockDim.x * blockIdx.x + threadIdx.x.
 *
 * Precondition: there is no element-count parameter and therefore no bounds
 * guard. Every launched thread reads input[id] and reads/writes output[id],
 * so grid size * block size must not exceed the length of either array.
 *
 * NOTE(review): when input[id] equals avg the distance is 0 and the divisor
 * lambda * distance is zero; confirm callers exclude that case.
 */
__global__ void kernel(vector* input, cuDoubleComplex* output, cuDoubleComplex factor, vector avg, double wave, double lambda){
    // Widen before multiplying: blockDim.x * blockIdx.x is a 32-bit unsigned
    // product that wraps for large launches, and storing it in an int turns
    // indices >= 2^31 negative.
    const size_t id = (size_t)blockDim.x * blockIdx.x + threadIdx.x;

    const vector in = input[id];
    const double dx = in.x - avg.x;
    const double dy = in.y - avg.y;
    const double dz = in.z - avg.z;
    const double distance = sqrt(dx*dx + dy*dy + dz*dz);

    // exponential = cos(wave*distance) + i*sin(wave*distance)
    cuDoubleComplex exponential;
    sincos(wave*distance, &exponential.y, &exponential.x);

    const cuDoubleComplex scale = cuCdiv(i, make_cuDoubleComplex(lambda*distance, 0));
    const cuDoubleComplex term = cuCmul(scale, cuCmul(factor, exponential));
    output[id] = cuCsub(output[id], term);
}
/*
 * Host entry point that launches `kernel` on the default stream.
 *
 * grid_size / block_size  number of blocks and threads per block (x dimension)
 * input                   vectors dereferenced by the kernel
 * output                  buffer treated as an array of cuDoubleComplex
 * real, imag              components of the complex factor passed to the kernel
 * avg, wave, lambda       forwarded unchanged to the kernel
 *
 * The launch is asynchronous; this function does not synchronize. Failures
 * are reported on stderr because the interface has no return value.
 */
void kernelCuda(size_t grid_size,size_t block_size,vector*input,void*output,double real,double imag,vector avg,double wave,double lambda){
    // Launch dimensions are 32-bit; reject values that would be silently
    // truncated instead of launching with a different configuration.
    const unsigned int blocks = (unsigned int)grid_size;
    const unsigned int threads = (unsigned int)block_size;
    if ((size_t)blocks != grid_size || (size_t)threads != block_size) {
        fprintf(stderr, "kernelCuda: launch dimensions %llu x %llu do not fit in 32 bits\n",
                (unsigned long long)grid_size, (unsigned long long)block_size);
        return;
    }

    kernel<<<blocks,threads>>>(input,(cuDoubleComplex*)output,make_cuDoubleComplex(real,imag),avg,wave,lambda);

    // A kernel launch returns nothing; an invalid configuration is only
    // visible through the last-error query.
    const cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "kernelCuda: kernel launch failed: %s\n", cudaGetErrorString(status));
    }
}
/*
 * Returns the number of CUDA devices reported by cudaGetDeviceCount.
 *
 * Returns 0 when the query fails (for example when no device or driver is
 * present), so callers never see an uninitialized value.
 */
int getDeviceCountCuda(){
    int count = 0;
    if (cudaGetDeviceCount(&count) != cudaSuccess) {
        return 0;
    }
    return count;
}
/*
 * Selects device `i` through cudaSetDevice.
 *
 * The interface has no return value, so a failure is reported on stderr
 * instead of being dropped.
 */
void setDeviceCuda(int i) {
    const cudaError_t status = cudaSetDevice(i);
    if (status != cudaSuccess) {
        fprintf(stderr, "setDeviceCuda: cudaSetDevice(%d) failed: %s\n", i, cudaGetErrorString(status));
    }
}
/*
 * Releases the allocation `v` through cudaFree.
 *
 * The interface has no return value, so a failure is reported on stderr
 * instead of being dropped.
 */
void freeCuda(void*v) {
    const cudaError_t status = cudaFree(v);
    if (status != cudaSuccess) {
        fprintf(stderr, "freeCuda: cudaFree failed: %s\n", cudaGetErrorString(status));
    }
}
/*
 * Returns the maxThreadsPerBlock property of the currently selected device.
 *
 * The current device is queried with cudaGetDevice so the answer matches the
 * device chosen through setDeviceCuda; when no device was selected explicitly
 * this is the runtime's default device.
 *
 * Returns 0 and reports on stderr when either query fails, so callers never
 * see a field of an unfilled property structure.
 */
int getMaxThreadsPerBlockCuda() {
    int device = 0;
    cudaError_t status = cudaGetDevice(&device);
    if (status == cudaSuccess) {
        struct cudaDeviceProp prop;
        status = cudaGetDeviceProperties(&prop, device);
        if (status == cudaSuccess) {
            return prop.maxThreadsPerBlock;
        }
    }
    fprintf(stderr, "getMaxThreadsPerBlockCuda: device query failed: %s\n", cudaGetErrorString(status));
    return 0;
}
/*
 * Allocates `size` bytes through cudaMalloc.
 *
 * Returns the allocated pointer, or NULL when the allocation fails (the
 * failure is also reported on stderr). The pointer is initialized before the
 * call so a failed allocation can never hand back an indeterminate value.
 */
void*mallocCuda(size_t size) {
    void* v = NULL;
    const cudaError_t status = cudaMalloc(&v, size);
    if (status != cudaSuccess) {
        fprintf(stderr, "mallocCuda: cudaMalloc of %llu bytes failed: %s\n",
                (unsigned long long)size, cudaGetErrorString(status));
        return NULL;
    }
    return v;
}
/*
 * Fills `size` bytes starting at `ptr` through cudaMemset.
 *
 * `value` is applied per byte by cudaMemset, so only byte patterns (such as
 * 0) can be written this way.
 *
 * The interface has no return value, so a failure is reported on stderr
 * instead of being dropped.
 */
void memsetCuda(void*ptr, int value, size_t size) {
    const cudaError_t status = cudaMemset(ptr, value, size);
    if (status != cudaSuccess) {
        fprintf(stderr, "memsetCuda: cudaMemset failed: %s\n", cudaGetErrorString(status));
    }
}
/*
 * Copies `count` bytes from `src` to `dst` with cudaMemcpy in the
 * cudaMemcpyHostToDevice direction.
 *
 * The interface has no return value, so a failure is reported on stderr
 * instead of being dropped.
 */
void memcpyHostToDeviceCuda(void*dst,const void*src,size_t count) {
    const cudaError_t status = cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice);
    if (status != cudaSuccess) {
        fprintf(stderr, "memcpyHostToDeviceCuda: cudaMemcpy failed: %s\n", cudaGetErrorString(status));
    }
}
/*
 * Copies `count` bytes from `src` to `dst` with cudaMemcpy in the
 * cudaMemcpyDeviceToHost direction.
 *
 * The interface has no return value, so a failure is reported on stderr
 * instead of being dropped. Because this copy is a blocking call, errors
 * raised by earlier asynchronous work may be returned here as well.
 */
void memcpyDeviceToHostCuda(void*dst,const void*src,size_t count) {
    const cudaError_t status = cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost);
    if (status != cudaSuccess) {
        fprintf(stderr, "memcpyDeviceToHostCuda: cudaMemcpy failed: %s\n", cudaGetErrorString(status));
    }
}