ifでもswitchでも結果は同じだった(どちらもインデックスに即値を使うパターンがNG)。
CUDA:
- // -*- C++ -*-
- /*
- block制御のテスト
- if実験
- */
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <math.h>
- #include <unistd.h>
- #include <cutil.h>
- #define N 4  // element count of a/b; runTest also launches this many blocks
- int a[N];  // host buffer copied to d_a by runTest (the kernel never reads d_a)
- int b[N];  // host buffer that receives the contents of d_b after the kernel runs
- #define PATTERN 0  // selects which variant of the kernel body is compiled (0..3)
- /*
- How d_b is indexed under each PATTERN value; OK/NG presumably records the
- result the author observed for that variant.
- 0 : id => OK
- 1 : immediate (literal) index => NG
- 2 : table[id] => OK
- 3 : table[immediate (literal) index] => NG
- */
- __constant__ int table[4] = {0,1,2,3};  // identity lookup table in constant memory; read by PATTERN 2 and 3
- __global__ void gpu  // Experiment kernel (if/else-if version): a block with index 0..3 stores id + 1 into d_b; PATTERN picks how d_b is indexed.
- (int* d_a, int* d_b)  // d_a is never accessed in this kernel; d_b is the output array and only elements 0..3 are written
- {
- int id = blockIdx.x;  // the block index alone selects the branch; threadIdx is not consulted
- #if PATTERN == 0  // variant 0: index d_b with the runtime value id
- if(id==0){
- d_b[id] = 1;
- }else if(id==1){
- d_b[id] = 2;
- }else if(id==2){
- d_b[id] = 3;
- }else if(id==3){
- d_b[id] = 4;
- }  // no final else: a block with any other id writes nothing
- #endif
- #if PATTERN == 1  // variant 1: same branches, but d_b is indexed with literal constants
- if(id==0){
- d_b[0] = 1;
- }else if(id==1){
- d_b[1] = 2;
- }else if(id==2){
- d_b[2] = 3;
- }else if(id==3){
- d_b[3] = 4;
- }
- #endif
- #if PATTERN == 2  // variant 2: index goes through the constant-memory table, looked up with id
- if(id==0){
- d_b[table[id]] = 1;
- }else if(id==1){
- d_b[table[id]] = 2;
- }else if(id==2){
- d_b[table[id]] = 3;
- }else if(id==3){
- d_b[table[id]] = 4;
- }
- #endif
- #if PATTERN == 3  // variant 3: index goes through the table, looked up with literal constants
- if(id==0){
- d_b[table[0]] = 1;
- }else if(id==1){
- d_b[table[1]] = 2;
- }else if(id==2){
- d_b[table[2]] = 3;
- }else if(id==3){
- d_b[table[3]] = 4;
- }
- #endif
- }
- // Host driver for the if experiment: zeroes the host buffers a and b, copies
- // them to freshly allocated device buffers, launches gpu with N blocks of one
- // thread each, then copies the device output back into b and prints it on a
- // single line. argc and argv are accepted but not used here.
- void runTest(int argc, char** argv)
- {
-     const int count = N;
-     const size_t bytes = sizeof(int) * count;
-     // Start from all-zero host data so every value the kernel writes is visible.
-     for (int k = 0; k < count; ++k) {
-         a[k] = 0;
-         b[k] = 0;
-     }
-     CUT_DEVICE_INIT();
-     // Device copy of a (handed to the kernel, which does not touch it).
-     int* devA;
-     CUDA_SAFE_CALL(cudaMalloc((void**)&devA, bytes));
-     CUDA_SAFE_CALL(cudaMemcpy(devA, a, bytes, cudaMemcpyHostToDevice));
-     // Device copy of b; the kernel stores its results here.
-     int* devB;
-     CUDA_SAFE_CALL(cudaMalloc((void**)&devB, bytes));
-     CUDA_SAFE_CALL(cudaMemcpy(devB, b, bytes, cudaMemcpyHostToDevice));
-     // One block per element, a single thread in each block.
-     gpu<<<dim3(count, 1, 1), dim3(1, 1, 1)>>>(devA, devB);
-     // Wait for the kernel to finish before checking for errors and reading back.
-     CUDA_SAFE_CALL(cudaThreadSynchronize());
-     CUT_CHECK_ERROR("Kernel execution failed");
-     CUDA_SAFE_CALL(cudaMemcpy(b, devB, bytes, cudaMemcpyDeviceToHost));
-     CUDA_SAFE_CALL(cudaFree(devA));
-     CUDA_SAFE_CALL(cudaFree(devB));
-     // Print the results space-separated, followed by a newline.
-     for (int k = 0; k < count; ++k) {
-         printf(" %d", b[k]);
-     }
-     printf("\n");
- }
- // Program entry point: runs the experiment once via runTest, then hands
- // argc/argv to cutil's CUT_EXIT macro (presumably that macro ends the
- // process; see cutil.h to confirm).
- int main(int argc, char** argv)
- {
-     runTest(argc, argv);
-     CUT_EXIT(argc, argv);
- }
CUDA:
- // -*- C++ -*-
- /*
- block制御のテスト
- switch実験
- */
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <math.h>
- #include <unistd.h>
- #include <cutil.h>
- #define N 4  // element count of a/b; runTest also launches this many blocks
- int a[N];  // host buffer copied to d_a by runTest (the kernel never reads d_a)
- int b[N];  // host buffer that receives the contents of d_b after the kernel runs
- #define PATTERN 0  // selects which variant of the kernel body is compiled (0..3)
- /*
- How d_b is indexed under each PATTERN value; OK/NG presumably records the
- result the author observed for that variant.
- 0 : id => OK
- 1 : immediate (literal) index => NG
- 2 : table[id] => OK
- 3 : table[immediate (literal) index] => NG
- */
- __constant__ int table[4] = {0,1,2,3};  // identity lookup table in constant memory; read by PATTERN 2 and 3
- __global__ void gpu  // Experiment kernel (switch version): a block with index 0..3 stores id + 1 into d_b; PATTERN picks how d_b is indexed.
- (int* d_a, int* d_b)  // d_a is never accessed in this kernel; d_b is the output array and only elements 0..3 are written
- {
- int id = blockIdx.x;  // the block index alone selects the case; threadIdx is not consulted
- #if PATTERN == 0  // variant 0: index d_b with the runtime value id
- switch(id){
- case 0:
- d_b[id] = 1;
- break;
- case 1:
- d_b[id] = 2;
- break;
- case 2:
- d_b[id] = 3;
- break;
- case 3:
- d_b[id] = 4;
- break;
- }  // no default label: a block with any other id writes nothing
- #endif
- #if PATTERN == 1  // variant 1: same cases, but d_b is indexed with literal constants
- switch(id){
- case 0:
- d_b[0] = 1;
- break;
- case 1:
- d_b[1] = 2;
- break;
- case 2:
- d_b[2] = 3;
- break;
- case 3:
- d_b[3] = 4;
- break;
- }
- #endif
- #if PATTERN == 2  // variant 2: index goes through the constant-memory table, looked up with id
- switch(id){
- case 0:
- d_b[table[id]] = 1;
- break;
- case 1:
- d_b[table[id]] = 2;
- break;
- case 2:
- d_b[table[id]] = 3;
- break;
- case 3:
- d_b[table[id]] = 4;
- break;
- }
- #endif
- #if PATTERN == 3  // variant 3: index goes through the table, looked up with literal constants
- switch(id){
- case 0:
- d_b[table[0]] = 1;
- break;
- case 1:
- d_b[table[1]] = 2;
- break;
- case 2:
- d_b[table[2]] = 3;
- break;
- case 3:
- d_b[table[3]] = 4;
- break;
- }
- #endif
- }
- // Host driver for the switch experiment: zeroes the host buffers a and b,
- // copies them to freshly allocated device buffers, launches gpu with N blocks
- // of one thread each, then copies the device output back into b and prints it
- // on a single line. argc and argv are accepted but not used here.
- void runTest(int argc, char** argv)
- {
-     const int count = N;
-     const size_t bytes = sizeof(int) * count;
-     // Start from all-zero host data so every value the kernel writes is visible.
-     for (int k = 0; k < count; ++k) {
-         a[k] = 0;
-         b[k] = 0;
-     }
-     CUT_DEVICE_INIT();
-     // Device copy of a (handed to the kernel, which does not touch it).
-     int* devA;
-     CUDA_SAFE_CALL(cudaMalloc((void**)&devA, bytes));
-     CUDA_SAFE_CALL(cudaMemcpy(devA, a, bytes, cudaMemcpyHostToDevice));
-     // Device copy of b; the kernel stores its results here.
-     int* devB;
-     CUDA_SAFE_CALL(cudaMalloc((void**)&devB, bytes));
-     CUDA_SAFE_CALL(cudaMemcpy(devB, b, bytes, cudaMemcpyHostToDevice));
-     // One block per element, a single thread in each block.
-     gpu<<<dim3(count, 1, 1), dim3(1, 1, 1)>>>(devA, devB);
-     // Wait for the kernel to finish before checking for errors and reading back.
-     CUDA_SAFE_CALL(cudaThreadSynchronize());
-     CUT_CHECK_ERROR("Kernel execution failed");
-     CUDA_SAFE_CALL(cudaMemcpy(b, devB, bytes, cudaMemcpyDeviceToHost));
-     CUDA_SAFE_CALL(cudaFree(devA));
-     CUDA_SAFE_CALL(cudaFree(devB));
-     // Print the results space-separated, followed by a newline.
-     for (int k = 0; k < count; ++k) {
-         printf(" %d", b[k]);
-     }
-     printf("\n");
- }
- // Program entry point: runs the experiment once via runTest, then hands
- // argc/argv to cutil's CUT_EXIT macro (presumably that macro ends the
- // process; see cutil.h to confirm).
- int main(int argc, char** argv)
- {
-     runTest(argc, argv);
-     CUT_EXIT(argc, argv);
- }
ソースコードのマーキングをしてくれるプラグインは便利なんだけど、いっそのこと添付ファイルを展開してマーキングしてくれたほうが便利だよね、とか思った。