#include <OpenCLFramework/clfContext.hh>

#include <iostream>
#include <string>
#include <vector>
28 using namespace BIAS;
29 using namespace std;
/**
 * @brief The simple example from all the internet tutorials for using OpenCL.
 *
 * In this example, we create two vectors with 1000 elements each. We then
 * add them together with OpenCL and store the result in a third vector
 * (C = A + B).
 *
 * @author fkellner 06/11
 */
40 int main() {
41  // initialize vectors A and B (such that the sum of each element is 1000
42  const int LIST_SIZE = 1000;
43  int *A = new int[LIST_SIZE];
44  int *B = new int[LIST_SIZE];
45  for(int i = 0; i < LIST_SIZE; i++) {
46  A[i] = i;
47  B[i] = LIST_SIZE - i;
48  }
49  // create vector to store result
50  int *C = new int[LIST_SIZE];
52  // try/catch of OpenCLException is strongly recommended!
53  try {
54  // create context on first GPU found
55  clfContext context;
56  // print some info
57  context.PrintPlatformsInfo();
58  context.PrintDeviceInfo();
60  // create a program in this context
61  clfProgram *program = context.CreateProgram();
63  // this is the source code for our compute kernel
64  string source = ""
65  "__kernel void vector_add(__global const int *A, __global const int *B, __global int *C) {\n"
66  " // element index\n"
67  " size_t v = get_global_id(0);\n"
68  " C[v] = A[v]+B[v];\n"
69  "}\n";
71  // add code to our program
72  program->AddSourceFromString(source);
73  // compile program
74  program->Build();
75  // we want to use the vector_add kernel
76  program->AddKernel("vector_add");
78  // allocate opencl buffers in the context
79  clfBuffer *bufferA = context.CreateBuffer();
80  bufferA->Allocate(LIST_SIZE * sizeof(int), true, false);
81  clfBuffer *bufferB = context.CreateBuffer();
82  bufferB->Allocate(LIST_SIZE * sizeof(int), true, false);
83  clfBuffer *bufferC = context.CreateBuffer();
84  bufferC->Allocate(LIST_SIZE * sizeof(int), false, true);
85  // now write our local memory to the buffers
86  // note that the Allocate functions allow different means of doing this
87  bufferA->WriteToBuffer(A);
88  bufferB->WriteToBuffer(B);
90  // set the arguments of compute kernel vector_add.
91  program->KernelSetArgument( "vector_add", 0, *bufferA );
92  program->KernelSetArgument( "vector_add", 1, *bufferB );
93  program->KernelSetArgument( "vector_add", 2, *bufferC );
95  // run the vector_add kernel on 1000 elements. each thread will handle one element
96  // which is not very efficient!
97  context.RunOn1DRange( *program, "vector_add", LIST_SIZE);
99  // read the result back to host memory
100  bufferC->ReadFromBuffer(C);
102  // check if each element of C is now 1000
103  bool success = true;
104  for(int i = 0; i < LIST_SIZE; i ++) {
105  if (C[i] != 1000) {
106  success = false;
107  }
108  }
109  if (success) {
110  cout << "computation successful." << endl;
111  } else {
112  cout << "error in computation!" << endl;
113  }
115  } catch (clfException &err) {
116  // the detailed error string
117  cout << err.GetDetailedString() << endl;
118  }
120  return 0;
121 }
