forked from Theano/libgpuarray
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuffer.h
More file actions
811 lines (725 loc) · 21.3 KB
/
buffer.h
File metadata and controls
811 lines (725 loc) · 21.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
/** \file buffer.h
* \brief This file contains the interface definition for the backends.
*
* For normal use you should not call the functions defined in this
* file directly.
*
* \see array.h For managing buffers
* \see kernel.h For using kernels
*/
#ifndef GPUARRAY_BUFFER_H
#define GPUARRAY_BUFFER_H
#include <sys/types.h>
#include <stdio.h>
#include <stdarg.h>
#include <gpuarray/config.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef CONFUSE_EMACS
}
#endif
struct _gpudata;
/**
* Opaque struct for buffer data.
*
* Only forward-declared in this header, so it can only be handled
* through pointers such as the one returned by gpudata_alloc().
*/
typedef struct _gpudata gpudata;
struct _gpucontext;
/**
* Opaque struct for context data.
*
* Only forward-declared in this header, so it can only be handled
* through pointers such as the one returned by gpucontext_init().
*/
typedef struct _gpucontext gpucontext;
struct _gpukernel;
/**
* Opaque struct for kernel data.
*
* Only forward-declared in this header, so it can only be handled
* through pointers such as the one returned by gpukernel_init().
*/
typedef struct _gpukernel gpukernel;
/**
* \brief Gets the number of available platforms for the backend
* specified in `name`.
* \param name [const char*] the backend name
* \param platcount [unsigned int*] will contain the number of compatible
* platforms on the host
* \return int GA_NO_ERROR on success
*/
GPUARRAY_PUBLIC int gpu_get_platform_count(const char* name,
unsigned int* platcount);
/**
* \brief Gets the number of compatible devices on a specific `platform`
* of the host for the backend specified in `name`.
* \param name [const char*] the backend name
* \param platform [unsigned int] number of a platform on the host
* \param devcount [unsigned int*] will contain the number of compatible
* devices in `platform`
* \return int GA_NO_ERROR on success
*/
GPUARRAY_PUBLIC int gpu_get_device_count(const char* name,
unsigned int platform,
unsigned int* devcount);
/**
* Create a context on the specified device.
*
* \warning This function is not thread-safe.
*
* \param name the backend name.
* \param dev the device number. The precise meaning of the device
* number is backend-dependent
* \param flags see \ref context_flags "Context flags"
* \param ret error return location. Will be ignored if set to NULL.
*
* \returns An opaque pointer to the created context or NULL if an
* error occurred.
*
* \see gpucontext_deref()
*/
GPUARRAY_PUBLIC gpucontext *gpucontext_init(const char *name, int dev,
int flags, int *ret);
/**
* \defgroup context_flags Context flags
* @{
*/
/**
* Let the backend decide on optimal parameters, using backend-defined
* heuristics and defaults.
*
* This is the default (0) value.
*/
#define GA_CTX_DEFAULT 0x00
/**
* Optimize parameters for multi-thread performance.
*
* May decrease overall performance in single-thread scenarios.
*/
#define GA_CTX_MULTI_THREAD 0x01
/**
* Optimize parameters for single-thread performance.
*
* May decrease overall performance in multithread scenarios.
*/
#define GA_CTX_SINGLE_THREAD 0x02
/**
* Allocate a single stream per context, performing all operations in order.
*
* This will remove any attempt at exploiting parallelism in the
* underlying device by performing unrelated operations concurrently
* and/or out of order.
*
* This can help performance by removing the small cost paid for each
* operation to keep everything coherent in the face of parallelism.
* It can also hinder performance by not exploiting concurrency.
*/
#define GA_CTX_SINGLE_STREAM 0x4
/**
* Disable allocations cache (if any).
*
* This will usually decrease performance by quite a bit, but will
* enable better debugging of kernels that perform out of bounds
* access.
*/
#define GA_CTX_DISABLE_ALLOCATION_CACHE 0x10
/**
* @}
*/
/**
* Dereference a context.
*
* This removes a reference to the context and as soon as the
* reference count drops to zero the context is destroyed. The
* context can stay alive after you call this function because some
* objects keep a reference to their context.
*
* \param ctx a valid context pointer.
*/
GPUARRAY_PUBLIC void gpucontext_deref(gpucontext *ctx);
/**
* Fetch a context property.
*
* The property must be a context property. The currently defined
* properties and their type are defined in \ref props "Properties".
*
* Some properties hand back memory that the caller must free (for
* example the string returned for GA_CTX_PROP_DEVNAME); check the
* documentation of the individual property.
*
* \param ctx context
* \param prop_id property id (from \ref props "Properties")
* \param res pointer to the return space of the appropriate type
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpucontext_property(gpucontext *ctx, int prop_id,
void *res);
/**
* Get a string describing `err`.
*
* If you need to get a description of an error that occurred during
* context creation, call this function using NULL as the context.
* This version of the call is not thread-safe.
*
* \param ctx the context in which the error occurred
* \param err error code
*
* \returns string description of error
*/
GPUARRAY_PUBLIC const char *gpucontext_error(gpucontext *ctx, int err);
/**
* Allocates a buffer of size `sz` in context `ctx`.
*
* Buffers are reference counted internally and start with a
* reference count of 1.
*
* \param ctx a context pointer
* \param sz the requested size
* \param data optional pointer to host buffer (used to initialize the
* contents when GA_BUFFER_INIT is set in `flags`)
* \param flags see \ref alloc_flags "Allocation flags"
* \param ret error return pointer
*
* \returns A non-NULL pointer to a gpudata structure. This
* structure is intentionally opaque as its content may change
* according to the backend used.
*
* \note NOTE(review): presumably NULL is returned on failure with the
* error code stored through `ret` -- confirm against the implementation.
*/
GPUARRAY_PUBLIC gpudata *gpudata_alloc(gpucontext *ctx, size_t sz, void *data,
int flags, int *ret);
/**
* \defgroup alloc_flags Allocation flags
* @{
*/
/**
* The buffer is available for reading and writing from kernels.
*
* This is the default (0) value.
*/
#define GA_BUFFER_READ_WRITE 0x00
/**
* Allocate the buffer in device-only memory.
*
* This is the default (0) value.
*/
#define GA_BUFFER_DEV 0x00
/**
* Signal that the memory in this buffer will only be read by kernels.
*
* You can use gpudata_write() to set the contents.
*
* You may not call gpudata_memset() with the resulting buffer as the
* destination.
*/
#define GA_BUFFER_READ_ONLY 0x01
/**
* Signal that the memory in this buffer will only be written by
* kernels (i.e. it is an output buffer).
*
* You can read the contents with gpudata_read().
*/
#define GA_BUFFER_WRITE_ONLY 0x02
/**
* Initialize the contents of the buffer with the user-supplied host
* buffer (`data`). This buffer must be at least `sz` large.
*/
#define GA_BUFFER_INIT 0x04
/**
* Allocate the buffer in host-reachable memory enabling you to
* retrieve a pointer to the contents as the
* `GA_BUFFER_PROP_HOSTPOINTER` property.
*/
#define GA_BUFFER_HOST 0x08
/*#define GA_BUFFER_USE_DATA 0x10*/
/* The upper 16 bits are private flags */
#define GA_BUFFER_MASK 0xffff
/**
* @}
*/
/**
* Retain a buffer.
*
* Increase the reference count of the passed buffer by 1.
*
* \param b a buffer
*
* \see gpudata_release()
*/
GPUARRAY_PUBLIC void gpudata_retain(gpudata *b);
/**
* Release a buffer.
*
* This will decrement the reference count of the buffer by 1. If
* that count reaches 0 all associated resources will be released.
*
* Even if your application does not have any references left to a
* buffer it may still hang around if it is in use by internal
* mechanisms (kernel call, ...)
*
* \param b a buffer
*/
GPUARRAY_PUBLIC void gpudata_release(gpudata *b);
/**
* Check if two buffers may overlap.
*
* Both buffers must have been created with the same backend.
*
* \param a first buffer
* \param b second buffer
* \param ret error return pointer
*
* \retval 1 The buffers may overlap
* \retval 0 The buffers do not overlap.
* \retval -1 An error was encountered, `ret` contains a detailed
* error code if not NULL.
*/
GPUARRAY_PUBLIC int gpudata_share(gpudata *a, gpudata *b, int *ret);
/**
* Copy the content of a buffer to another.
*
* Copies `sz` bytes from `src` (starting at `srcoff`) into `dst`
* (starting at `dstoff`).
*
* Both buffers must be in the same context and contiguous.
* Additionally the buffers must not overlap, otherwise the content of
* the destination buffer is not defined.
*
* \param dst destination buffer
* \param dstoff offset inside the destination buffer
* \param src source buffer
* \param srcoff offset inside the source buffer
* \param sz size of data to copy (in bytes)
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpudata_move(gpudata *dst, size_t dstoff,
gpudata *src, size_t srcoff,
size_t sz);
/**
* Transfer the content of a buffer across contexts.
*
* If possible it will try to do the transfer in an efficient way
* using backend-specific tricks. If those fail or can't be used, it
* will fall back to a copy through the host.
*
* \param dst buffer to transfer to
* \param dstoff offset in the destination buffer
* \param src buffer to transfer from
* \param srcoff offset in the source buffer
* \param sz size of the region to transfer
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_LOCAL int gpudata_transfer(gpudata *dst, size_t dstoff,
gpudata *src, size_t srcoff,
size_t sz);
/**
* Transfer data from a buffer to memory.
*
* Copies `sz` bytes from `src`, starting at `srcoff`, into `dst`.
* The buffer and the memory region must be contiguous.
*
* \param dst destination in memory (must have room for `sz` bytes)
* \param src source buffer
* \param srcoff offset inside the source buffer
* \param sz size of data to copy (in bytes)
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpudata_read(void *dst,
gpudata *src, size_t srcoff,
size_t sz);
/**
* Transfer data from memory to a buffer.
*
* Copies `sz` bytes from `src` into `dst`, starting at `dstoff`.
* The buffer and the memory region must be contiguous.
*
* \param dst destination buffer
* \param dstoff offset inside the destination buffer
* \param src source in memory (must provide `sz` readable bytes)
* \param sz size of data to copy (in bytes)
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpudata_write(gpudata *dst, size_t dstoff,
const void *src, size_t sz);
/**
* Set a buffer to a byte pattern.
*
* This function acts like the C function memset() for device buffers.
*
* Buffers allocated with GA_BUFFER_READ_ONLY may not be used as the
* destination of this call.
*
* \param dst destination buffer
* \param dstoff offset into the destination buffer
* \param data byte value to write into the destination.
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*
* \note NOTE(review): there is no size parameter; presumably the fill
* runs from `dstoff` to the end of the buffer -- confirm against the
* implementation.
*/
GPUARRAY_PUBLIC int gpudata_memset(gpudata *dst, size_t dstoff, int data);
/**
* Synchronize a buffer.
*
* Waits for all previous reads, writes, copies and kernel calls
* involving this buffer to be finished.
*
* This call is not required for normal use of the library as all
* exposed operations will properly synchronize amongst themselves.
* This call may be useful in a performance timing context to ensure
* that the work is really done, or before interaction with another
* library to wait for pending operations.
*
* \param b the buffer to synchronize
*
* \returns presumably GA_NO_ERROR or an error code, like the other
* int-returning calls in this file -- TODO confirm.
*/
GPUARRAY_PUBLIC int gpudata_sync(gpudata *b);
/**
* Fetch a buffer property.
*
* Can be used for buffer properties and context properties. Context
* properties will fetch the value for the context associated with the
* buffer. The currently defined properties and their type are
* defined in \ref props "Properties"; buffer property ids start at
* GA_BUFFER_PROP_START.
*
* \param buf buffer
* \param prop_id property id (from \ref props "Properties")
* \param res pointer to the return space of the appropriate type
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpudata_property(gpudata *buf, int prop_id, void *res);
/**
* Get the context associated with a buffer.
*
* \param b a buffer
*
* \returns the context of `b`.
*
* \note NOTE(review): presumably the same context reported by
* GA_BUFFER_PROP_CTX; whether the caller receives a new reference is
* not visible from this header -- confirm against the implementation.
*/
GPUARRAY_PUBLIC gpucontext *gpudata_context(gpudata *b);
/**
* Compile a kernel.
*
* Compile the kernel composed of the concatenated strings in
* `strings` and return a callable kernel. If lengths is NULL then
* all the strings must be NUL-terminated. Otherwise, it doesn't
* matter (but the lengths must not include the final NUL byte if
* provided).
*
* \param ctx context to work in
* \param count number of input strings
* \param strings table of string pointers
* \param lengths (optional) length for each string in the table
* \param fname name of the kernel function (as defined in the code)
* \param numargs number of arguments of the kernel (see
* GA_KERNEL_PROP_NUMARGS)
* \param typecodes table of argument types, presumably one entry per
* kernel argument (see GA_KERNEL_PROP_TYPES) -- TODO
* confirm the expected type code values
* \param flags flags for compilation (see #ga_usefl)
* \param ret error return pointer
* \param err_str returns pointer to debug message from GPU backend
* (if provided a non-NULL err_str)
*
* If `*err_str` is not NULL on return, the caller must call
* `free(*err_str)` after use.
*
* \returns Allocated kernel structure or NULL if an error occurred.
* `ret` will be updated with the error code if not NULL.
*/
GPUARRAY_PUBLIC gpukernel *gpukernel_init(gpucontext *ctx, unsigned int count,
const char **strings, const size_t *lengths,
const char *fname, unsigned int numargs,
const int *typecodes, int flags, int *ret,
char **err_str);
/**
* Retain a kernel.
*
* Increase the reference count of the passed kernel by 1.
*
* \param k a kernel
*
* \see gpukernel_release()
*/
GPUARRAY_PUBLIC void gpukernel_retain(gpukernel *k);
/**
* Release a kernel.
*
* Decrease the reference count of a kernel. If it reaches 0, all
* resources associated with `k` will be released.
*
* If the reference count of a kernel reaches 0 while it is running,
* this call will block until completion.
*
* \param k a kernel
*/
GPUARRAY_PUBLIC void gpukernel_release(gpukernel *k);
/**
* Set kernel argument.
*
* Buffer arguments will not be retained and it is the
* responsibility of the caller to ensure that the value is still
* valid whenever a call is made.
*
* \param k kernel
* \param i argument index (starting at 0)
* \param a pointer to argument
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpukernel_setarg(gpukernel *k, unsigned int i, void *a);
/**
* Call a kernel.
*
* If args is NULL, it will be assumed that the arguments have
* previously been set with gpukernel_setarg().
*
* \param k kernel
* \param n number of dimensions of grid/block
* \param ls block sizes for this call (also known as local size)
* \param gs grid sizes for this call (also known as global size)
* \param shared amount of dynamic shared memory to reserve
* \param args table of pointers to each argument (optional).
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpukernel_call(gpukernel *k, unsigned int n,
const size_t *ls, const size_t *gs,
size_t shared, void **args);
/**
* Get the kernel binary.
*
* This can be used to cache kernel binaries after compilation for a
* specific device. The kernel can be recreated by calling
* gpukernel_init() with the binary and size and passing `GA_USE_BINARY`
* as the use flags.
*
* The returned pointer is allocated and must be freed by the caller.
*
* \param k kernel
* \param sz receives the size of the returned binary
* \param obj receives the pointer to the binary for the kernel.
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpukernel_binary(gpukernel *k, size_t *sz, void **obj);
/**
* Fetch a kernel property.
*
* Can be used for kernel and context properties. The context
* properties will fetch the value for the context associated with the
* kernel. The currently defined properties and their type are
* defined in \ref props "Properties"; kernel property ids start at
* GA_KERNEL_PROP_START.
*
* \param k kernel
* \param prop_id property id (from \ref props "Properties")
* \param res pointer to the return space of the appropriate type
*
* \returns GA_NO_ERROR or an error code if an error occurred.
*/
GPUARRAY_PUBLIC int gpukernel_property(gpukernel *k, int prop_id, void *res);
/**
* Get the context associated with a kernel.
*
* \param k a kernel
*
* \returns the context of `k`.
*
* \note NOTE(review): presumably the same context reported by
* GA_KERNEL_PROP_CTX; whether the caller receives a new reference is
* not visible from this header -- confirm against the implementation.
*/
GPUARRAY_PUBLIC gpucontext *gpukernel_context(gpukernel *k);
/**
* \defgroup props Properties
* @{
*/
/* Start at 1 for GA_CTX_PROP_ */
/**
* Get the device name for the context.
*
* \note The returned string is allocated and must be freed by the caller.
*
* Type: `char *`
*/
#define GA_CTX_PROP_DEVNAME 1
/**
* Get the maximum block size (also known as local size) for a kernel
* call in the context.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_MAXLSIZE 2
/**
* Get the local memory size available for a call in the context.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_LMEMSIZE 3
/**
* Number of compute units in this context.
*
* compute units times local size is more or less the expected
* parallelism available on the device, but this is a very rough
* estimate.
*
* Type: `unsigned int`
*/
#define GA_CTX_PROP_NUMPROCS 4
/**
* Get the maximum group size for a kernel call in this context.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_MAXGSIZE 5
/**
* Get the vector of blas ops for the context.
*
* This may differ from one context to the other in the same backend
* depending of the availability and performance of various BLAS
* libraries.
*
* Type: `const gpuarray_blas_ops *`
*/
#define GA_CTX_PROP_BLAS_OPS 6
/**
* Get the compatibility ID for the binaries generated with this context.
*
* Those binaries should work with any context which has the same ID.
*
* Type: `const char *`
*/
#define GA_CTX_PROP_BIN_ID 7
/**
* Get a pre-allocated 8 byte buffer for kernel ops.
*
* This buffer is initialized to 0 on allocation and must always be
* returned to that state after using it.
*
* This only to avoid the overhead of an allocation when calling a
* kernel that may error out. It does not preclude the need for
* synchronization and transfers.
*
* Type: `gpudata *`
*/
#define GA_CTX_PROP_ERRBUF 8
/**
* Get the total size of global memory on the device.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_TOTAL_GMEM 9
/**
* Get the size of free global memory on the device.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_FREE_GMEM 10
/**
* Get the status of native float16 support on the device.
*
* Type: `int`
*/
#define GA_CTX_PROP_NATIVE_FLOAT16 11
/**
* Get the maximum global size for dimension 0.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_MAXGSIZE0 12
/**
* Get the maximum global size for dimension 1.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_MAXGSIZE1 13
/**
* Get the maximum global size for dimension 2.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_MAXGSIZE2 14
/**
* Get the maximum local size for dimension 0.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_MAXLSIZE0 15
/**
* Get the maximum local size for dimension 1.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_MAXLSIZE1 16
/**
* Get the maximum local size for dimension 2.
*
* Type: `size_t`
*/
#define GA_CTX_PROP_MAXLSIZE2 17
/**
* Get the vector of collective ops for the context.
*
* Type: `const gpuarray_comm_ops *`
*/
#define GA_CTX_PROP_COMM_OPS 18
/* Start at 512 for GA_BUFFER_PROP_ */
#define GA_BUFFER_PROP_START 512
/**
* Get the context in which this buffer was allocated.
*
* Type: `gpucontext *`
*/
#define GA_BUFFER_PROP_CTX 512
/**
* The reference count of the buffer. Use only for debugging purposes.
*
* Type: `unsigned int`
*/
#define GA_BUFFER_PROP_REFCNT 513
/**
* Size of the buffer on the device.
*
* This may be larger than the requested allocation size due to a
* number of factors.
*
* Type: `size_t`
*/
#define GA_BUFFER_PROP_SIZE 514
/* Start at 1024 for GA_KERNEL_PROP_ */
#define GA_KERNEL_PROP_START 1024
/**
* Get the context for which this kernel was compiled.
*
* Type: `gpucontext *`
*/
#define GA_KERNEL_PROP_CTX 1024
/**
* Get the maximum block size (also known as local size) for a call of
* this kernel.
*
* Type: `size_t`
*/
#define GA_KERNEL_PROP_MAXLSIZE 1025
/**
* Get the preferred multiple of the block size for a call to this
* kernel.
*
* Type: `size_t`
*/
#define GA_KERNEL_PROP_PREFLSIZE 1026
/**
* Get the number of kernel arguments.
*
* Type: `unsigned int`
*/
#define GA_KERNEL_PROP_NUMARGS 1027
/**
* Get the list of argument types for a kernel.
*
* This list is the same length as the number of arguments to the
* kernel. Do not modify the returned list.
*
* Type: `const int *`
*/
#define GA_KERNEL_PROP_TYPES 1028
/**
* @}
*/
/**
* Flags for gpukernel_init().
*
* Each enumerator is a distinct bit so that several flags can be
* combined with bitwise OR.
*
* It is important to specify these properly as the compilation
* machinery will ensure that the proper configuration is made to
* support the requested features or error out if the demands cannot
* be met.
*
* \warning Failure to properly specify the feature flags will in most
* cases result in silent data corruption (especially on ATI cards).
*/
typedef enum _ga_usefl {
  /**
   * The kernel source uses CLUDA unified language.
   */
  GA_USE_CLUDA = 0x01,
  /**
   * The kernel makes use of small (size is smaller than 4 bytes) types.
   */
  GA_USE_SMALL = 0x02,
  /**
   * The kernel makes use of double or complex doubles.
   */
  GA_USE_DOUBLE = 0x04,
  /**
   * The kernel makes use of complex or complex doubles.
   */
  GA_USE_COMPLEX = 0x08,
  /**
   * The kernel makes use of half-floats (also known as float16)
   */
  GA_USE_HALF = 0x10,
  /**
   * The source code passed is actually a kernel binary.
   *
   * For the cuda backend this can also be a PTX module.
   */
  GA_USE_BINARY = 0x20,
  /* If you add a new flag, don't forget to update both
     gpuarray_buffer_{cuda,opencl}.c with the implementation of your flag */
  /**
   * The kernel is made of CUDA code.
   */
  GA_USE_CUDA = 0x2000,
  /**
   * The kernel is made of OpenCL code.
   *
   * No trailing comma after this last enumerator: C90 and C++98
   * reject it and this header is meant to be usable from both.
   */
  GA_USE_OPENCL = 0x4000
} ga_usefl;
#ifdef __cplusplus
}
#endif
#endif