forked from Theano/libgpuarray
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathelemwise.h
More file actions
167 lines (138 loc) · 3.58 KB
/
elemwise.h
File metadata and controls
167 lines (138 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#ifndef GPUARRAY_ELEMWISE_H
#define GPUARRAY_ELEMWISE_H
/** \file elemwise.h
* \brief Custom elementwise operations generator.
*/
#include <gpuarray/buffer.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef CONFUSE_EMACS
}
#endif
/**
 * Opaque handle for an elementwise operation generator.
 *
 * This header only exposes the incomplete type: the contents are
 * private, so objects are always manipulated through pointers.
 */
typedef struct _GpuElemwise GpuElemwise;
/**
 * Argument information structure for GpuElemwise.
 *
 * Describes one argument of the expression: its name, the data type
 * of its content and how it is used. All three fields are mandatory.
 */
typedef struct _gpuelemwise_arg {
  /**
   * Name of this argument in the associated expression, mandatory.
   */
  const char *name;

  /**
   * Type of argument, mandatory (not GA_BUFFER, the content dtype).
   */
  int typecode;

  /**
   * Argument flags, mandatory (see \ref eflags).
   */
  int flags;

/**
 * \defgroup eflags GpuElemwise argument flags
 *
 * Bit values for gpuelemwise_arg::flags. GE_READ and GE_WRITE may be
 * combined when an argument is both read and written.
 * @{
 */

/**
 * Argument is a scalar passed from the CPU, requires nd == 0.
 */
#define GE_SCALAR 0x0001

/**
 * Array is read from in the expression.
 */
#define GE_READ 0x0002

/**
 * Array is written to in the expression.
 */
#define GE_WRITE 0x0004

/**
 * @}
 */
} gpuelemwise_arg;
/**
 * Create a new GpuElemwise.
 *
 * This will allocate and initialize a new GpuElemwise object. This
 * object can be used to run the specified operation on different sets
 * of arrays.
 *
 * The argument descriptors name the arguments and provide their data
 * types and geometry (arrays or scalars). They also specify if the
 * arguments are used for reading or writing. An argument can be used
 * for both.
 *
 * The expression is a C-like string performing an operation with
 * scalar values named according to the argument descriptors. All of
 * the indexing and selection of the right values is handled by the
 * GpuElemwise code.
 *
 * \param ctx the context in which to run the operations
 * \param preamble code to be inserted before the kernel code
 * \param expr the expression to compute
 * \param n the number of arguments
 * \param args the argument descriptors
 * \param nd the number of dimensions to precompile for
 * \param flags see \ref elem_flags "GpuElemwise flags"
 *
 * \returns a new GpuElemwise object or NULL; release the object with
 *          GpuElemwise_free()
 */
GPUARRAY_PUBLIC GpuElemwise *GpuElemwise_new(gpucontext *ctx,
const char *preamble,
const char *expr,
unsigned int n,
gpuelemwise_arg *args,
unsigned int nd,
int flags);
/**
 * \defgroup elem_flags GpuElemwise flags
 *
 * Bit flags accepted by the flags parameter of GpuElemwise_new().
 * @{
 */

/**
 * Do not precompile kernels for 64-bit addressing.
 */
#define GE_NOADDR64    0x0001

/**
 * float16 inputs are converted to float32 for the computation.
 */
#define GE_CONVERT_F16 0x0002

/**
 * @}
 */
/**
 * Free all storage associated with a GpuElemwise.
 *
 * NOTE(review): this header does not say whether a NULL pointer is
 * accepted -- confirm against the implementation before relying on it.
 *
 * \param ge the GpuElemwise object to free.
 */
GPUARRAY_PUBLIC void GpuElemwise_free(GpuElemwise *ge);
/**
 * Run a GpuElemwise on some inputs.
 *
 * \param ge the GpuElemwise to run
 * \param args pointers to the arguments (must match what was described by
 *             the argument descriptors)
 * \param flags see \ref elem_call_flags "GpuElemwise call flags"
 *
 * NOTE(review): the meaning of the int return value is not documented
 * in this header; presumably a GA_* error code with GA_NO_ERROR on
 * success -- confirm against the implementation.
 */
GPUARRAY_PUBLIC int GpuElemwise_call(GpuElemwise *ge, void **args, int flags);
/**
 * \defgroup elem_call_flags GpuElemwise call flags
 *
 * Bit flags accepted by the flags parameter of GpuElemwise_call().
 * @{
 */

/**
 * Dimensions of size 1 are allowed to broadcast.
 */
#define GE_BROADCAST  0x0100

/**
 * Turn off dimension collapsing (not recommended).
 */
#define GE_NOCOLLAPSE 0x0200

/**
 * @}
 */
#ifdef __cplusplus
}
#endif
#endif