-
Notifications
You must be signed in to change notification settings - Fork 105
Expand file tree
/
Copy pathtest_udf.py
More file actions
320 lines (289 loc) · 9.76 KB
/
test_udf.py
File metadata and controls
320 lines (289 loc) · 9.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
from decimal import Decimal
import unittest
from pandas import Timedelta, Timestamp
from feldera import PipelineBuilder
from tests import TEST_CLIENT, unique_pipeline_name
from feldera.runtime_config import RuntimeConfig
from feldera.testutils import FELDERA_TEST_NUM_WORKERS, FELDERA_TEST_NUM_HOSTS
class TestUDF(unittest.TestCase):
    """Integration test for Rust user-defined functions (UDFs) in a pipeline."""

    def test_local(self):
        """Push one row through identity UDFs and check it comes back unchanged.

        The SQL program declares a nullable and a ``NOT NULL`` UDF for each
        column type of table ``t``; every Rust implementation simply returns
        its argument (``Ok(i)``).  View ``v`` applies the UDFs to the columns
        of ``t``, one JSON row is inserted, and the single output row is
        compared with the expected Python values.

        The pipeline is always stopped, even when a step or the final
        comparison fails, so a failing run does not leave it running.
        """
        # NOTE: `struct2struct` / `nstruct2nstruct` are declared and implemented
        # below but are not selected by view `v`, and the `ms` column is not
        # populated by the input row -- presumably only their compilation is
        # being exercised here.
        sql = """
CREATE TYPE my_struct AS (
i INT,
s VARCHAR
);
CREATE TABLE t (
i INT,
ti TINYINT,
si SMALLINT,
bi BIGINT,
r REAL,
d DOUBLE,
bin VARBINARY,
dt DATE,
t TIME,
ts TIMESTAMP,
a INT ARRAY,
m MAP<VARCHAR, VARCHAR>,
v VARIANT,
b BOOLEAN,
dc DECIMAL(7,2),
s VARCHAR,
ms MY_STRUCT
) with ('materialized' = 'true');
CREATE FUNCTION bool2bool(i BOOLEAN) RETURNS BOOLEAN;
CREATE FUNCTION nbool2nbool(i BOOLEAN NOT NULL) RETURNS BOOLEAN NOT NULL;
CREATE FUNCTION i2i(i INT) RETURNS INT;
CREATE FUNCTION ni2ni(i INT NOT NULL) RETURNS INT NOT NULL;
CREATE FUNCTION ti2ti(i TINYINT) RETURNS TINYINT;
CREATE FUNCTION nti2nti(i TINYINT NOT NULL) RETURNS TINYINT NOT NULL;
CREATE FUNCTION si2si(i SMALLINT) RETURNS SMALLINT;
CREATE FUNCTION nsi2nsi(i SMALLINT NOT NULL) RETURNS SMALLINT NOT NULL;
CREATE FUNCTION bi2bi(i BIGINT) RETURNS BIGINT;
CREATE FUNCTION nbi2nbi(i BIGINT NOT NULL) RETURNS BIGINT NOT NULL;
CREATE FUNCTION r2r(i REAL) RETURNS REAL;
CREATE FUNCTION nr2nr(i REAL NOT NULL) RETURNS REAL NOT NULL;
CREATE FUNCTION d2d(i DOUBLE) RETURNS DOUBLE;
CREATE FUNCTION nd2nd(i DOUBLE NOT NULL) RETURNS DOUBLE NOT NULL;
CREATE FUNCTION bin2bin(i VARBINARY) RETURNS VARBINARY;
CREATE FUNCTION nbin2nbin(i VARBINARY NOT NULL) RETURNS VARBINARY NOT NULL;
CREATE FUNCTION date2date(i DATE) RETURNS DATE;
CREATE FUNCTION ndate2ndate(i DATE NOT NULL) RETURNS DATE NOT NULL;
CREATE FUNCTION ts2ts(i TIMESTAMP) RETURNS TIMESTAMP;
CREATE FUNCTION nts2nts(i TIMESTAMP NOT NULL) RETURNS TIMESTAMP NOT NULL;
CREATE FUNCTION t2t(i TIME) RETURNS TIME;
CREATE FUNCTION nt2nt(i TIME NOT NULL) RETURNS TIME NOT NULL;
CREATE FUNCTION arr2arr(i INT ARRAY) RETURNS INT ARRAY;
CREATE FUNCTION narr2narr(i INT ARRAY NOT NULL) RETURNS INT ARRAY NOT NULL;
CREATE FUNCTION map2map(i MAP<VARCHAR, VARCHAR>) RETURNS MAP<VARCHAR, VARCHAR>;
CREATE FUNCTION nmap2nmap(i MAP<VARCHAR, VARCHAR> NOT NULL) RETURNS MAP<VARCHAR, VARCHAR> NOT NULL;
CREATE FUNCTION var2var(i VARIANT) RETURNS VARIANT;
CREATE FUNCTION nvar2nvar(i VARIANT NOT NULL) RETURNS VARIANT NOT NULL;
CREATE FUNCTION dec2dec(i DECIMAL(7, 2)) RETURNS DECIMAL(7, 2);
CREATE FUNCTION ndec2ndec(i DECIMAL(7, 2) NOT NULL) RETURNS DECIMAL(7, 2) NOT NULL;
CREATE FUNCTION str2str(i VARCHAR) RETURNS VARCHAR;
CREATE FUNCTION nstr2nstr(i VARCHAR NOT NULL) RETURNS VARCHAR NOT NULL;
CREATE FUNCTION struct2struct(i my_struct) RETURNS my_struct;
CREATE FUNCTION nstruct2nstruct(i my_struct NOT NULL) RETURNS my_struct NOT NULL;
CREATE MATERIALIZED VIEW v AS
SELECT
bool2bool(b),
nbool2nbool(COALESCE(b, FALSE)),
i2i(i),
ni2ni(COALESCE(i, 0)),
ti2ti(ti),
nti2nti(COALESCE(ti, 0)),
si2si(si),
nsi2nsi(COALESCE(si, 0)),
bi2bi(bi),
nbi2nbi(COALESCE(bi, 0)),
r2r(r),
nr2nr(COALESCE(r, 0.0)),
d2d(d),
nd2nd(COALESCE(d, 0.0)),
bin2bin(bin),
nbin2nbin(COALESCE(bin, x'')),
date2date(dt),
ndate2ndate(COALESCE(dt, DATE '2023-01-01')),
ts2ts(ts),
nts2nts(COALESCE(ts, TIMESTAMP '2023-01-01 00:00:00')),
t2t(t),
nt2nt(COALESCE(t, TIME '00:00:00')),
arr2arr(a),
narr2narr(a),
map2map(m),
nmap2nmap(m),
var2var(v),
nvar2nvar(COALESCE(v, VARIANTNULL())),
dec2dec(dc),
ndec2ndec(COALESCE(dc, 0)),
str2str(s),
nstr2nstr(COALESCE(s, ''))
FROM
t;
"""
        # Rust side of the UDFs: one function per CREATE FUNCTION above, each
        # returning its argument unchanged.  Nullable SQL types are spelled
        # Option<T>; NOT NULL types are the bare T.
        udfs = """
use feldera_sqllib::F32;
use crate::*;
pub fn bool2bool(i: Option<bool>) -> Result<Option<bool>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nbool2nbool(i: bool) -> Result<bool, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn i2i(i: Option<i32>) -> Result<Option<i32>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn ni2ni(i: i32) -> Result<i32, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn ti2ti(i: Option<i8>) -> Result<Option<i8>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nti2nti(i: i8) -> Result<i8, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn si2si(i: Option<i16>) -> Result<Option<i16>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nsi2nsi(i: i16) -> Result<i16, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn bi2bi(i: Option<i64>) -> Result<Option<i64>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nbi2nbi(i: i64) -> Result<i64, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn r2r(i: Option<F32>) -> Result<Option<F32>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nr2nr(i: F32) -> Result<F32, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn d2d(i: Option<F64>) -> Result<Option<F64>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nd2nd(i: F64) -> Result<F64, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn bin2bin(i: Option<ByteArray>) -> Result<Option<ByteArray>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nbin2nbin(i: ByteArray) -> Result<ByteArray, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn date2date(i: Option<Date>) -> Result<Option<Date>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn ndate2ndate(i: Date) -> Result<Date, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn ts2ts(i: Option<Timestamp>) -> Result<Option<Timestamp>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nts2nts(i: Timestamp) -> Result<Timestamp, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn t2t(i: Option<Time>) -> Result<Option<Time>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nt2nt(i: Time) -> Result<Time, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn arr2arr(i: Option<Array<Option<i32>>>) -> Result<Option<Array<Option<i32>>>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn narr2narr(i: Array<Option<i32>>) -> Result<Array<Option<i32>>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn map2map(i: Option<Map<SqlString, Option<SqlString>>>) -> Result<Option<Map<SqlString, Option<SqlString>>>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nmap2nmap(i: Map<SqlString, Option<SqlString>>) -> Result<Map<SqlString, Option<SqlString>>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn var2var(i: Option<Variant>) -> Result<Option<Variant>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nvar2nvar(i: Variant) -> Result<Variant, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn dec2dec(i: Option<SqlDecimal<7, 2>>) -> Result<Option<SqlDecimal<7, 2>>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn ndec2ndec(i: SqlDecimal<7, 2>) -> Result<SqlDecimal<7, 2>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn str2str(i: Option<SqlString>) -> Result<Option<SqlString>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nstr2nstr(i: SqlString) -> Result<SqlString, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn struct2struct(i: Option<Tup2<Option<i32>, Option<SqlString>>>) -> Result<Option<Tup2<Option<i32>, Option<SqlString>>>, Box<dyn std::error::Error>> {
Ok(i)
}
pub fn nstruct2nstruct(i: Tup2<Option<i32>, Option<SqlString>>) -> Result<Tup2<Option<i32>, Option<SqlString>>, Box<dyn std::error::Error>> {
Ok(i)
}
"""

        pipeline = PipelineBuilder(
            TEST_CLIENT,
            name=unique_pipeline_name("test_udfs"),
            sql=sql,
            udf_rust=udfs,
            runtime_config=RuntimeConfig(
                workers=FELDERA_TEST_NUM_WORKERS,
                hosts=FELDERA_TEST_NUM_HOSTS,
            ),
        ).create_or_replace()

        # The single input row; `ms` is intentionally left unset, as in the
        # view nothing reads it.
        input_row = {
            "i": 1,
            "ti": -2,
            "si": 3,
            "bi": -4,
            "r": 0.5,
            "d": 1e-5,
            "bin": [],
            "dt": "2024-09-25",
            "t": "13:05:00",
            "ts": "2024-09-25 13:05:00",
            "a": [1, 2, 3, 4, 5],
            "m": {"foo": "bar"},
            "v": '{"foo": "bar"}',
            "b": True,
            "dc": "123.45",
            "s": "foobar",
        }

        # View columns are unnamed, so they surface as EXPR$<position>; each
        # consecutive pair (nullable UDF, NOT NULL UDF) echoes one input column.
        expected = [
            {
                "EXPR$0": True,
                "EXPR$1": True,
                "EXPR$10": 0.5,
                "EXPR$11": 0.5,
                "EXPR$12": 1e-05,
                "EXPR$13": 1e-05,
                "EXPR$14": [],
                "EXPR$15": [],
                "EXPR$16": Timestamp("2024-09-25 00:00:00"),
                "EXPR$17": Timestamp("2024-09-25 00:00:00"),
                "EXPR$18": Timestamp("2024-09-25 13:05:00"),
                "EXPR$19": Timestamp("2024-09-25 13:05:00"),
                "EXPR$2": 1,
                "EXPR$20": Timedelta("0 days 13:05:00"),
                "EXPR$21": Timedelta("0 days 13:05:00"),
                "EXPR$22": [1, 2, 3, 4, 5],
                "EXPR$23": [1, 2, 3, 4, 5],
                "EXPR$24": {"foo": "bar"},
                "EXPR$25": {"foo": "bar"},
                "EXPR$26": '{"foo": "bar"}',
                "EXPR$27": '{"foo": "bar"}',
                "EXPR$28": Decimal("123.45"),
                "EXPR$29": Decimal("123.45"),
                "EXPR$3": 1,
                "EXPR$30": "foobar",
                "EXPR$31": "foobar",
                "EXPR$4": -2,
                "EXPR$5": -2,
                "EXPR$6": 3,
                "EXPR$7": 3,
                "EXPR$8": -4,
                "EXPR$9": -4,
                "insert_delete": 1,
            }
        ]

        try:
            # TODO: use .query() instead
            # The listener is attached between start_paused() and resume() --
            # presumably so that no output of `v` is produced before it exists.
            pipeline.start_paused()
            out = pipeline.listen("v")
            pipeline.resume()

            pipeline.input_json("t", [input_row])
            pipeline.wait_for_completion()

            # assertEqual (rather than a bare assert) survives `python -O` and
            # reports a readable diff when the rows differ.
            self.assertEqual(out.to_dict(), expected)
        finally:
            # Always stop the pipeline, including when a step above raised or
            # the comparison failed.
            pipeline.stop(force=True)
# Run the test case(s) in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()