-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathtypstruct.py
More file actions
554 lines (445 loc) · 16.2 KB
/
typstruct.py
File metadata and controls
554 lines (445 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
##
# copyright 2009, James William Pye
# http://python.projects.postgresql.org
##
"""
PostgreSQL data type protocol--packing and unpacking functions.
This module provides functions that pack and unpack many standard PostgreSQL
types. It also associates those functions with their corresponding type Oids.
The name of the function describes what type the function is intended to be used
on. Normally, the functions return a structured form of the serialized data to
be used as a parameter to the creation of a higher level instance. In
particular, most of the functions that deal with time return a pair for
representing the relative offset: (seconds, microseconds). For times, this
provides an abstraction for quad-word based times used by some configurations of
PostgreSQL.
"""
import math
import struct
from operator import itemgetter
from .. import types as pg_types
from ..python.functools import Composition as compose
# Size field used in this module to mark a NULL (None) value: four 0xFF
# octets, which is -1 when read as a signed 32-bit network-order integer.
null_sequence = b'\xff\xff\xff\xff'
# Always to and from network order.
def mk_pack(x):
    """
    Create a pair, (pack, unpack), for the given `struct` format.

    The format is always compiled with the '!' (network order) prefix.

    When the format is longer than one character, `pack` accepts a single
    sequence holding the fields and `unpack` returns the tuple produced by
    `struct`. Otherwise `pack` accepts the bare value and `unpack` is composed
    with `itemgetter(0)` so that the bare value is returned.
    """
    compiled = struct.Struct('!' + x)
    if len(x) <= 1:
        # Single field: strip the one-element tuple that Struct.unpack yields.
        return (compiled.pack, compose((compiled.unpack, itemgetter(0))))

    def pack_fields(fields):
        return compiled.pack(*fields)
    return (pack_fields, compiled.unpack)
def mktimetuple(ts):
    """
    Make a pair of (seconds, microseconds) out of the given double.

    `seconds` is the floor of `ts`, so the microseconds component is never
    negative, including for negative inputs.

    The fractional part is rounded to the nearest microsecond rather than
    truncated. Truncation loses a microsecond whenever the double sits just
    below the intended value (1.000001 would yield (1, 0)).
    """
    seconds = math.floor(ts)
    microseconds = int(round(1000000 * (ts - seconds)))
    if microseconds >= 1000000:
        # Rounding reached a whole second; carry it into `seconds`.
        seconds += 1
        microseconds -= 1000000
    return (int(seconds), microseconds)
def mktimetuple64(ts):
    """
    Make a pair of (seconds, microseconds) out of the given integer count of
    microseconds.

    Floor division is used, so the microseconds component is never negative.
    """
    seconds, microseconds = divmod(ts, 1000000)
    return (seconds, microseconds)
def mktime(seconds_ms):
    """
    Make a double out of the pair of (seconds, microseconds).
    """
    whole = float(seconds_ms[0])
    fraction = seconds_ms[1] / 1000000.0
    return whole + fraction
def mktime64(seconds_ms):
    """
    Make an integer count of microseconds out of the pair of
    (seconds, microseconds).
    """
    whole_in_us = seconds_ms[0] * 1000000
    return whole_in_us + seconds_ms[1]
# bool: a single octet, 0x01 for True and 0x00 for False.
data_to_bool = {
    b'\x01' : True,
    b'\x00' : False,
}
# The packing table is the inverse of the unpacking table.
bool_to_data = {value : octet for octet, value in data_to_bool.items()}
bool_pack = bool_to_data.__getitem__
bool_unpack = data_to_bool.__getitem__

# Single-bit bit string: a four octet bit count of one, then the data octet.
bitdata_to_bool = {
    b'\x00\x00\x00\x01\x01' : True,
    b'\x00\x00\x00\x01\x00' : False,
}
bool_to_bitdata = {value : octets for octets, value in bitdata_to_bool.items()}
bit_pack = bool_to_bitdata.__getitem__
bit_unpack = bitdata_to_bool.__getitem__
# Single octet <-> integer; needs no struct format.
def byte_pack(x):
    return bytes((x,))

def byte_unpack(x):
    return x[0]

# Single-field formats: pack takes the bare value, unpack returns it.
longlong_pack, longlong_unpack = mk_pack("q")
long_pack, long_unpack = mk_pack("l")
ulong_pack, ulong_unpack = mk_pack("L")
short_pack, short_unpack = mk_pack("h")
ushort_pack, ushort_unpack = mk_pack("H")
double_pack, double_unpack = mk_pack("d")
float_pack, float_unpack = mk_pack("f")

# Multi-field formats: pack takes one sequence of fields, unpack returns a
# tuple. Each name spells out the struct format it wraps.
dd_pack, dd_unpack = mk_pack("dd")
ddd_pack, ddd_unpack = mk_pack("ddd")
dddd_pack, dddd_unpack = mk_pack("dddd")
LH_pack, LH_unpack = mk_pack("LH")
llL_pack, llL_unpack = mk_pack("llL")
qll_pack, qll_unpack = mk_pack("qll")
dll_pack, dll_unpack = mk_pack("dll")
BBBB_pack, BBBB_unpack = mk_pack("BBBB")
dl_pack, dl_unpack = mk_pack("dl")
ql_pack, ql_unpack = mk_pack("ql")
hhhh_pack, hhhh_unpack = mk_pack("hhhh")
# Optimizations for int2 and int4.
# When the sibling `optimized` module can be imported, its functions replace
# the struct-based short/long packers defined above; the variant chosen
# depends on the host byte order. If the import fails, the struct-based
# versions stay in place.
from sys import byteorder as bo
try:
    if bo == 'little':
        from .optimized import swap_int2_unpack as short_unpack, swap_int2_pack as short_pack
        from .optimized import swap_int4_unpack as long_unpack, swap_int4_pack as long_pack
        from .optimized import swap_uint2_unpack as ushort_unpack, swap_uint2_pack as ushort_pack
        from .optimized import swap_uint4_unpack as ulong_unpack, swap_uint4_pack as ulong_pack
    elif bo == 'big':
        from .optimized import int2_unpack as short_unpack, int2_pack as short_pack
        from .optimized import int4_unpack as long_unpack, int4_pack as long_pack
        from .optimized import uint2_unpack as ushort_unpack, uint2_pack as ushort_pack
        from .optimized import uint4_unpack as ulong_unpack, uint4_pack as ulong_pack
except ImportError:
    pass
finally:
    # Drop the temporary name on every path; previously it stayed in the
    # module namespace whenever the import raised.
    del bo
# Integer types, named after the PostgreSQL types they serialize.
int2_pack = short_pack
int2_unpack = short_unpack
int4_pack = long_pack
int4_unpack = long_unpack
int8_pack = longlong_pack
int8_unpack = longlong_unpack

# oid, cid and xid all use the unsigned 32-bit packer.
oid_pack = ulong_pack
cid_pack = ulong_pack
xid_pack = ulong_pack
oid_unpack = ulong_unpack
cid_unpack = ulong_unpack
xid_unpack = ulong_unpack

# tid: an unsigned 32-bit integer followed by an unsigned 16-bit integer.
tid_pack = LH_pack
tid_unpack = LH_unpack

# geometry types
point_pack = dd_pack
point_unpack = dd_unpack
circle_pack = ddd_pack
circle_unpack = ddd_unpack
lseg_pack = dddd_pack
box_pack = dddd_pack
lseg_unpack = dddd_unpack
box_unpack = dddd_unpack
def numeric_pack(data):
    """
    Serialize a numeric given as a pair, (header, numbers).

    `header` is a sequence of four signed 16-bit integers and `numbers` is a
    sequence of signed 16-bit integers that is written directly after it.
    """
    header, digits = data
    packed_header = hhhh_pack(header)
    packed_digits = struct.pack("!%dh" % (len(digits),), *digits)
    return packed_header + packed_digits
def numeric_unpack(data):
    """
    Unpack serialized numeric data into a pair, (header, numbers).

    `header` is the tuple of four signed 16-bit integers taken from the first
    eight octets; `numbers` is the tuple of signed 16-bit integers that make
    up the remainder of the data.
    """
    header = hhhh_unpack(data[:8])
    ndigits = (len(data) - 8) // 2
    # "8x" skips the header that was already decoded above.
    digits = struct.unpack("!8x%dh" % (ndigits,), data)
    return (header, digits)
def path_pack(data):
    """
    Given a flat sequence of point data, pack it into the serialized form:

        [px1, py1, px2, py2, ...]

    The result is a signed 32-bit count followed by every number as a double.
    The sequence is expected to hold an even number of numbers.

    NOTE(review): the count written is the number of doubles, not the number
    of (x, y) pairs -- confirm against the server's wire format.
    """
    count = len(data)
    layout = "!l%dd" % (count,)
    return struct.pack(layout, count, *data)
def path_unpack(data):
    """
    Unpack the serialized form into a flat tuple of point data:

        (px1, py1, px2, py2, ...)

    The leading signed 32-bit integer is used as the number of doubles that
    follow it.

    NOTE(review): the count is treated as a number of doubles, not a number
    of (x, y) pairs -- confirm against the server's wire format.
    """
    count = long_unpack(data[:4])
    # "4x" skips the count that was just decoded.
    return struct.unpack("!4x%dd" % (count,), data)
# Polygons reuse the path serialization.
polygon_pack = path_pack
polygon_unpack = path_unpack

# time types
# A date is a single signed 32-bit integer.
date_pack = long_pack
date_unpack = long_unpack

# Double based times; time_pack takes a pair, (seconds, microseconds), and
# time_unpack produces one.
time_pack = compose((mktime, double_pack))
time_unpack = compose((double_unpack, mktimetuple))
def interval_pack(m_d_timetup):
    """
    Serialize an interval given as the triple:

        (month, day, (seconds, microseconds))

    The time is written first as a double, followed by the day and then the
    month as signed 32-bit integers.
    """
    month, day, timetup = m_d_timetup
    fields = (mktime(timetup), day, month)
    return dll_pack(fields)
def interval_unpack(data):
    """
    Given a serialized interval, '{time}{day}{month}', return the triple:

        (month, day, (seconds, microseconds))

    The time is read as a double and split with `mktimetuple`.
    """
    seconds, day, month = dll_unpack(data)
    timetup = mktimetuple(seconds)
    return (month, day, timetup)
def interval_noday_pack(month_day_timetup):
    """
    Serialize the triple, (month, day, (seconds, microseconds)), into the
    double based form that has no separate day field.

    A non-zero day is folded into the seconds at 24 * 60 * 60 seconds per
    day before packing.
    """
    month, day, timetup = month_day_timetup
    seconds, microseconds = timetup[0], timetup[1]
    if day:
        seconds = seconds + (day * 24 * 60 * 60)
    return dl_pack((mktime((seconds, microseconds)), month))
def interval_noday_unpack(data):
    """
    Given a serialized double based interval that has no day field, return
    the triple:

        (month, 0, (seconds, microseconds))

    The day is always reported as zero.
    """
    seconds, month = dl_unpack(data)
    timetup = mktimetuple(seconds)
    return (month, 0, timetup)
# Quad-word (64-bit integer) based times. These are the counterparts of
# time_pack/time_unpack built from mktime64/mktimetuple64 and the "q" packer
# instead of the double based helpers.
time64_pack = compose((mktime64, longlong_pack))
time64_unpack = compose((longlong_unpack, mktimetuple64))
def interval64_pack(m_d_timetup):
    """
    Serialize an interval given as the triple:

        (month, day, (seconds, microseconds))

    The time is written first as a quad-word count of microseconds, followed
    by the day and then the month as signed 32-bit integers.
    """
    month, day, timetup = m_d_timetup
    fields = (mktime64(timetup), day, month)
    return qll_pack(fields)
def interval64_unpack(data):
    """
    Unpack a quad-word based interval into the triple:

        (month, day, (seconds, microseconds))

    The leading quad-word is split with `mktimetuple64`.
    """
    microseconds, day, month = qll_unpack(data)
    timetup = mktimetuple64(microseconds)
    return (month, day, timetup)
def interval64_noday_pack(m_d_timetup):
    """
    Serialize the triple, (month, day, (seconds, microseconds)), into the
    quad-word based form that has no separate day field.

    A non-zero day is folded into the seconds at 24 * 60 * 60 seconds per
    day before packing.
    """
    month, day, timetup = m_d_timetup
    seconds, microseconds = timetup[0], timetup[1]
    if day:
        seconds = seconds + (day * 24 * 60 * 60)
    return ql_pack((mktime64((seconds, microseconds)), month))
def interval64_noday_unpack(data):
    """
    Unpack a ``noday`` quad-word based interval into the triple:

        (month, 0, (seconds, microseconds))

    The day is always reported as zero.
    """
    microseconds, month = ql_unpack(data)
    timetup = mktimetuple64(microseconds)
    return (month, 0, timetup)
def timetz_pack(timetup_tz):
    """
    Pack a time with its timezone offset.

    Takes a pair, ((seconds, microseconds), timezone_offset), and serializes
    it as "!dl": the time as a double, then the offset as a signed 32-bit
    integer.
    """
    timetup, tz_offset = timetup_tz
    fields = (mktime(timetup), tz_offset)
    return dl_pack(fields)
def timetz_unpack(data):
    """
    Unpack double based time data, "!dl", into the pair:

        ((seconds, microseconds), timezone_offset)
    """
    seconds, tz_offset = dl_unpack(data)
    timetup = mktimetuple(seconds)
    return (timetup, tz_offset)
def timetz64_pack(timetup_tz):
    """
    Pack a time with its timezone offset using a quad-word for the time.

    Takes a pair, ((seconds, microseconds), timezone_offset), and serializes
    it as "!ql": the time as a count of microseconds, then the offset as a
    signed 32-bit integer.
    """
    timetup, tz_offset = timetup_tz
    fields = (mktime64(timetup), tz_offset)
    return ql_pack(fields)
def timetz64_unpack(data):
    """
    Unpack quad-word based time data, "!ql", into the pair:

        ((seconds, microseconds), timezone_offset)
    """
    microseconds, tz_offset = ql_unpack(data)
    timetup = mktimetuple64(microseconds)
    return (timetup, tz_offset)
# oidvectors are serialized as 128 bytes here: the given Oids are packed as
# unsigned 32-bit integers and the result is padded to 128 with \x00.
def oidvector_pack(seq):
    """
    Given a sequence of Oids, pack them into the serialized form.

    Each Oid is written as an unsigned 32-bit integer in network order and
    the result is right-padded with NUL octets to 128 bytes. A sequence of
    more than 32 Oids is not truncated.

    An oidvector is a type used by the PostgreSQL catalog.
    """
    packed = struct.pack("!%dL" % (len(seq),), *seq)
    # The fill character must be bytes; a str fill raises TypeError.
    return packed.ljust(128, b'\x00')
def oidvector_unpack(data):
    """
    Given a serialized oidvector, unpack it into a tuple of 32 unsigned
    integers.

    The data must be exactly 128 bytes, 32 unsigned 32-bit integers in
    network order; any other size makes `struct` raise an error.

    An oidvector is a type used by the PostgreSQL catalog.
    """
    layout = "!32L"
    return struct.unpack(layout, data)
def int2vector_pack(seq):
    """
    Given a sequence of integers, pack them into the serialized form.

    Each integer is written as a signed 16-bit integer in network order and
    the result is right-padded with NUL octets to 64 bytes. A sequence of
    more than 32 integers is not truncated.

    An int2vector is a type used by the PostgreSQL catalog.
    """
    packed = struct.pack("!%dh" % (len(seq),), *seq)
    # The fill character must be bytes; a str fill raises TypeError.
    return packed.ljust(64, b'\x00')
def int2vector_unpack(data):
    """
    Given a serialized int2vector, unpack it into a tuple of 32 integers.

    The data must be exactly 64 bytes, 32 signed 16-bit integers in network
    order; any other size makes `struct` raise an error.

    An int2vector is a type used by the PostgreSQL catalog.
    """
    layout = "!32h"
    return struct.unpack(layout, data)
def varbit_pack(bits_data):
    r"""
    Given a pair, (number of bits, data), serialize the varbit.

    The bit count is written as a signed 32-bit integer and the data is
    appended to it unchanged.

    >>> varbit_pack((1, b'\x00'))
    b'\x00\x00\x00\x01\x00'
    """
    bit_count = long_pack(bits_data[0])
    return bit_count + bits_data[1]
def varbit_unpack(data):
    """
    Given ``varbit`` data, unpack it into a pair:

        (bits, data)

    Where `bits` is the signed 32-bit integer read from the first four
    octets and `data` is everything that follows them.
    """
    bits = long_unpack(data[0:4])
    return (bits, data[4:])
def net_pack(family_mask_data):
    """
    Given a triple, (family, mask, data), return the serialized form for
    transport.

    Four single-octet fields are placed before the data: the family, the
    mask, an ``is_cidr`` octet that is always written as 0x01, and the
    length of the data.

    Supports cidr and inet types.
    """
    family, mask, data = family_mask_data
    parts = [
        byte_pack(family),
        byte_pack(mask),
        b'\x01',
        byte_pack(len(data)),
        data,
    ]
    return b''.join(parts)
def net_unpack(data):
    """
    Given serialized cidr or inet data, return a tuple:

        (family, mask, data)

    The ``is_cidr`` octet is read but not returned. ValueError is raised
    when the size octet disagrees with the amount of data that follows the
    four octet header.
    """
    header, payload = data[:4], data[4:]
    family, mask, is_cidr, size = BBBB_unpack(header)
    if size != len(payload):
        raise ValueError("invalid size parameter")
    return (family, mask, payload)
def record_unpack(data):
    """
    Given serialized record data, generate a (type Oid, attribute) pair for
    each column.

    The data starts with a signed 32-bit column count. Each column is an
    unsigned 32-bit type Oid, a signed 32-bit size, and then `size` octets of
    attribute data. A size equal to `null_sequence` marks a NULL attribute,
    which is yielded as None and has no data octets.

    This is a generator: errors surface while it is being iterated.

    Raises ValueError when an attribute's size is invalid or runs past the
    end of the data, and, once every column has been produced, when octets
    are left over after the last column.
    """
    columns = long_unpack(data[0:4])
    offset = 4
    for _ in range(columns):
        typid = oid_unpack(data[offset:offset+4])
        offset += 4
        size_field = data[offset:offset+4]
        offset += 4
        if size_field == null_sequence:
            att = None
        else:
            size = long_unpack(size_field)
            att = data[offset:offset + size]
            # A short slice means the message ended before the attribute did.
            if size < -1 or len(att) != size:
                raise ValueError("insufficient data left in message")
            offset += size
        yield (typid, att)
    extra = len(data) - offset
    if extra != 0:
        # Report the number of leftover octets, not the total message length.
        raise ValueError("extra data, %d octets, at end of record" %(extra,))
def record_pack(seq):
    """
    Pack a record given a sequence of (type_oid, data) pairs.

    The result is the number of pairs as a signed 32-bit integer followed,
    for each pair, by the type Oid and then either `null_sequence` (when the
    data is None) or the data's length and the data itself.

    `seq` must support `len()`.
    """
    column_count = long_pack(len(seq))
    columns = []
    for typid, att in seq:
        if att is None:
            tail = null_sequence
        else:
            tail = long_pack(len(att)) + att
        columns.append(oid_pack(typid) + tail)
    return column_count + b''.join(columns)
def elements_pack(elements):
    """
    Generate the serialized pieces of the elements of an array.

    A None element produces `null_sequence`; any other element produces its
    length as a signed 32-bit integer and then the element itself.

    This is used by array_pack.
    """
    for element in elements:
        if element is not None:
            yield long_pack(len(element))
            yield element
        else:
            yield null_sequence
def array_pack(array_data):
    """
    Pack a raw array given as the tuple:

        (flags, element type oid, dlb, iterable of element data)

    `dlb` is a flat sequence of signed 32-bit integers holding two entries
    per dimension, so the number of dimensions written to the header is
    ``len(dlb) // 2``:

        [first, second, first, second]

    describes an array of two dimensions.

    NOTE(review): earlier documentation of this function describes each pair
    as the (lower, upper) bounds of the dimension -- confirm against the
    server's array format.

    The elements must already be serialized; None stands for NULL. The
    result is the header (dimension count, flags, type oid), then `dlb`,
    then the elements as produced by elements_pack.
    """
    flags, typid, dlb, elements = array_data
    ndim = len(dlb) // 2
    header = llL_pack((ndim, flags, typid))
    bounds = struct.pack("!%dl" % (len(dlb),), *dlb)
    body = b''.join(elements_pack(elements))
    return header + bounds + body
def elements_unpack(data, offset):
    """
    Generate the serialized elements of an array found in `data`, starting
    at `offset` and continuing to the end of the data.

    Each element is a signed 32-bit size followed by that many octets. A
    size equal to `null_sequence` marks a NULL element, which is yielded as
    None and has no data octets.

    This is a generator: errors surface while it is being iterated.

    Raises ValueError when an element's size is negative or when the data
    ends before the element does. A negative size would otherwise move the
    offset backwards and could keep the generator from ever finishing.

    This is used by array_unpack.
    """
    data_len = len(data)
    while offset < data_len:
        lend = data[offset:offset+4]
        offset += 4
        if lend == null_sequence:
            yield None
        else:
            sizeof_el = long_unpack(lend)
            if sizeof_el < 0:
                raise ValueError("invalid element size: %d" %(sizeof_el,))
            element = data[offset:offset+sizeof_el]
            # A short slice means the message ended before the element did.
            if len(element) != sizeof_el:
                raise ValueError("insufficient data left in message")
            yield element
            offset += sizeof_el
def array_unpack(data):
    """
    Given a serialized array, unpack it into a tuple:

        (flags, typid, dlb, elements)

    `dlb` is a tuple of signed 32-bit integers, two per dimension, and
    `elements` is the generator returned by elements_unpack, so the element
    data is only decoded when it is iterated.

    Raises ValueError when the header carries a negative dimension count.
    """
    ndim, flags, typid = llL_unpack(data[0:12])
    if ndim < 0:
        raise ValueError("invalid number of dimensions: %d" %(ndim,))
    # Two four-octet integers per dimension follow the twelve octet header.
    bounds_end = 12 + (ndim * 2 * 4)
    dlb = struct.unpack("!%dl" % (2 * ndim,), data[12:bounds_end])
    elements = elements_unpack(data, bounds_end)
    return (flags, typid, dlb, elements)
def return_arg(arg):
    'Give the argument back unchanged.'
    return arg

# (pack, unpack) pair for types whose data is passed through untouched.
literal = (return_arg,) * 2
# Only `literal` is kept in the module namespace.
del return_arg
# Maps a type Oid to its (pack, unpack) pair of functions.
# The time related types are not listed here; they live in the separate
# time_io, time_noday_io, time64_io and time64_noday_io mappings because
# their serialization has several variants.
oid_to_io = {
# composite and container types
pg_types.RECORDOID : (record_pack, record_unpack),
pg_types.ANYARRAYOID : (array_pack, array_unpack),
# booleans and bit strings
pg_types.BOOLOID : (bool_pack, bool_unpack),
pg_types.BITOID : (bit_pack, bit_unpack),
pg_types.VARBITOID : (varbit_pack, varbit_unpack),
# raw data
pg_types.BYTEAOID : (bytes, bytes),
pg_types.CHAROID : literal,
# pg_types.MACADDROID : literal,
# network addresses
pg_types.INETOID : (net_pack, net_unpack),
pg_types.CIDROID : (net_pack, net_unpack),
# types carried as a single signed 32-bit integer
pg_types.DATEOID : (date_pack, date_unpack),
pg_types.ABSTIMEOID : (long_pack, long_unpack),
# numbers
pg_types.INT2OID : (int2_pack, int2_unpack),
pg_types.INT4OID : (int4_pack, int4_unpack),
pg_types.INT8OID : (int8_pack, int8_unpack),
pg_types.NUMERICOID : (numeric_pack, numeric_unpack),
# system identifiers
pg_types.OIDOID : (oid_pack, oid_unpack),
pg_types.XIDOID : (xid_pack, xid_unpack),
pg_types.CIDOID : (cid_pack, cid_unpack),
pg_types.TIDOID : (tid_pack, tid_unpack),
# floating point
pg_types.FLOAT4OID : (float_pack, float_unpack),
pg_types.FLOAT8OID : (double_pack, double_unpack),
# geometry
pg_types.POINTOID : (point_pack, point_unpack),
pg_types.LSEGOID : (lseg_pack, lseg_unpack),
pg_types.BOXOID : (box_pack, box_unpack),
pg_types.CIRCLEOID : (circle_pack, circle_unpack),
pg_types.PATHOID : (path_pack, path_unpack),
pg_types.POLYGONOID : (polygon_pack, polygon_unpack),
# The entries below are disabled; no matching pack/unpack functions are
# defined in the visible part of this module.
#pg_types.ACLITEMOID : (aclitem_pack, aclitem_unpack),
#pg_types.LINEOID : (line_pack, line_unpack),
#pg_types.CASHOID : (cash_pack, cash_unpack),
}
# Type Oid -> (pack, unpack) tables for the time related types. There is one
# table for each combination of time representation (double or quad-word)
# and interval layout (with or without a separate day field).

# Double based times, interval with a day field.
time_io = {
    pg_types.TIMEOID : (time_pack, time_unpack),
    pg_types.TIMETZOID : (timetz_pack, timetz_unpack),
    pg_types.TIMESTAMPOID : (time_pack, time_unpack),
    pg_types.TIMESTAMPTZOID : (time_pack, time_unpack),
    pg_types.INTERVALOID : (interval_pack, interval_unpack)
}

# Double based times, interval without a day field. Only the interval entry
# differs from time_io; replacing it in a copy keeps the key order.
time_noday_io = dict(time_io)
time_noday_io[pg_types.INTERVALOID] = (interval_noday_pack, interval_noday_unpack)

# Quad-word based times, interval with a day field.
time64_io = {
    pg_types.TIMEOID : (time64_pack, time64_unpack),
    pg_types.TIMETZOID : (timetz64_pack, timetz64_unpack),
    pg_types.TIMESTAMPOID : (time64_pack, time64_unpack),
    pg_types.TIMESTAMPTZOID : (time64_pack, time64_unpack),
    pg_types.INTERVALOID : (interval64_pack, interval64_unpack)
}

# Quad-word based times, interval without a day field.
time64_noday_io = dict(time64_io)
time64_noday_io[pg_types.INTERVALOID] = (interval64_noday_pack, interval64_noday_unpack)