Skip to content

Commit 764aebd

Browse files
author
James William Pye
committed
Implement consume_tuple_messages.
This adds a C optimization for processing Tuple('D') messages in put_tupledata. The function keeps parsing messages until it reaches one whose type is not 'D'. The existing pure-Python implementation proved to be rather costly, because the type comparison and the call to parse_tuple_message are made once per message and their invocation overhead adds up. consume_tuple_messages is also a more appropriate place to implement the 'takewhile' behaviour, which allows put_tupledata to return the number of messages consumed. This means that put_tupledata can now be more optimistic and avoid processing tuple data twice near command completion.
1 parent 82c793a commit 764aebd

3 files changed

Lines changed: 219 additions & 95 deletions

File tree

postgresql/port/optimized/element3.c

Lines changed: 95 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
mFUNC(cat_messages, METH_O, "cat the serialized form of the messages in the given list") \
66
mFUNC(parse_tuple_message, METH_O, "parse the given tuple data into a tuple of raw data") \
77
mFUNC(pack_tuple_data, METH_O, "serialize the give tuple message[tuple of bytes()]") \
8+
mFUNC(consume_tuple_messages, METH_O, "create a list of parsed tuple data tuples") \
89

910
/*
1011
* Given a tuple of bytes and None objects, join them into a
@@ -109,67 +110,60 @@ _pack_tuple_data(PyObject *tup)
109110
* dst must be of PyTuple_Type with at least natts items slots.
110111
*/
111112
static int
112-
_unpack_tuple_data(PyObject *dst, uint16_t natts, register const char *data, Py_ssize_t data_len)
113+
_unpack_tuple_data(PyObject *dst, register uint16_t natts, register const char *data, Py_ssize_t data_len)
113114
{
114-
PyObject *ob;
115-
uint16_t cnatt = 0;
116-
uint32_t attsize = 0;
117-
register uint32_t position = 0;
115+
static const unsigned char null_sequence[4] = {0xFF, 0xFF, 0xFF, 0xFF};
116+
register PyObject *ob;
117+
register uint16_t cnatt = 0;
118+
register uint32_t attsize;
119+
register const char *next;
120+
register const char *eod = data + data_len;
121+
char attsize_buf[4];
118122

119123
while (cnatt < natts)
120124
{
121125
/*
122126
* Need enough data for the attribute size.
123127
*/
124-
if (position + 4 > data_len)
128+
next = data + 4;
129+
if (next > eod)
125130
{
126131
PyErr_Format(PyExc_ValueError,
127132
"not enough data available for attribute %d's size header: "
128133
"needed %d bytes, but only %lu remain at position %lu",
129-
cnatt, 4, data_len - position, position
134+
cnatt, 4, eod - data, data_len - (eod - data)
130135
);
131136
return(-1);
132137
}
133138

134-
Py_MEMCPY(&attsize, data + position, 4);
135-
attsize = local_ntohl(attsize);
136-
position += 4;
137-
/*
138-
* NULL.
139-
*/
140-
if (attsize == (uint32_t) 0xFFFFFFFFL)
139+
Py_MEMCPY(attsize_buf, data, 4);
140+
data = next;
141+
if ((*((uint32_t *) attsize_buf)) == (*((uint32_t *) null_sequence)))
141142
{
143+
/*
144+
* NULL.
145+
*/
142146
Py_INCREF(Py_None);
143147
PyTuple_SET_ITEM(dst, cnatt, Py_None);
144148
}
145149
else
146150
{
147-
if ((position + attsize) < position)
151+
attsize = local_ntohl(*((uint32_t *) attsize_buf));
152+
153+
next = data + attsize;
154+
if (next > eod || next < data)
148155
{
149156
/*
150-
* Likely a "limitation" over the pure-Python version, *but*
151-
* the message content size is limited to 0xFFFFFFFF-4 anyways,
152-
* so it is unexpected for an attsize to cause wrap-around.
157+
* Increment caused wrap...
153158
*/
154159
PyErr_Format(PyExc_ValueError,
155-
"tuple data caused position (uint32_t) "
156-
"to wrap on attribute %d, position %lu + size %lu",
157-
cnatt, position, attsize
158-
);
159-
return(-1);
160-
}
161-
162-
if (position + attsize > data_len)
163-
{
164-
PyErr_Format(PyExc_ValueError,
165-
"not enough data for attribute %d, size %lu, "
166-
"as only %lu bytes remain in message",
167-
cnatt, attsize, data_len - position
160+
"attribute %d has invalid size %lu",
161+
cnatt, attsize
168162
);
169163
return(-1);
170164
}
171165

172-
ob = PyBytes_FromStringAndSize(data + position, attsize);
166+
ob = PyBytes_FromStringAndSize(data, attsize);
173167
if (ob == NULL)
174168
{
175169
/*
@@ -178,18 +172,18 @@ _unpack_tuple_data(PyObject *dst, uint16_t natts, register const char *data, Py_
178172
return(-1);
179173
}
180174
PyTuple_SET_ITEM(dst, cnatt, ob);
181-
position += attsize;
175+
data = next;
182176
}
183177

184178
cnatt++;
185179
}
186180

187-
if (position != data_len)
181+
if (data != eod)
188182
{
189183
PyErr_Format(PyExc_ValueError,
190184
"invalid tuple(D) message, %lu remaining "
191185
"bytes after processing %d attributes",
192-
data_len - position, cnatt
186+
(unsigned long) (eod - data), cnatt
193187
);
194188
return(-1);
195189
}
@@ -230,6 +224,72 @@ parse_tuple_message(PyObject *self, PyObject *arg)
230224
return(rob);
231225
}
232226

227+
/*
 * Parse the leading run of Tuple('D') messages found in `list`.
 *
 * `list` must be a tuple whose items are exact two-item tuples; the first
 * item of each pair must be an exact bytes object of length one (the
 * message type). The second item of each pair is handed to
 * parse_tuple_message, in order, until a pair whose type byte is not 'D'
 * is reached. Pairs after that point are neither validated nor parsed.
 *
 * Returns a new list holding the parse_tuple_message results for the
 * consumed messages, so its length is the number of messages consumed.
 * Returns NULL when `list` or one of the inspected pairs has the wrong
 * shape (TypeError), when parse_tuple_message returns NULL, or when a
 * list cannot be created.
 */
static PyObject *
consume_tuple_messages(PyObject *self, PyObject *list)
{
	const char *complaint = NULL; /* TypeError text for the failure path */
	Py_ssize_t total, consumed;
	PyObject *parsed; /* builtins.list */

	if (!PyTuple_Check(list))
	{
		PyErr_SetString(PyExc_TypeError,
			"consume_tuple_messages requires a tuple");
		return(NULL);
	}

	/*
	 * Allocate for the optimistic case where every message is a 'D';
	 * the list is trimmed afterwards if the run ended early.
	 */
	total = PyTuple_GET_SIZE(list);
	parsed = PyList_New(total);
	if (parsed == NULL)
		return(NULL);

	for (consumed = 0; consumed < total; consumed++)
	{
		PyObject *pair, *kind, *row;

		pair = PyTuple_GET_ITEM(list, consumed);
		if (!PyTuple_CheckExact(pair) || PyTuple_GET_SIZE(pair) != 2)
		{
			complaint = "consume_tuple_messages requires tuples items to be tuples (pairs)";
			goto fail;
		}

		kind = PyTuple_GET_ITEM(pair, 0);
		if (!PyBytes_CheckExact(kind) || PyBytes_GET_SIZE(kind) != 1)
		{
			complaint = "consume_tuple_messages requires pairs to consist of bytes";
			goto fail;
		}

		/*
		 * End of tuple messages.
		 */
		if (PyBytes_AS_STRING(kind)[0] != 'D')
			break;

		row = parse_tuple_message(NULL, PyTuple_GET_ITEM(pair, 1));
		if (row == NULL)
			goto fail; /* propagate the failure; no new error is set here */

		/* The list slot takes over the reference returned above. */
		PyList_SET_ITEM(parsed, consumed, row);
	}

	if (consumed < total)
	{
		/*
		 * Stopped early: only the first `consumed` slots were filled,
		 * so hand back a copy of just that prefix.
		 */
		PyObject *head = PyList_GetSlice(parsed, 0, consumed);
		Py_DECREF(parsed);
		parsed = head;
	}

	return(parsed);

fail:
	/* Release the partial result first, then report the problem. */
	Py_DECREF(parsed);
	if (complaint != NULL)
		PyErr_SetString(PyExc_TypeError, complaint);
	return(NULL);
}
292+
233293
static PyObject *
234294
pack_tuple_data(PyObject *self, PyObject *tup)
235295
{

0 commit comments

Comments
 (0)