Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions docs/library/zlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,25 @@ Functions

Return decompressed *data* as bytes. *wbits* is DEFLATE dictionary window
size used during compression (8-15, the dictionary size is power of 2 of
that value). Additionally, if value is positive, *data* is assumed to be
zlib stream (with zlib header). Otherwise, if it's negative, it's assumed
to be raw DEFLATE stream. *bufsize* parameter is for compatibility with
CPython and is ignored.
that value). Additionally:
* if the value is negative, *data* is assumed to be a raw DEFLATE stream.
* if the value has 16 added to it (24-31, i.e. 16 + 8..15), *data* will be
  handled as a gzip stream.
* otherwise, *data* is assumed to be a zlib stream (with zlib header).
*bufsize* parameter is for compatibility with CPython and is ignored.

.. function:: compress(data, wbits=15, /)

Return gzip DEFLATE compressed *data* as bytearray. *wbits* is DEFLATE dictionary window
size used during compression (8-15, the dictionary size is power of 2 of
that value). The result can be decompressed with *DecompIO()* or *decompress()*
in gzip mode (*wbits=31*).

.. class:: DecompIO(stream, wbits=0, /)

Create a `stream` wrapper which allows transparent decompression of
compressed data in another *stream*. This allows to process compressed
streams with data larger than available heap size. In addition to
values described in :func:`decompress`, *wbits* may take values
24..31 (16 + 8..15), meaning that input stream has gzip header.
streams with data larger than available heap size.

.. admonition:: Difference to CPython
:class: attention
Expand Down
1 change: 1 addition & 0 deletions examples/natmod/uzlib/uzlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mp_obj_t mpy_init(mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, mp_obj_t *a

mp_store_global(MP_QSTR___name__, MP_OBJ_NEW_QSTR(MP_QSTR_uzlib));
mp_store_global(MP_QSTR_decompress, MP_OBJ_FROM_PTR(&mod_uzlib_decompress_obj));
mp_store_global(MP_QSTR_compress, MP_OBJ_FROM_PTR(&mod_uzlib_compress_obj));
mp_store_global(MP_QSTR_DecompIO, MP_OBJ_FROM_PTR(&decompio_type));

MP_DYNRUNTIME_INIT_EXIT
Expand Down
75 changes: 69 additions & 6 deletions extmod/moduzlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <stdio.h>
#include <string.h>

#include "py/gc.h"
#include "py/runtime.h"
#include "py/stream.h"
#include "py/mperrno.h"
Expand Down Expand Up @@ -165,13 +166,14 @@ STATIC mp_obj_t mod_uzlib_decompress(size_t n_args, const mp_obj_t *args) {
decomp->source_limit = (byte *)bufinfo.buf + bufinfo.len;

int st;
bool is_zlib = true;
int wbits = n_args > 1 ? MP_OBJ_SMALL_INT_VALUE(args[1]) : 0; // zlib enabled by default

if (n_args > 1 && MP_OBJ_SMALL_INT_VALUE(args[1]) < 0) {
is_zlib = false;
}

if (is_zlib) {
if (wbits >= 16) {
st = uzlib_gzip_parse_header(decomp);
if (st != TINF_OK) {
goto error;
}
} else if (wbits >= 0) {
st = uzlib_zlib_parse_header(decomp);
if (st < 0) {
goto error;
Expand Down Expand Up @@ -206,9 +208,68 @@ STATIC mp_obj_t mod_uzlib_decompress(size_t n_args, const mp_obj_t *args) {
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_uzlib_decompress_obj, 1, 3, mod_uzlib_decompress);

#if !MICROPY_ENABLE_DYNRUNTIME
STATIC mp_obj_t mod_uzlib_compress(size_t n_args, const mp_obj_t *args) {
mp_obj_t data = args[0];
mp_buffer_info_t bufinfo;
mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ);
uint32_t len = bufinfo.len;

struct uzlib_comp *comp = m_new0(struct uzlib_comp, 1);

// set deflate compression parameters for gzip
comp->dict_size = 32768;
if (n_args > 1) {
comp->dict_size = 1 << MP_OBJ_SMALL_INT_VALUE(args[1]);
}
comp->hash_bits = 12;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This and the previous line could use a comment stating why these values were chosen, I think (I know nothing about gzip, though).

Copy link
Copy Markdown
Contributor

@codefreax codefreax Feb 13, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to expose these settings in the API?

size_t hash_size = sizeof(uzlib_hash_entry_t) * (1 << comp->hash_bits);
comp->hash_table = gc_alloc(hash_size, false);
memset(comp->hash_table, 0, hash_size);

zlib_start_block(&comp->out);
uzlib_compress(comp, bufinfo.buf, len);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uzlib_compress as well as zlib_start_block internally (re-)allocate a buffer at comp->out which is never free'd. As a result the system runs out of memory after a few compression runs.

A free(comp->out.outbuf); after the subsequent memcpy solved this.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uzlib_compress as well as zlib_start_block internally (re-)allocate a buffer at comp->out which is never free'd. As a result the system runs out of memory after a few compression runs.

A free(comp->out.outbuf); after the subsequent memcpy solved this.

I'm trying to encode a string into the shortest possible SMS payload.
My question is: how can I add this gzip uzlib extension to my current 1.14 toolchain for esp32?

zlib_finish_block(&comp->out);

DEBUG_printf("compressed from %u to %u raw bytes\n", len, comp->out.outlen);

// allocate final buffer incl. 10 header bytes and 8 trailing bytes
mp_uint_t dest_buf_size = comp->out.outlen + 18;
byte *dest_buf = m_new(byte, dest_buf_size);

/* GZIP header bytes: */
/* 0-1: GZIP ID1, ID2 = 0x1f, 0x8b */
/* 2: compression method (8 = deflate) */
/* 3: flags (0 = no additional header fields) */
/* 4-7: modification time (0 = none) */
/* 8: extra flags (4 = compressor used fastest algo) */
/* 9: operating system (3 = unix) */
static const unsigned char gzip_header[] =
{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03 };

memcpy(dest_buf, gzip_header, sizeof(gzip_header));
memcpy(dest_buf + sizeof(gzip_header), comp->out.outbuf, comp->out.outlen);

// append 32 bit crc of original data
uint32_t offset = sizeof(gzip_header) + comp->out.outlen;
uint32_t crc = ~uzlib_crc32(bufinfo.buf, len, ~0);
memcpy(dest_buf + offset, &crc, sizeof(crc));
// append 32 bit length of original data
memcpy(dest_buf + offset + sizeof(crc), &len, sizeof(len));

// free all temporarily used memory
gc_free(comp->out.outbuf); // free internal buffer allocated by compression
gc_free(comp->hash_table);
m_del_obj(struct uzlib_comp, comp);

// return result as MP bytearray
return mp_obj_new_bytearray_by_ref(dest_buf_size, dest_buf);
}
// Function object wrapping mod_uzlib_compress. The 1 and 2 presumably bound the
// positional argument count (data, plus the optional args[1] the function reads
// when n_args > 1) -- confirm against the macro definition.
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_uzlib_compress_obj, 1, 2, mod_uzlib_compress);

// Globals table for the uzlib module: each entry pairs an attribute name (qstr)
// with the object or value exposed under that name.
STATIC const mp_rom_map_elem_t mp_module_uzlib_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_uzlib) },
{ MP_ROM_QSTR(MP_QSTR_decompress), MP_ROM_PTR(&mod_uzlib_decompress_obj) },
// compress is registered from mod_uzlib_compress_obj.
{ MP_ROM_QSTR(MP_QSTR_compress), MP_ROM_PTR(&mod_uzlib_compress_obj) },
{ MP_ROM_QSTR(MP_QSTR_DecompIO), MP_ROM_PTR(&decompio_type) },
};

Expand All @@ -228,5 +289,7 @@ const mp_obj_module_t mp_module_uzlib = {
#include "lib/uzlib/tinfgzip.c"
#include "lib/uzlib/adler32.c"
#include "lib/uzlib/crc32.c"
#include "lib/uzlib/genlz77.c"
#include "lib/uzlib/defl_static.c"

#endif // MICROPY_PY_UZLIB
Loading