Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unpack performance optimization: preallocate the table when unpack map #22

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
##### Available defines for CMSGPACK_CFLAGS #####
##
## USE_INTERNAL_ISINF: Workaround for Solaris platforms missing isinf().
## DISABLE_INVALID_NUMBERS: Permanently disable invalid JSON numbers:
## NaN, Infinity, hex.
##
## Optional built-in number conversion uses the following defines:
## USE_INTERNAL_FPCONV: Use builtin strtod/dtoa for numeric conversions.
## IEEE_BIG_ENDIAN: Required on big endian architectures.
## MULTIPLE_THREADS: Must be set when Lua CMSGPACK may be used in a
## multi-threaded application. Requires _pthreads_.

##### Build defaults #####
LUA_VERSION = 5.1
TARGET = cmsgpack.so
PREFIX = /usr/local
CFLAGS = -g -O3 -Wall -pedantic
#CFLAGS = -g -Wall -pedantic -fno-inline -fprofile-arcs -ftest-coverage -lgcov
#CFLAGS = -O3 -Wall -pedantic -DNDEBUG
CMSGPACK_CFLAGS = -fpic
CMSGPACK_LDFLAGS = -shared
#CMSGPACK_LDFLAGS = -shared -fprofile-arcs
LUA_INCLUDE_DIR = $(PREFIX)/include
LUA_CMODULE_DIR = $(PREFIX)/lib/lua/$(LUA_VERSION)
LUA_MODULE_DIR = $(PREFIX)/share/lua/$(LUA_VERSION)
LUA_BIN_DIR = $(PREFIX)/bin


##### End customisable sections #####

DATAPERM = 644
EXECPERM = 755

ASCIIDOC = asciidoc

BUILD_CFLAGS = -I$(LUA_INCLUDE_DIR) $(CMSGPACK_CFLAGS)
OBJS = lua_cmsgpack.o
.PHONY: all clean install

%.o: %.c
$(CC) -c $(CFLAGS) $(BUILD_CFLAGS) -o $@ $<

all: $(TARGET)

$(TARGET): $(OBJS)
$(CC) $(CMSGPACK_LDFLAGS) -o $@ $(OBJS)

install: $(TARGET)
mkdir -p $(DESTDIR)/$(LUA_CMODULE_DIR)
cp $(TARGET) $(DESTDIR)/$(LUA_CMODULE_DIR)
chmod $(EXECPERM) $(DESTDIR)/$(LUA_CMODULE_DIR)/$(TARGET)

clean:
rm -f *.o $(TARGET)
22 changes: 5 additions & 17 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,9 @@ The library is currently considered in BETA STAGE for lack of extensive testing.
INSTALLATION
---

Using LuaRocks (http://luarocks.org):
make
sudo make install

* Install current stable release:

sudo luarocks install lua-cmsgpack

* Install current Git master head from GitHub:

sudo luarocks install lua-cmsgpack --from=rocks-cvs

* Install from current working copy

cd lua-cmsgpack/
sudo luarocks make rockspec/lua-cmsgpack-scm-1.rockspec

If you embed Lua and all modules into your C project, just add the
lua_cmsgpack.c file and call the following function after creating the Lua
Expand Down Expand Up @@ -54,9 +43,9 @@ maps.
NESTED TABLES
---
Nested tables are handled correctly up to LUACMSGPACK_MAX_NESTING levels of
nesting (that is set to 16 by default).
nesting (that is set to 9 by default).
Every table that is nested at a greater level than the maximum is encoded
as MessagePack nil value.
as MessagePack nil value, and an exception is thrown.

It is worth noting that in Lua it is possible to create tables that mutually
refer to each other, creating a cycle. For example:
Expand All @@ -65,8 +54,7 @@ a = {x=nil,y=5}
b = {x=a}
a['x'] = b

This condition will simply make the encoder reach the max level of nesting,
thus avoiding an infinite loop.
This condition will throw an exception.

CREDITS
---
Expand Down
150 changes: 137 additions & 13 deletions lua_cmsgpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#define LUACMSGPACK_COPYRIGHT "Copyright (C) 2012, Salvatore Sanfilippo"
#define LUACMSGPACK_DESCRIPTION "MessagePack C implementation for Lua"

#define LUACMSGPACK_MAX_NESTING 16 /* Max tables nesting. */
#define LUACMSGPACK_MAX_NESTING 9 /* Max tables nesting. */

/* ==============================================================================
* MessagePack implementation and bindings for Lua 5.1.
Expand Down Expand Up @@ -69,7 +69,7 @@ typedef struct mp_buf {

static mp_buf *mp_buf_new(void) {
mp_buf *buf = malloc(sizeof(*buf));

buf->b = NULL;
buf->len = buf->free = 0;
return buf;
Expand Down Expand Up @@ -367,26 +367,24 @@ static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
* of keys from numerical keys from 1 up to N, with N being the total number
* of elements, without any hole in the middle. */
static int table_is_an_array(lua_State *L) {
long count = 0, max = 0, idx = 0;
long idx = 0;
lua_Number n;

lua_pushnil(L);
while(lua_next(L,-2)) {
idx++;
/* Stack: ... key value */
lua_pop(L,1); /* Stack: ... key */
if (!lua_isnumber(L,-1)) goto not_array;
n = lua_tonumber(L,-1);
idx = n;
if (idx != n || idx < 1) goto not_array;
count++;
max = idx;
}
/* We have the total number of elements in "count". Also we have
* the max index encountered in "idx". We can't reach this code
* if there are indexes <= 0. If you also note that there can not be
* repeated keys into a table, you have that if idx==count you are sure
* that there are all the keys from 1 to count (both included). */
return idx == count;
return 1;

not_array:
lua_pop(L,1);
Expand Down Expand Up @@ -415,7 +413,12 @@ static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {

/* Limit the encoding of nested tables to a specified maximum depth, so that
* we survive when called against circular references in tables. */
if (t == LUA_TTABLE && level == LUACMSGPACK_MAX_NESTING) t = LUA_TNIL;
if (t == LUA_TTABLE && level == LUACMSGPACK_MAX_NESTING) {
t = LUA_TNIL;
lua_pushliteral(L, "table nesting level too deep");
lua_error(L);
}

switch(t) {
case LUA_TSTRING: mp_encode_lua_string(L,buf); break;
case LUA_TBOOLEAN: mp_encode_lua_bool(L,buf); break;
Expand All @@ -437,12 +440,128 @@ static int mp_pack(lua_State *L) {

/* --------------------------------- Decoding --------------------------------- */

/* Round n up to the nearest power of two.
 *
 * Returns n itself when it is already a power of two (1 -> 1, 8 -> 8),
 * the next larger power of two otherwise (5 -> 8), and 0 when n is 0.
 *
 * The computation is carried out on the unsigned representation of n:
 * doing the decrement, shifts and increment on a signed int would overflow
 * (undefined behaviour) for inputs above 2^30 and would rely on
 * implementation-defined right shifts for negative inputs. The shift
 * cascade stops at 16, so it covers an unsigned int of up to 32 bits. */
unsigned int next_power_of_two(int n) {
    unsigned int v = (unsigned int)n;

    v--;            /* So that exact powers of two map to themselves. */
    v |= v >> 1;    /* Smear the highest set bit into every lower bit... */
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v++;            /* ...then step up to the next power of two. */
    return v;
}

/* Inspect, without consuming it, the MessagePack object starting at p, of
 * which `left` bytes are readable.
 *
 * Only the integer encodings are recognised: positive fixnum, uint 8/16/32/64
 * and int 8/16/32/64. On success returns 1 and stores:
 *   *size     - total encoded size in bytes (type byte plus payload),
 *   *value    - the payload decoded as a big endian unsigned number,
 *   *negative - non zero when a signed encoding has its sign bit set.
 * Returns 0, leaving the outputs untouched, when the object is of any other
 * type or is not fully contained in the `left` available bytes. */
static int mp_peek_int(const unsigned char *p, size_t left, size_t *size,
                       uint64_t *value, int *negative) {
    size_t payload, k;
    int is_signed = 0;
    uint64_t v = 0;

    if (left < 1) return 0;
    switch (p[0]) {
    case 0xcc: payload = 1; break;                  /* uint 8 */
    case 0xd0: payload = 1; is_signed = 1; break;   /* int 8 */
    case 0xcd: payload = 2; break;                  /* uint 16 */
    case 0xd1: payload = 2; is_signed = 1; break;   /* int 16 */
    case 0xce: payload = 4; break;                  /* uint 32 */
    case 0xd2: payload = 4; is_signed = 1; break;   /* int 32 */
    case 0xcf: payload = 8; break;                  /* uint 64 */
    case 0xd3: payload = 8; is_signed = 1; break;   /* int 64 */
    default: /* types that can't be identified by first byte value. */
        if ((p[0] & 0x80) != 0) return 0;           /* not a positive fixnum */
        *size = 1;
        *value = p[0];
        *negative = 0;
        return 1;
    }

    /* Never read a payload that extends past the end of the input. */
    if (left - 1 < payload) return 0;
    for (k = 1; k <= payload; k++) v = (v << 8) | p[k];

    *size = payload + 1;
    *value = v;
    *negative = is_signed && (p[1] & 0x80) != 0;
    return 1;
}

/* Estimate how many of the `len` key/value pairs of the map that starts at
 * the cursor position have array-like keys, so that the caller can pass the
 * result as a preallocation hint for the array part of the Lua table.
 *
 * The cursor is only read (c->p and c->left), never advanced. Pairs are
 * scanned in order; the i-th key (counting from 1) is accepted while it is a
 * non negative integer not greater than next_power_of_two(i). Scanning stops
 * at the first key that does not qualify, at the first key or value that is
 * not an integer, or when the input ends. Note that a key is counted before
 * its value is examined.
 *
 * Returns the number of accepted keys, between 0 and len. */
int calc_array_num(mp_cur *c, size_t len) {
    const unsigned char *curr = c->p;
    size_t left = c->left;
    size_t size, pair;
    uint64_t value;
    int negative;
    int narray = 0;
    int idx = 0;

    for (pair = 0; pair < len; pair++) {
        /* Key: must look like the next slot of a growing array. */
        if (!mp_peek_int(curr, left, &size, &value, &negative)) break;
        idx++;
        if (negative || value > next_power_of_two(idx)) break;
        narray++;
        curr += size;
        left -= size;

        /* Value: only integers can be skipped without a full decode. */
        if (!mp_peek_int(curr, left, &size, &value, &negative)) break;
        curr += size;
        left -= size;
    }
    return narray;
}

void mp_decode_to_lua_type(lua_State *L, mp_cur *c);

void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
int index = 1;

lua_newtable(L);
/*lua_newtable(L);*/
lua_createtable(L, len, 0);
while(len--) {
lua_pushnumber(L,index++);
mp_decode_to_lua_type(L,c);
Expand All @@ -452,7 +571,12 @@ void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
}

void mp_decode_to_lua_hash(lua_State *L, mp_cur *c, size_t len) {
lua_newtable(L);
int narray;
int nhash;
narray = calc_array_num(c, len);
nhash = len - narray;
lua_createtable(L, narray, nhash);
/*lua_newtable(L);*/
while(len--) {
mp_decode_to_lua_type(L,c); /* key */
if (c->err) return;
Expand Down Expand Up @@ -667,7 +791,7 @@ static int mp_unpack(lua_State *L) {
s = (const unsigned char*) lua_tolstring(L,-1,&len);
c = mp_cur_new(s,len);
mp_decode_to_lua_type(L,c);

if (c->err == MP_CUR_ERROR_EOF) {
mp_cur_free(c);
lua_pushstring(L,"Missing bytes in input.");
Expand Down Expand Up @@ -701,7 +825,7 @@ LUALIB_API int luaopen_cmsgpack (lua_State *L) {
lua_pushliteral(L, LUACMSGPACK_COPYRIGHT);
lua_setfield(L, -2, "_COPYRIGHT");
lua_pushliteral(L, LUACMSGPACK_DESCRIPTION);
lua_setfield(L, -2, "_DESCRIPTION");
lua_setfield(L, -2, "_DESCRIPTION");
return 1;
}

Expand Down
Loading