Skip to content

Commit 4503a78

Browse files
committed
Added an initial test for putting some often-called functions into an extension module; for now it is only built by a cheap hardcoded makefile. It shows that the performance gain is rather small: the bottlenecks are attribute accesses, so in fact the whole type would have to be moved into C to get real performance. It's not really worth it for 25%, I believe.
1 parent ae6d08e commit 4503a78

4 files changed

Lines changed: 124 additions & 0 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
*.pyc
2+
*.o
3+
*.so

_fun.c

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#include <Python.h>
2+
#include <stdint.h>
3+
4+
static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
5+
{
6+
const unsigned char *sha;
7+
const unsigned int sha_len;
8+
9+
// Note: self is only set if we are a c type. We emulate an instance method,
10+
// hence we have to get the instance as 'first' argument
11+
12+
// get instance and sha
13+
PyObject* inst = 0;
14+
if (!PyArg_ParseTuple(args, "Os#", &inst, &sha, &sha_len))
15+
return NULL;
16+
17+
if (sha_len != 20) {
18+
PyErr_SetString(PyExc_ValueError, "Sha is not 20 bytes long");
19+
return NULL;
20+
}
21+
22+
if( !inst){
23+
PyErr_SetString(PyExc_ValueError, "Cannot be called without self");
24+
return NULL;
25+
}
26+
27+
// read lo and hi bounds
28+
PyObject* fanout_table = PyObject_GetAttrString(inst, "_fanout_table");
29+
if (!fanout_table){
30+
PyErr_SetString(PyExc_ValueError, "Couldn't obtain fanout table");
31+
return NULL;
32+
}
33+
34+
unsigned int lo = 0, hi = 0;
35+
if (sha[0]){
36+
PyObject* item = PySequence_GetItem(fanout_table, (const Py_ssize_t)(sha[0]-1));
37+
lo = PyInt_AS_LONG(item);
38+
Py_DECREF(item);
39+
}
40+
PyObject* item = PySequence_GetItem(fanout_table, (const Py_ssize_t)sha[0]);
41+
hi = PyInt_AS_LONG(item);
42+
Py_DECREF(item);
43+
item = 0;
44+
45+
Py_DECREF(fanout_table);
46+
47+
// get sha query function
48+
PyObject* get_sha = PyObject_GetAttrString(inst, "sha");
49+
if (!get_sha){
50+
PyErr_SetString(PyExc_ValueError, "Couldn't obtain sha method");
51+
return NULL;
52+
}
53+
54+
PyObject *sha_str = 0;
55+
while (lo < hi) {
56+
const int mid = (lo + hi)/2;
57+
sha_str = PyObject_CallFunction(get_sha, "i", mid);
58+
if (!sha_str) {
59+
return NULL;
60+
}
61+
62+
// we really trust that string ... for speed
63+
const int cmp = memcmp(PyString_AS_STRING(sha_str), sha, 20);
64+
Py_DECREF(sha_str);
65+
sha_str = 0;
66+
67+
if (cmp < 0){
68+
lo = mid + 1;
69+
}
70+
else if (cmp > 0) {
71+
hi = mid;
72+
}
73+
else {
74+
Py_DECREF(get_sha);
75+
return PyInt_FromLong(mid);
76+
}// END handle comparison
77+
}// END while lo < hi
78+
79+
// nothing found, cleanup
80+
Py_DECREF(get_sha);
81+
Py_RETURN_NONE;
82+
}
83+
84+
85+
static PyMethodDef py_fun[] = {
86+
{ "PackIndexFile_sha_to_index", (PyCFunction)PackIndexFile_sha_to_index, METH_VARARGS, NULL },
87+
{ NULL, NULL, 0, NULL }
88+
};
89+
90+
void init_fun(void)
91+
{
92+
PyObject *m;
93+
94+
m = Py_InitModule3("_fun", py_fun, NULL);
95+
if (m == NULL)
96+
return;
97+
}

makefile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
# `all` and `clean` name actions, not files.
.PHONY: all clean

# Listed first so that a bare `make` builds the extension module
# rather than only the object file.
all: _fun.so

# Compile the extension source; the Python include path is hardcoded.
_fun.o: _fun.c
	gcc -pthread -fno-strict-aliasing -DNDEBUG -g -fwrapv -O2 -Wall -Wstrict-prototypes -fPIC -I/usr/include/python2.6 -c $< -o $@

# Link the object file into the shared library that Python imports as `_fun`.
_fun.so: _fun.o
	gcc -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions $^ -o $@

# -f keeps clean quiet and successful when there is nothing to remove.
clean:
	rm -f *.so *.o

pack.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323
msb_size
2424
)
2525

26+
try:
27+
from _fun import PackIndexFile_sha_to_index
28+
except ImportError:
29+
pass
30+
# END try c module
31+
2632
from base import ( # Amazing !
2733
OInfo,
2834
OStream,
@@ -298,6 +304,13 @@ def sha_to_index(self, sha):
298304
# END bisect
299305
return None
300306

307+
if 'PackIndexFile_sha_to_index' in globals():
308+
# NOTE: It's only about 25% faster; the major bottleneck might be the attr
309+
# accesses
310+
def sha_to_index(self, sha):
311+
return PackIndexFile_sha_to_index(self, sha)
312+
# END redefine heavy-hitter with c version
313+
301314
#} END properties
302315

303316

0 commit comments

Comments
 (0)