Skip to content

Commit 5b5ba21

Browse files
committed
Merge branch 'pack'
2 parents 937d592 + 3cee78e commit 5b5ba21

27 files changed

Lines changed: 2240 additions & 329 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
*.pyc
2+
*.o
3+
*.so

__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,6 @@ def _init_externals():
1414

1515
# default imports
1616
from db import *
17+
from base import *
1718
from stream import *
1819

_fun.c

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#include <Python.h>
2+
#include <stdint.h>
3+
4+
static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
5+
{
6+
const unsigned char *sha;
7+
const unsigned int sha_len;
8+
9+
// Note: self is only set if we are a c type. We emulate an instance method,
10+
// hence we have to get the instance as 'first' argument
11+
12+
// get instance and sha
13+
PyObject* inst = 0;
14+
if (!PyArg_ParseTuple(args, "Os#", &inst, &sha, &sha_len))
15+
return NULL;
16+
17+
if (sha_len != 20) {
18+
PyErr_SetString(PyExc_ValueError, "Sha is not 20 bytes long");
19+
return NULL;
20+
}
21+
22+
if( !inst){
23+
PyErr_SetString(PyExc_ValueError, "Cannot be called without self");
24+
return NULL;
25+
}
26+
27+
// read lo and hi bounds
28+
PyObject* fanout_table = PyObject_GetAttrString(inst, "_fanout_table");
29+
if (!fanout_table){
30+
PyErr_SetString(PyExc_ValueError, "Couldn't obtain fanout table");
31+
return NULL;
32+
}
33+
34+
unsigned int lo = 0, hi = 0;
35+
if (sha[0]){
36+
PyObject* item = PySequence_GetItem(fanout_table, (const Py_ssize_t)(sha[0]-1));
37+
lo = PyInt_AS_LONG(item);
38+
Py_DECREF(item);
39+
}
40+
PyObject* item = PySequence_GetItem(fanout_table, (const Py_ssize_t)sha[0]);
41+
hi = PyInt_AS_LONG(item);
42+
Py_DECREF(item);
43+
item = 0;
44+
45+
Py_DECREF(fanout_table);
46+
47+
// get sha query function
48+
PyObject* get_sha = PyObject_GetAttrString(inst, "sha");
49+
if (!get_sha){
50+
PyErr_SetString(PyExc_ValueError, "Couldn't obtain sha method");
51+
return NULL;
52+
}
53+
54+
PyObject *sha_str = 0;
55+
while (lo < hi) {
56+
const int mid = (lo + hi)/2;
57+
sha_str = PyObject_CallFunction(get_sha, "i", mid);
58+
if (!sha_str) {
59+
return NULL;
60+
}
61+
62+
// we really trust that string ... for speed
63+
const int cmp = memcmp(PyString_AS_STRING(sha_str), sha, 20);
64+
Py_DECREF(sha_str);
65+
sha_str = 0;
66+
67+
if (cmp < 0){
68+
lo = mid + 1;
69+
}
70+
else if (cmp > 0) {
71+
hi = mid;
72+
}
73+
else {
74+
Py_DECREF(get_sha);
75+
return PyInt_FromLong(mid);
76+
}// END handle comparison
77+
}// END while lo < hi
78+
79+
// nothing found, cleanup
80+
Py_DECREF(get_sha);
81+
Py_RETURN_NONE;
82+
}
83+
84+
85+
static PyMethodDef py_fun[] = {
86+
{ "PackIndexFile_sha_to_index", (PyCFunction)PackIndexFile_sha_to_index, METH_VARARGS, NULL },
87+
{ NULL, NULL, 0, NULL }
88+
};
89+
90+
void init_fun(void)
91+
{
92+
PyObject *m;
93+
94+
m = Py_InitModule3("_fun", py_fun, NULL);
95+
if (m == NULL)
96+
return;
97+
}

base.py

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
"""Module with basic data structures - they are designed to be lightweight and fast"""
2+
from util import (
3+
to_hex_sha,
4+
to_bin_sha,
5+
zlib
6+
)
7+
8+
from fun import (
9+
type_id_to_type_map,
10+
type_to_type_id_map
11+
)
12+
13+
# names exported by 'from base import *'
__all__ = (
    'OInfo',
    'OPackInfo',
    'ODeltaPackInfo',
    'OStream',
    'OPackStream',
    'ODeltaPackStream',
    'IStream',
    'InvalidOInfo',
    'InvalidOStream',
)
16+
17+
#{ ODB Bases
18+
19+
class OInfo(tuple):
    """Lightweight, immutable description of an object stored in an ODB.

    An instance is the tuple ``(sha, type, size)``: the sha of the object, its
    type string and its uncompressed size in bytes. Each field can be read by
    index as well as through the property of the same name::

        assert dbi[0] == dbi.sha
        assert dbi[1] == dbi.type
        assert dbi[2] == dbi.size

    Instances carry no per-instance ``__dict__`` to stay as small as possible."""
    __slots__ = ()

    def __new__(cls, sha, type, size):
        fields = (sha, type, size)
        return tuple.__new__(cls, fields)

    def __init__(self, *args):
        # all fields have been stored by __new__ already
        tuple.__init__(self)

    #{ Interface
    sha = property(lambda self: self[0],
                   doc=":return: sha of the object, first tuple item")

    type = property(lambda self: self[1],
                    doc=":return: type string of the object, second tuple item")

    type_id = property(lambda self: type_to_type_id_map[self[1]],
                       doc=":return: type id which type_to_type_id_map holds for our type string")

    size = property(lambda self: self[2],
                    doc=":return: uncompressed size in bytes, third tuple item")
    #} END interface
56+
57+
58+
class OPackInfo(tuple):
    """Object information as read from a pack, stored as the tuple
    ``(pack_offset, type_id, size)``.

    In contrast to OInfo there is no sha, and the type is kept as numerical
    type id. The type string is looked up in type_id_to_type_map on access.

    The pack_offset is the absolute offset into the packfile at which
    all object information is located."""
    __slots__ = ()

    def __new__(cls, packoffset, type, size):
        fields = (packoffset, type, size)
        return tuple.__new__(cls, fields)

    def __init__(self, *args):
        # all fields have been stored by __new__ already
        tuple.__init__(self)

    #{ Interface

    pack_offset = property(lambda self: self[0],
                           doc=":return: offset into the pack, first tuple item")

    type = property(lambda self: type_id_to_type_map[self[1]],
                    doc=":return: type which type_id_to_type_map holds for our type id")

    type_id = property(lambda self: self[1],
                       doc=":return: numerical type id, second tuple item")

    size = property(lambda self: self[2],
                    doc=":return: size, third tuple item")

    #} END interface
92+
93+
94+
class ODeltaPackInfo(OPackInfo):
    """OPackInfo extended by a fourth item holding delta specific information.

    The delta_info is either the 20 byte sha which points to some object in
    the database, or the negative offset from the pack_offset, so that
    pack_offset - delta_info yields the pack offset of the base object."""
    __slots__ = ()

    def __new__(cls, packoffset, type, size, delta_info):
        fields = (packoffset, type, size, delta_info)
        return tuple.__new__(cls, fields)

    #{ Interface
    delta_info = property(lambda self: self[3],
                          doc=":return: delta information, fourth tuple item")
    #} END interface
109+
110+
111+
class OStream(OInfo):
    """Object information plus the stream the object can be read from,
    stored as the tuple ``(sha, type, size, stream)``.

    Generally, ODB streams are read-only as objects are immutable."""
    __slots__ = ()

    def __new__(cls, sha, type, size, stream, *args, **kwargs):
        """Store the four fields. Additional arguments are accepted and
        ignored, which helps with the initialization of subclasses"""
        fields = (sha, type, size, stream)
        return tuple.__new__(cls, fields)

    def __init__(self, *args, **kwargs):
        # all fields have been stored by __new__ already
        tuple.__init__(self)

    #{ Stream Reader Interface

    def read(self, size=-1):
        """:return: result of the read call, which is passed on to our stream"""
        source = self[3]
        return source.read(size)

    @property
    def stream(self):
        """:return: the stream, fourth tuple item"""
        return self[3]
    #} END stream reader interface
134+
135+
136+
class OPackStream(OPackInfo):
    """Pack object information plus a stream outputting an undeltified base
    object, stored as the tuple ``(pack_offset, type_id, size, stream)``."""
    __slots__ = ()

    def __new__(cls, packoffset, type, size, stream, *args):
        """Store the four fields. Additional positional arguments are accepted
        and ignored, which helps with the initialization of subclasses"""
        fields = (packoffset, type, size, stream)
        return tuple.__new__(cls, fields)

    #{ Stream Reader Interface
    def read(self, size=-1):
        """:return: result of the read call, which is passed on to our stream"""
        source = self[3]
        return source.read(size)

    @property
    def stream(self):
        """:return: the stream, fourth tuple item"""
        return self[3]
    #} END stream reader interface
153+
154+
155+
class ODeltaPackStream(ODeltaPackInfo):
    """Delta pack information plus a stream outputting the uncompressed offset
    delta information, stored as the tuple
    ``(pack_offset, type_id, size, delta_info, stream)``."""
    __slots__ = ()

    def __new__(cls, packoffset, type, size, delta_info, stream):
        fields = (packoffset, type, size, delta_info, stream)
        return tuple.__new__(cls, fields)

    #{ Stream Reader Interface
    def read(self, size=-1):
        """:return: result of the read call, which is passed on to our stream"""
        source = self[4]
        return source.read(size)

    @property
    def stream(self):
        """:return: the stream, fifth tuple item"""
        return self[4]
    #} END stream reader interface
171+
172+
173+
class IStream(list):
    """Input content stream which is to be fed into the ODB.

    The instance is a list of the form ``[sha, type, size, stream, error]``.
    It is mutable on purpose, which allows the ODB to record the outcome of
    its operations right in this instance.

    The properties of an OStream as well as a ``read`` method are provided,
    allowing the instance to blend in without prior conversion.

    The only method your content stream must support is 'read'"""
    __slots__ = ()

    def __new__(cls, type, size, stream, sha=None):
        items = (sha, type, size, stream, None)
        return list.__new__(cls, items)

    def __init__(self, type, size, stream, sha=None):
        # item layout: 0 = sha, 1 = type, 2 = size, 3 = stream, 4 = error
        items = (sha, type, size, stream, None)
        list.__init__(self, items)

    #{ Stream Reader Interface

    def read(self, size=-1):
        """Simple stream reader interface
        :return: result of the read call, which is passed on to our stream"""
        source = self[3]
        return source.read(size)

    #} END stream reader interface

    #{ Interface

    @property
    def hexsha(self):
        """:return: our sha, hex encoded, 40 bytes"""
        return to_hex_sha(self[0])

    @property
    def binsha(self):
        """:return: our sha as binary, 20 bytes"""
        return to_bin_sha(self[0])

    def _error(self):
        """:return: the error that occurred when processing the stream, or None"""
        return self[4]

    def _set_error(self, exc):
        """Set this input stream to the given exc, may be None to reset the error"""
        self[4] = exc

    error = property(_error, _set_error)

    #} END interface

    #{ odb info interface
    # each pair of accessors reads and writes one list item

    def _sha(self):
        return self[0]

    def _set_sha(self, sha):
        self[0] = sha

    sha = property(_sha, _set_sha)

    def _type(self):
        return self[1]

    def _set_type(self, type):
        self[1] = type

    type = property(_type, _set_type)

    def _size(self):
        return self[2]

    def _set_size(self, size):
        self[2] = size

    size = property(_size, _set_size)

    def _stream(self):
        return self[3]

    def _set_stream(self, stream):
        self[3] = stream

    stream = property(_stream, _set_stream)

    #} END odb info interface
258+
259+
260+
class InvalidOInfo(tuple):
    """Carries information about a sha identifying an object which is invalid in
    the queried database. The exception attribute provides more information about
    the cause of the issue.

    The instance is the tuple ``(sha, exc)``."""
    __slots__ = tuple()

    def __new__(cls, sha, exc):
        return tuple.__new__(cls, (sha, exc))

    def __init__(self, sha, exc):
        # The items have been stored by __new__ already. No arguments are passed
        # on, as object.__init__ does not accept any once __init__ is overridden.
        tuple.__init__(self)

    @property
    def sha(self):
        """:return: sha identifying the invalid object"""
        return self[0]

    @property
    def error(self):
        """:return: exception instance explaining the failure"""
        return self[1]
280+
281+
282+
class InvalidOStream(InvalidOInfo):
    """Same information as InvalidOInfo, but marking an ODB stream as invalid"""
    __slots__ = ()
285+
286+
#} END ODB Bases
287+

0 commit comments

Comments
 (0)