Skip to content

Commit 6fd8d74

Browse files
committed
Base implementation and stubs added for git-like db, as well as the reference db (for the alternates implementation)
1 parent 5b5ba21 commit 6fd8d74

12 files changed

Lines changed: 258 additions & 62 deletions

File tree

db/base.py

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
)
1010

1111

12-
__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB')
12+
__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
1313

1414

1515
class ObjectDBR(object):
@@ -66,6 +66,14 @@ def stream_async(self, reader):
6666
# base implementation just uses the stream method repeatedly
6767
task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
6868
return pool.add_task(task)
69+
70+
def size(self):
    """Report the number of objects stored in this database.

    :return: amount of objects in this database
    :raise NotImplementedError: always, as this base implementation is a stub"""
    raise NotImplementedError
73+
74+
def sha_iter(self):
    """Iterate the shas of all objects in this database.

    :return: iterator yielding 20 byte shas for all objects in this database
    :raise NotImplementedError: always, as this base implementation is a stub"""
    raise NotImplementedError
6977

7078
#} END query interface
7179

@@ -150,6 +158,63 @@ def db_path(self, rela_path):
150158
#} END interface
151159

152160

153-
class CompoundDB(ObjectDBR):
154-
"""A database which delegates calls to sub-databases"""
155-
# TODO
161+
class CachingDB(object):
    """A database which uses caches to speed-up access"""

    #{ Interface

    def update_cache(self, force=False):
        """Call this method if the underlying data changed to trigger an update
        of the internal caching structures.

        :param force: if True, the update must be performed. Otherwise the implementation
            may decide not to perform an update if it thinks nothing has changed.
        :return: True if an update was performed as something changed indeed.
            This base implementation maintains no cache, performs no update and
            therefore returns False"""
        # Return an explicit boolean instead of an implicit None, so that callers
        # can combine the results of several databases with boolean operators
        return False

    #} END interface
173+
174+
175+
class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
    """A database which delegates calls to sub-databases.

    Databases are stored in the lazy-loaded _dbs attribute.
    Define _set_cache_ to update it with your databases"""

    def _set_cache_(self, attr):
        """Initialize the given attribute - presumably called by LazyMixin when
        the attribute is accessed for the first time (verify against LazyMixin).

        :param attr: name of the attribute to initialize. Only '_dbs' is handled,
            it starts out as an empty list"""
        if attr == '_dbs':
            self._dbs = list()
        # END handle dbs

    #{ ObjectDBR interface

    def has_object(self, sha):
        raise NotImplementedError("To be implemented in subclass")

    def info(self, sha):
        raise NotImplementedError("To be implemented in subclass")

    def stream(self, sha):
        raise NotImplementedError()

    def size(self):
        raise NotImplementedError()

    def sha_iter(self):
        raise NotImplementedError()

    #} END object DBR Interface

    #{ Interface

    def databases(self):
        """:return: tuple of database instances we use for lookups"""
        return tuple(self._dbs)

    def update_cache(self, force=False):
        """Update the caches of all sub-databases which are CachingDB instances.

        :param force: passed on unchanged to update_cache of each sub-database
        :return: True if at least one sub-database reported a performed update"""
        stat = False
        for db in self._dbs:
            if isinstance(db, CachingDB):
                # Do not use `stat |= ...` here: an implementation returning None
                # would trigger a TypeError. Every database is still updated, we
                # intentionally do not short-circuit once one reported a change.
                if db.update_cache(force):
                    stat = True
                # END handle update status
            # END if is caching db
        # END for each database to update
        return stat

    #} END interface
219+
220+

db/git.py

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,49 @@
1-
2-
from gitdb.base import (
3-
OInfo,
4-
OStream
5-
)
1+
from base import (
2+
CompoundDB,
3+
FileDBBase,
4+
)
65

76
from loose import LooseObjectDB
7+
from pack import PackedDB
8+
from ref import ReferenceDB
9+
10+
from gitdb.util import LazyMixin
11+
from gitdb.exc import InvalidDBRoot
12+
import os
813

9-
__all__ = ('GitObjectDB', )
14+
__all__ = ('GitDB', )
1015

11-
#class GitObjectDB(CompoundDB, ObjectDBW):
12-
class GitObjectDB(LooseObjectDB):
13-
"""A database representing the default git object store, which includes loose
14-
objects, pack files and an alternates file
16+
class GitDB(FileDBBase, CompoundDB):
    """A git-style object database, which contains all objects in the 'objects'
    subdirectory"""
    # Configuration
    # Database types instantiated for the respective sub-paths listed below
    PackDBCls = PackedDB
    LooseDBCls = LooseObjectDB
    ReferenceDBCls = ReferenceDB

    # Directories
    # All paths are relative to the objects directory we are initialized with.
    # git keeps its pack files in the 'pack' subdirectory - with any other name
    # the existence check below fails and packed objects are silently ignored
    packs_dir = 'pack'
    # loose objects reside directly in the objects directory
    loose_dir = ''
    # NOTE: despite the attribute name, this path is handed to the ReferenceDB
    # which reads it as a file
    alternates_dir = os.path.join('info', 'alternates')

    def __init__(self, root_path):
        """Initialize ourselves on a git objects directory

        :param root_path: path to the objects directory"""
        super(GitDB, self).__init__(root_path)

    def _set_cache_(self, attr):
        """Initialize the lazy '_dbs' attribute with one database per existing sub-path.

        :param attr: name of the attribute to initialize, only '_dbs' is handled
        :raise InvalidDBRoot: if none of the sub-paths exists"""
        if attr == '_dbs':
            self._dbs = list()
            for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
                                   (self.loose_dir, self.LooseDBCls),
                                   (self.alternates_dir, self.ReferenceDBCls)):
                path = self.db_path(subpath)
                if os.path.exists(path):
                    self._dbs.append(dbcls(path))
                # END check path exists
            # END for each db type

            # should have at least one subdb
            if not self._dbs:
                raise InvalidDBRoot(self.root_path())
        # END handle dbs
2849

29-
def stream(self, sha):
30-
"""For now, all lookup is done by git itself"""
31-
t = self._git.stream_object_data(sha)
32-
return OStream(*t)
33-

db/loose.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@
2424
from gitdb.util import (
2525
ENOENT,
2626
to_hex_sha,
27+
hex_to_bin,
2728
exists,
2829
isdir,
2930
mkdir,
3031
rename,
3132
dirname,
33+
basename,
3234
join
3335
)
3436

@@ -186,4 +188,21 @@ def store(self, istream):
186188

187189
istream.sha = sha
188190
return istream
191+
192+
def sha_iter(self):
    """Iterate the shas of everything below our root path which looks like an object.

    A file is taken for an object if its name is 38 characters long and the
    directory containing it has a 2 character name.

    :return: iterator yielding the result of hex_to_bin for each directory
        name joined with the file name"""
    for dirpath, _subdirs, filenames in os.walk(self.root_path()):
        prefix = basename(dirpath)
        if len(prefix) == 2:
            for name in filenames:
                if len(name) == 38:
                    yield hex_to_bin(prefix + name)
                # END if file name has object length
            # END for each file
        # END if directory name has prefix length
    # END for each walk iteration
205+
206+
def size(self):
    """:return: amount of objects in this database, determined by counting
        the items yielded by sha_iter"""
    # count lazily instead of materializing a tuple holding every single sha
    return sum(1 for _ in self.sha_iter())
189208

db/pack.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
"""Module containing a database to deal with packs"""
22
from base import (
33
FileDBBase,
4-
ObjectDBR
4+
ObjectDBR,
5+
CachingDB
56
)
67

78
from gitdb.util import (
@@ -18,13 +19,13 @@
1819

1920
import os
2021
import glob
21-
__all__ = ('PackedDB', )
2222

23+
__all__ = ('PackedDB', )
2324

2425
#{ Utilities
2526

2627

27-
class PackedDB(FileDBBase, ObjectDBR, LazyMixin):
28+
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
2829
"""A database operating on a set of object packs"""
2930

3031
# sort the priority list every N queries
@@ -43,9 +44,10 @@ def __init__(self, root_path):
4344
self._st_mtime = 0 # last modification data of our root path
4445

4546
def _set_cache_(self, attr):
46-
# currently it can only be our _entities attribute
47-
self._entities = list()
48-
self.update_pack_entity_cache()
47+
if attr == '_entities':
48+
self._entities = list()
49+
self.update_cache()
50+
# END handle entities initialization
4951

5052
def _sort_entities(self):
5153
self._entities.sort(key=lambda l: l[0], reverse=True)
@@ -95,6 +97,20 @@ def info(self, sha):
9597
def stream(self, sha):
9698
entity, index = self._pack_info(sha)
9799
return entity.stream_at_index(index)
100+
101+
def sha_iter(self):
    """Iterate the shas of all objects in all of our pack entities.

    :return: iterator yielding, for each entity returned by entities(), the
        sha at every position of the entity's index"""
    for entity in self.entities():
        index = entity.index()
        sha_by_index = index.sha
        # use a dedicated name for the position, the index object stays intact
        for position in xrange(index.size()):
            yield sha_by_index(position)
        # END for each position in the index
    # END for each entity
110+
111+
def size(self):
    """:return: amount of objects in this database, which is the sum of the
        index sizes of all our pack entities, or 0 if there are none"""
    # sum() handles an empty list, whereas reduce() without an initial value
    # raises a TypeError if there is no pack at all
    return sum(item[1].index().size() for item in self._entities)
98114

99115
#} END object db read
100116

@@ -115,7 +131,7 @@ def store_async(self, reader):
115131

116132
#{ Interface
117133

118-
def update_pack_entity_cache(self, force=False):
134+
def update_cache(self, force=False):
119135
"""Update our cache with the acutally existing packs on disk. Add new ones,
120136
and remove deleted ones. We keep the unchanged ones
121137
:param force: If True, the cache will be updated even though the directory
@@ -162,19 +178,4 @@ def entities(self):
162178
""":return: list of pack entities operated upon by this database"""
163179
return [ item[1] for item in self._entities ]
164180

165-
def sha_iter(self):
166-
"""Return iterator yielding 20 byte shas for the packed objects in this data base"""
167-
sha_list = list()
168-
for entity in self.entities():
169-
index = entity.index()
170-
sha_by_index = index.sha
171-
for index in xrange(index.size()):
172-
yield sha_by_index(index)
173-
# END for each index
174-
# END for each entity
175-
176-
def size(self):
177-
""":return: amount of packed objects in this database"""
178-
sizes = [item[1].index().size() for item in self._entities]
179-
return reduce(lambda x,y: x+y, sizes)
180181
#} END interface

db/ref.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,64 @@
1-
from base import CompoundDB
1+
from base import (
2+
CompoundDB,
3+
)
24

5+
import os
36
__all__ = ('ReferenceDB', )


class ReferenceDB(CompoundDB):
    """A database consisting of database referred to in a file"""

    # Configuration
    # Specifies the object database to use for the paths found in the alternates
    # file. If None, it defaults to the GitDB
    ObjectDBCls = None

    def __init__(self, ref_file):
        """Initialize this instance with the file referring to our databases

        :param ref_file: path to a file listing one database root path per line"""
        super(ReferenceDB, self).__init__()
        self._ref_file = ref_file

    def _set_cache_(self, attr):
        if attr == '_dbs':
            self._dbs = list()
            self._update_dbs_from_ref_file()
        # END handle dbs

    def _read_ref_paths(self):
        """:return: list of the stripped lines of our reference file, in file
            order, without blank lines and duplicates. The list is empty if
            the file could not be read"""
        ref_paths = list()
        seen = set()
        try:
            fp = open(self._ref_file, 'r')
            try:
                for line in fp:
                    path = line.strip()
                    # a blank line is not a path, and each path is used once
                    if path and path not in seen:
                        seen.add(path)
                        ref_paths.append(path)
                    # END handle path
                # END for each line
            finally:
                # never leak the file handle
                fp.close()
            # END assure file is closed
        except (OSError, IOError):
            # best effort: an unreadable file is treated like an empty one.
            # open() raises IOError, which is no OSError in python 2
            return list()
        # END handle alternates
        return ref_paths

    def _update_dbs_from_ref_file(self):
        """Synchronize our databases with the paths listed in the reference file.

        Databases whose root path is not listed anymore are dropped, databases
        for newly listed paths are appended in the order of the file"""
        dbcls = self.ObjectDBCls
        if dbcls is None:
            # late import, as the git module imports this module in turn
            from git import GitDB
            dbcls = GitDB
        # END get db type

        ref_paths = self._read_ref_paths()
        ref_paths_set = set(ref_paths)

        # remove existing
        # Filter into a new list and assign it in place - deleting by index
        # while enumerating a copy addresses the wrong items after the first
        # deletion
        self._dbs[:] = [db for db in self._dbs if db.root_path() in ref_paths_set]

        # add new
        # ref_paths is in file order, which maintains the order of the file
        cur_ref_paths_set = set(db.root_path() for db in self._dbs)
        for path in ref_paths:
            if path not in cur_ref_paths_set:
                self._dbs.append(dbcls(path))
            # END if path is new
        # END for each path to add

    def update_cache(self, force=False):
        """Re-read the reference file, then update the caches of our databases

        :param force: passed on to the base implementation
        :return: return value of the base implementation"""
        # re-read alternates and update databases
        self._update_dbs_from_ref_file()
        return super(ReferenceDB, self).update_cache(force)

test/db/lib.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
with_rw_directory,
44
with_packs_rw,
55
ZippedStoreShaWriter,
6+
fixture_path,
67
TestBase
78
)
89

test/db/test_git.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from lib import *
2+
from gitdb.db import GitDB
3+
4+
class TestGitDB(TestBase):

    def test_reading(self):
        """Instantiate a GitDB on an objects directory - presumably the one of
        this project's own repository, judging from the relative path"""
        # the closing parenthesis was missing, which made the module unparsable
        gdb = GitDB(fixture_path('../../.git/objects'))
        self.fail("todo")
9+

test/db/test_loose.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,8 @@ def test_writing(self, path):
1111
self._assert_object_writing(ldb)
1212
self._assert_object_writing_async(ldb)
1313

14+
# verify sha iteration and size
15+
shas = list(ldb.sha_iter())
16+
assert shas and len(shas[0]) == 20
17+
18+
assert len(shas) == ldb.size()

0 commit comments

Comments
 (0)