|
1 | 1 | """Contains PackIndex and Pack implementations""" |
| 2 | +from util import ( |
| 3 | + LockedFD, |
| 4 | + LazyMixin, |
| 5 | + file_contents_ro, |
| 6 | + unpack_from |
| 7 | + ) |
| 8 | + |
| 9 | +from struct import ( |
| 10 | + pack, |
| 11 | + ) |
| 12 | + |
| 13 | +__all__ = ('PackIndex', 'Pack') |
| 14 | + |
| 15 | + |
class PackIndex(LazyMixin):
    """A pack index maps the shas of objects to their offsets in the corresponding
    pack, which allows to locate an object without scanning the pack itself.

    Two index layouts are handled, as implemented by the accessors below:

    * version 1: a 256 entry fanout table at byte 0, followed by 24 byte entries,
      each being a 4 byte pack offset and a 20 byte sha
    * version 2: an 8 byte header (magic and version), the 256 entry fanout table,
      then separate tables for shas, crcs, 32 bit offsets and 64 bit offsets

    In both cases the last 40 bytes hold the pack checksum followed by the
    index checksum.

    The version specific accessors are bound to the instance as ``entry``,
    ``offset``, ``sha`` and ``crc`` once the index data was inspected.

    :note: NOTE(review): all attributes are presumably filled in on first access
        through ``LazyMixin`` calling ``_set_cache_`` - confirm against ``util``"""

    # Don't use slots as we dynamically bind functions for each version, need a dict for this
    # The slots you see here are just to keep track of our instance variables
    # __slots__ = ('_indexpath', '_fanout_table', '_data', '_version',
    #               '_sha_list_offset', '_crc_list_offset', '_pack_offset', '_pack_64_offset')

    # used in v2 indices: 8 byte header + 256 * 4 byte fanout table
    _sha_list_offset = 8 + 1024

    def __init__(self, indexpath):
        """Initialize the instance with the path to the index file. Nothing is
        read at this point.

        :param indexpath: path to the pack index file"""
        super(PackIndex, self).__init__()
        self._indexpath = indexpath

    def _set_cache_(self, attr):
        """Compute and store the attribute with the given name.

        :param attr: name of the attribute to initialize. Any name which is not
            explicitly handled triggers the complete initialization of the index,
            that is version detection, accessor binding and fanout table setup"""
        if attr == "_packfile_checksum":
            self._packfile_checksum = self._data[-40:-20]
        elif attr == "_indexfile_checksum":
            # this branch previously tested for _packfile_checksum again and
            # could never be reached
            self._indexfile_checksum = self._data[-20:]
        elif attr == "_data":
            lfd = LockedFD(self._indexpath)
            fd = lfd.open()
            try:
                self._data = file_contents_ro(fd)
            finally:
                # hand the descriptor back even if reading the contents failed
                lfd.rollback()
            # END assure rollback
        else:
            # now its time to initialize everything - if we are here, someone wants
            # to access the fanout table or related properties

            # CHECK VERSION
            # only version 2 indices start with the magic bytes
            if self._data[:4] == '\377tOc':
                self._version = 2
            else:
                self._version = 1
            # END detect version

            if self._version == 2:
                version_id = unpack_from(">L", self._data, 4)[0]
                assert version_id == self._version, "Unsupported index version: %i" % version_id
            # END assert version

            # SETUP FUNCTIONS
            # setup our functions according to the actual version
            for fname in ('entry', 'offset', 'sha', 'crc'):
                setattr(self, fname, getattr(self, "_%s_v%i" % (fname, self._version)))
            # END for each function to initialize

            # INITIALIZE DATA
            # byte offset is 8 if version is 2, 0 otherwise
            self._initialize()
        # END handle attributes

    #{ Access V1

    def _entry_v1(self, i):
        """:return: tuple(offset, binsha) of the entry at index i. Version 1
            indices do not store a crc"""
        return unpack_from(">L20s", self._data, 1024 + i*24)

    def _offset_v1(self, i):
        """see ``_offset_v2``"""
        return unpack_from(">L", self._data, 1024 + i*24)[0]

    def _sha_v1(self, i):
        """see ``_sha_v2``"""
        # each entry starts with the 4 byte offset, the sha comes after it
        base = 1024 + i*24 + 4
        return self._data[base:base+20]

    def _crc_v1(self, i):
        """:return: always 0, as version 1 indices do not store a crc"""
        return 0

    #} END access V1

    #{ Access V2

    def _entry_v2(self, i):
        """:return: tuple(offset, binsha, crc) of the entry at index i"""
        return (self._offset_v2(i), self._sha_v2(i), self._crc_v2(i))

    def _offset_v2(self, i):
        """:return: 32 or 64 bit offset into the pack file for the object at
            index i. The value is read from the 64 bit table only if the
            entry in the 32 bit table has its high bit set"""
        offset = unpack_from(">L", self._data, self._pack_offset + i * 4)[0]

        # if the high-bit is set, this indicates that we have to lookup the offset
        # in the 64 bit region of the file. The current offset ( lower 31 bits )
        # are the index into it
        if offset & 0x80000000:
            offset = unpack_from(">Q", self._data, self._pack_64_offset + (offset & ~0x80000000) * 8)[0]
        # END handle 64 bit offset

        return offset

    def _sha_v2(self, i):
        """:return: sha at the given index of this file index instance"""
        base = self._sha_list_offset + i * 20
        return self._data[base:base+20]

    def _crc_v2(self, i):
        """:return: crc of the object at index i, read from 4 bytes as unsigned integer"""
        return unpack_from(">L", self._data, self._crc_list_offset + i * 4)[0]

    #} END access V2

    #{ Initialization

    def _initialize(self):
        """Read the fanout table and, for version 2 indices, compute the byte
        offsets of the crc, 32 bit offset and 64 bit offset tables"""
        # the fanout table follows the 8 byte header in v2, and starts at 0 in v1
        self._fanout_table = self._read_fanout((self._version == 2) * 8)

        if self._version == 2:
            self._crc_list_offset = self._sha_list_offset + self.size * 20
            self._pack_offset = self._crc_list_offset + self.size * 4
            self._pack_64_offset = self._pack_offset + self.size * 4
        # END setup base

    def _read_fanout(self, byte_offset):
        """Generate a fanout table from our data

        :param byte_offset: offset in bytes at which the table starts
        :return: list of 256 unsigned integers"""
        # read all 256 big endian entries in a single call
        return list(unpack_from('>256L', self._data, byte_offset))

    #} END initialization

    #{ Properties

    @property
    def version(self):
        """:return: version of the index as integer, either 1 or 2"""
        return self._version

    @property
    def size(self):
        """:return: amount of objects referred to by this index"""
        return self._fanout_table[255]

    @property
    def packfile_checksum(self):
        """:return: 20 byte sha representing the sha1 hash of the pack file"""
        return self._data[-40:-20]

    @property
    def indexfile_checksum(self):
        """:return: 20 byte sha representing the sha1 hash of this index file"""
        return self._data[-20:]

    #} END properties

    #{ Interface

    def sha_to_index(self, sha):
        """
        :return: index usable with the ``offset`` or ``entry`` method, or None
            if the sha was not found in this pack index
        :param sha: 20 byte sha to lookup"""
        first_byte = ord(sha[0])
        # entry N of the fanout table is used as the end of the index range for
        # first byte N, entry N-1 as its start
        lo = 0                                  # lower index, the left bound of the bisection
        if first_byte != 0:
            lo = self._fanout_table[first_byte-1]
        hi = self._fanout_table[first_byte]     # the upper, right bound of the bisection

        # bisect until we have the sha
        get_sha = self.sha
        while lo < hi:
            mid = (lo + hi) // 2
            mid_sha = get_sha(mid)
            if sha < mid_sha:
                hi = mid
            elif sha == mid_sha:
                return mid
            else:
                # mid was checked already - skipping it guarantees that the
                # range shrinks and the loop ends if the sha is missing
                lo = mid + 1
            # END handle midpoint
        # END bisect
        return None

    #} END interface
| 187 | + |
| 188 | + |
class Pack(LazyMixin):
    """A pack is a file written according to version 2 of the git pack format"""
| 191 | + |
0 commit comments