Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,17 @@ Inspect a local model checkpoint:
modelinfo mistral-7b.safetensors
```

Inspect a remote model directly from the Hugging Face Hub:
Inspect a remote model directly from the Hugging Face Hub (both SafeTensors and GGUF):

```bash
# Inspect a remote SafeTensors repository
modelinfo meta-llama/Llama-2-7b-hf

# Inspect a remote GGUF repository (shows a comparison table of all quantizations)
modelinfo bartowski/Meta-Llama-3-8B-Instruct-GGUF

# Inspect a specific remote GGUF file in a repository
modelinfo bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf
```

For gated models (e.g., Llama 2), you must provide authentication by setting the `HF_TOKEN` environment variable. You can create a token in your [Hugging Face settings](https://huggingface.co/settings/tokens).
Expand Down
13 changes: 9 additions & 4 deletions src/modelinfo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,12 @@ def analyze_model(

file_path_lower = file_path.lower()

if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
is_remote = False
if not os.path.exists(file_path):
if "/" in file_path or not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
is_remote = True

if is_remote:
from modelinfo.parsers.huggingface import fetch_huggingface_repo
tensors, config, format_name, disk_size = fetch_huggingface_repo(
file_path, fetch_tensors=fetch_tensors, timeout=timeout
Expand Down Expand Up @@ -180,7 +185,7 @@ def analyze_model(
max_context = None
if config:
max_context = config.get("max_position_embeddings")
elif format_name == "GGUF":
elif format_name in ("GGUF", "GGUF_group"):
metadata = tensors.get("__metadata__", {})
gen_arch = metadata.get("general.architecture")
if gen_arch:
Expand All @@ -207,8 +212,8 @@ def analyze_model(
num_layers = footprint["num_layers"]
arch_name = identify_architecture_name(tensors, num_layers, config)

if format_name != "SafeTensors" or os.path.exists(file_path):
disk_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0.0
if os.path.exists(file_path):
disk_size = os.path.getsize(file_path)

tensor_count = len([k for k in tensors.keys() if k != "__metadata__"])

Expand Down
81 changes: 44 additions & 37 deletions src/modelinfo/parsers/gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,46 +44,53 @@ def _read_gguf_value(f: Any, val_type: int) -> Any:
raise ValueError(f"Unknown GGUF value type: {val_type}")


def parse_gguf_header(path: str) -> Dict[str, Any]:
def parse_gguf_header(path_or_file: str | Any) -> Dict[str, Any]:
"""Parses a GGUF file header and extracts tensor information."""
if isinstance(path_or_file, str):
with open(path_or_file, "rb") as f:
return _parse_gguf_header_from_stream(f)
else:
return _parse_gguf_header_from_stream(path_or_file)


def _parse_gguf_header_from_stream(f: Any) -> Dict[str, Any]:
tensors: Dict[str, Any] = {}
magic = f.read(4)
if magic != b"GGUF":
raise ValueError("Invalid GGUF file: Magic bytes missing.")

version = struct.unpack("<I", f.read(4))[0]
if version < 2:
raise ValueError(f"Unsupported GGUF version: {version}")

tensor_count = struct.unpack("<Q", f.read(8))[0]
kv_count = struct.unpack("<Q", f.read(8))[0]

with open(path, "rb") as f:
magic = f.read(4)
if magic != b"GGUF":
raise ValueError("Invalid GGUF file: Magic bytes missing.")

version = struct.unpack("<I", f.read(4))[0]
if version < 2:
raise ValueError(f"Unsupported GGUF version: {version}")

tensor_count = struct.unpack("<Q", f.read(8))[0]
kv_count = struct.unpack("<Q", f.read(8))[0]
metadata = {}
for _ in range(kv_count):
key_len = struct.unpack("<Q", f.read(8))[0]
key_name = f.read(key_len).decode("utf-8")
val_type = struct.unpack("<I", f.read(4))[0]
metadata[key_name] = _read_gguf_value(f, val_type)

metadata = {}
for _ in range(kv_count):
key_len = struct.unpack("<Q", f.read(8))[0]
key_name = f.read(key_len).decode("utf-8")
val_type = struct.unpack("<I", f.read(4))[0]
metadata[key_name] = _read_gguf_value(f, val_type)

tensors["__metadata__"] = metadata

for _ in range(tensor_count):
name_len = struct.unpack("<Q", f.read(8))[0]
name = f.read(name_len).decode("utf-8")

n_dims = struct.unpack("<I", f.read(4))[0]
shape = []
for _ in range(n_dims):
shape.append(struct.unpack("<Q", f.read(8))[0])

t_type = struct.unpack("<I", f.read(4))[0]
f.read(8) # skip offset bytes

# Strict GGUF tensor type mapping
dtype = GGML_TYPE_MAP.get(t_type, "Unknown")

tensors[name] = {"shape": shape, "dtype": dtype}
tensors["__metadata__"] = metadata

for _ in range(tensor_count):
name_len = struct.unpack("<Q", f.read(8))[0]
name = f.read(name_len).decode("utf-8")

n_dims = struct.unpack("<I", f.read(4))[0]
shape = []
for _ in range(n_dims):
shape.append(struct.unpack("<Q", f.read(8))[0])

t_type = struct.unpack("<I", f.read(4))[0]
f.read(8) # skip offset bytes

# Strict GGUF tensor type mapping
dtype = GGML_TYPE_MAP.get(t_type, "Unknown")

tensors[name] = {"shape": shape, "dtype": dtype}

return tensors

Loading
Loading