pipe1os · pipe1os · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026
diff --git a/README.md b/README.md
@@ -64,10 +64,17 @@ Inspect a local model checkpoint:
 modelinfo mistral-7b.safetensors
 ```
 
-Inspect a remote model directly from the Hugging Face Hub:
+Inspect a remote model directly from the Hugging Face Hub (SafeTensors and GGUF repositories are both supported):
 
 ```bash
+# Inspect a remote SafeTensors repository
 modelinfo meta-llama/Llama-2-7b-hf
+
+# Inspect a remote GGUF repository (shows a comparison table of all quantizations)
+modelinfo bartowski/Meta-Llama-3-8B-Instruct-GGUF
+
+# Inspect a specific remote GGUF file in a repository
+modelinfo bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf
 ```
 
 For gated models (e.g., Llama 2), you must provide authentication by setting the `HF_TOKEN` environment variable. You can create a token in your [Hugging Face settings](https://huggingface.co/settings/tokens).

diff --git a/src/modelinfo/cli.py b/src/modelinfo/cli.py
@@ -149,7 +149,12 @@ def analyze_model(
 
     file_path_lower = file_path.lower()
 
-    if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
+    is_remote = False
+    if not os.path.exists(file_path):
+        if "/" in file_path or not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
+            is_remote = True
+
+    if is_remote:
         from modelinfo.parsers.huggingface import fetch_huggingface_repo
         tensors, config, format_name, disk_size = fetch_huggingface_repo(
             file_path, fetch_tensors=fetch_tensors, timeout=timeout
@@ -180,7 +185,7 @@ def analyze_model(
     max_context = None
     if config:
         max_context = config.get("max_position_embeddings")
-    elif format_name == "GGUF":
+    elif format_name in ("GGUF", "GGUF_group"):
         metadata = tensors.get("__metadata__", {})
         gen_arch = metadata.get("general.architecture")
         if gen_arch:
@@ -207,8 +212,8 @@ def analyze_model(
     num_layers = footprint["num_layers"]
     arch_name = identify_architecture_name(tensors, num_layers, config)
 
-    if format_name != "SafeTensors" or os.path.exists(file_path):
-        disk_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0.0
+    if os.path.exists(file_path):
+        disk_size = os.path.getsize(file_path)
 
     tensor_count = len([k for k in tensors.keys() if k != "__metadata__"])
 

diff --git a/src/modelinfo/parsers/gguf.py b/src/modelinfo/parsers/gguf.py
@@ -44,46 +44,88 @@ def _read_gguf_value(f: Any, val_type: int) -> Any:
         raise ValueError(f"Unknown GGUF value type: {val_type}")
 
 
-def parse_gguf_header(path: str) -> Dict[str, Any]:
+def parse_gguf_header(path_or_file: str | Any) -> Dict[str, Any]:
-    """Parses a GGUF file header and extracts tensor information."""
+    """Parse a GGUF header and return its metadata and tensor descriptors.
+
+    Args:
+        path_or_file: A filesystem path (anything ``open()`` accepts, e.g.
+            ``str`` or ``os.PathLike``) or an object with a ``read()``
+            method that yields the raw bytes of a GGUF file. A stream passed
+            in by the caller is not closed here.
+
+    Returns:
+        A dict mapping each tensor name to ``{"shape": [...], "dtype": ...}``,
+        plus a ``"__metadata__"`` entry holding the header key/value pairs.
+
+    Raises:
+        ValueError: If the magic bytes are not ``b"GGUF"`` or the version
+            is lower than 2.
+        struct.error: If a read returns fewer bytes than a fixed-size field needs.
+    """
+    # Dispatch on the presence of ``read()`` rather than ``isinstance(str)``:
+    # before streams were supported the argument went straight to ``open()``,
+    # so ``pathlib.Path`` (and bytes-path) callers must keep working.
+    if hasattr(path_or_file, "read"):
+        return _parse_gguf_header_from_stream(path_or_file)
+
+    with open(path_or_file, "rb") as f:
+        return _parse_gguf_header_from_stream(f)
+
+
+def _parse_gguf_header_from_stream(f: Any) -> Dict[str, Any]:
+    """Parse a GGUF header from a binary stream positioned at the magic bytes.
+
+    Layout read here: magic, version, tensor count, key/value count, the
+    key/value metadata entries, then one descriptor per tensor. All integers
+    are unpacked as little-endian.
+
+    Args:
+        f: Object whose ``read(n)`` returns the next ``n`` bytes of the file.
+
+    Returns:
+        A dict mapping each tensor name to ``{"shape": [...], "dtype": ...}``,
+        plus a ``"__metadata__"`` entry holding the header key/value pairs.
+
+    Raises:
+        ValueError: If the magic bytes are not ``b"GGUF"`` or the version
+            is lower than 2.
+        struct.error: If a read returns fewer bytes than a fixed-size field needs.
+    """
     tensors: Dict[str, Any] = {}
+    magic = f.read(4)
+    if magic != b"GGUF":
+        raise ValueError("Invalid GGUF file: Magic bytes missing.")
+
+    version = struct.unpack("<I", f.read(4))[0]
+    if version < 2:
+        raise ValueError(f"Unsupported GGUF version: {version}")
+
+    tensor_count = struct.unpack("<Q", f.read(8))[0]
+    kv_count = struct.unpack("<Q", f.read(8))[0]
 
-    with open(path, "rb") as f:
-        magic = f.read(4)
-        if magic != b"GGUF":
-            raise ValueError("Invalid GGUF file: Magic bytes missing.")
-
-        version = struct.unpack("<I", f.read(4))[0]
-        if version < 2:
-            raise ValueError(f"Unsupported GGUF version: {version}")
-
-        tensor_count = struct.unpack("<Q", f.read(8))[0]
-        kv_count = struct.unpack("<Q", f.read(8))[0]
+    # Key/value section: length-prefixed UTF-8 key, value type tag, value.
+    metadata: Dict[str, Any] = {}
+    for _ in range(kv_count):
+        key_len = struct.unpack("<Q", f.read(8))[0]
+        key_name = f.read(key_len).decode("utf-8")
+        val_type = struct.unpack("<I", f.read(4))[0]
+        metadata[key_name] = _read_gguf_value(f, val_type)
 
-        metadata = {}
-        for _ in range(kv_count):
-            key_len = struct.unpack("<Q", f.read(8))[0]
-            key_name = f.read(key_len).decode("utf-8")
-            val_type = struct.unpack("<I", f.read(4))[0]
-            metadata[key_name] = _read_gguf_value(f, val_type)
-
-        tensors["__metadata__"] = metadata
-
-        for _ in range(tensor_count):
-            name_len = struct.unpack("<Q", f.read(8))[0]
-            name = f.read(name_len).decode("utf-8")
-
-            n_dims = struct.unpack("<I", f.read(4))[0]
-            shape = []
-            for _ in range(n_dims):
-                shape.append(struct.unpack("<Q", f.read(8))[0])
-
-            t_type = struct.unpack("<I", f.read(4))[0]
-            f.read(8)  # skip offset bytes
-
-            # Strict GGUF tensor type mapping
-            dtype = GGML_TYPE_MAP.get(t_type, "Unknown")
-
-            tensors[name] = {"shape": shape, "dtype": dtype}
+    tensors["__metadata__"] = metadata
+
+    for _ in range(tensor_count):
+        name_len = struct.unpack("<Q", f.read(8))[0]
+        name = f.read(name_len).decode("utf-8")
+
+        n_dims = struct.unpack("<I", f.read(4))[0]
+        shape = [struct.unpack("<Q", f.read(8))[0] for _ in range(n_dims)]
+
+        t_type = struct.unpack("<I", f.read(4))[0]
+        f.read(8)  # skip offset bytes
+
+        # Strict GGUF tensor type mapping
+        dtype = GGML_TYPE_MAP.get(t_type, "Unknown")
+
+        tensors[name] = {"shape": shape, "dtype": dtype}
 
     return tensors