diff --git a/README.md b/README.md
index 3dfef3c..f5186a4 100644
--- a/README.md
+++ b/README.md
@@ -64,10 +64,17 @@ Inspect a local model checkpoint:
 modelinfo mistral-7b.safetensors
 ```
 
-Inspect a remote model directly from the Hugging Face Hub:
+Inspect a remote model directly from the Hugging Face Hub (both SafeTensors and GGUF):
 
 ```bash
+# Inspect a remote SafeTensors repository
 modelinfo meta-llama/Llama-2-7b-hf
+
+# Inspect a remote GGUF repository (shows a comparison table of all quantizations)
+modelinfo bartowski/Meta-Llama-3-8B-Instruct-GGUF
+
+# Inspect a specific remote GGUF file in a repository
+modelinfo bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf
 ```
 
 For gated models (e.g., Llama 2), you must provide authentication by setting the `HF_TOKEN` environment variable. You can create a token in your [Hugging Face settings](https://huggingface.co/settings/tokens).
diff --git a/src/modelinfo/cli.py b/src/modelinfo/cli.py
index c89b6d7..cb4be02 100644
--- a/src/modelinfo/cli.py
+++ b/src/modelinfo/cli.py
@@ -149,7 +149,12 @@ def analyze_model(
     
     file_path_lower = file_path.lower()
     
-    if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
+    is_remote = False
+    if not os.path.exists(file_path):
+        if "/" in file_path or not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
+            is_remote = True
+
+    if is_remote:
         from modelinfo.parsers.huggingface import fetch_huggingface_repo
         tensors, config, format_name, disk_size = fetch_huggingface_repo(
             file_path, fetch_tensors=fetch_tensors, timeout=timeout
@@ -180,7 +185,7 @@ def analyze_model(
     max_context = None
     if config:
         max_context = config.get("max_position_embeddings")
-    elif format_name == "GGUF":
+    elif format_name in ("GGUF", "GGUF_group"):
         metadata = tensors.get("__metadata__", {})
         gen_arch = metadata.get("general.architecture")
         if gen_arch:
@@ -207,8 +212,8 @@ def analyze_model(
     num_layers = footprint["num_layers"]
     arch_name = identify_architecture_name(tensors, num_layers, config)
 
-    if format_name != "SafeTensors" or os.path.exists(file_path):
-        disk_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0.0
+    if os.path.exists(file_path):
+        disk_size = os.path.getsize(file_path)
         
     tensor_count = len([k for k in tensors.keys() if k != "__metadata__"])
     
diff --git a/src/modelinfo/parsers/gguf.py b/src/modelinfo/parsers/gguf.py
index 5f3d210..3af2fb4 100644
--- a/src/modelinfo/parsers/gguf.py
+++ b/src/modelinfo/parsers/gguf.py
@@ -44,46 +44,53 @@ def _read_gguf_value(f: Any, val_type: int) -> Any:
         raise ValueError(f"Unknown GGUF value type: {val_type}")
 
 
-def parse_gguf_header(path: str) -> Dict[str, Any]:
+def parse_gguf_header(path_or_file: str | Any) -> Dict[str, Any]:
     """Parses a GGUF file header and extracts tensor information."""
+    if hasattr(path_or_file, "read"):
+        return _parse_gguf_header_from_stream(path_or_file)  # open stream; the caller owns it
+    # Everything else (str, bytes or os.PathLike) is a filesystem path for open().
+    with open(path_or_file, "rb") as f:
+        return _parse_gguf_header_from_stream(f)
+
+
+def _parse_gguf_header_from_stream(f: Any) -> Dict[str, Any]:
     tensors: Dict[str, Any] = {}
+    magic = f.read(4)
+    if magic != b"GGUF":
+        raise ValueError("Invalid GGUF file: Magic bytes missing.")
+        
+    version = struct.unpack("<I", f.read(4))[0]
+    if version < 2:
+        raise ValueError(f"Unsupported GGUF version: {version}")
+        
+    tensor_count = struct.unpack("<Q", f.read(8))[0]
+    kv_count = struct.unpack("<Q", f.read(8))[0]
     
-    with open(path, "rb") as f:
-        magic = f.read(4)
-        if magic != b"GGUF":
-            raise ValueError("Invalid GGUF file: Magic bytes missing.")
-            
-        version = struct.unpack("<I", f.read(4))[0]
-        if version < 2:
-            raise ValueError(f"Unsupported GGUF version: {version}")
-            
-        tensor_count = struct.unpack("<Q", f.read(8))[0]
-        kv_count = struct.unpack("<Q", f.read(8))[0]
+    metadata = {}
+    for _ in range(kv_count):
+        key_len = struct.unpack("<Q", f.read(8))[0]
+        key_name = f.read(key_len).decode("utf-8")
+        val_type = struct.unpack("<I", f.read(4))[0]
+        metadata[key_name] = _read_gguf_value(f, val_type)
         
-        metadata = {}
-        for _ in range(kv_count):
-            key_len = struct.unpack("<Q", f.read(8))[0]
-            key_name = f.read(key_len).decode("utf-8")
-            val_type = struct.unpack("<I", f.read(4))[0]
-            metadata[key_name] = _read_gguf_value(f, val_type)
-            
-        tensors["__metadata__"] = metadata
-            
-        for _ in range(tensor_count):
-            name_len = struct.unpack("<Q", f.read(8))[0]
-            name = f.read(name_len).decode("utf-8")
-            
-            n_dims = struct.unpack("<I", f.read(4))[0]
-            shape = []
-            for _ in range(n_dims):
-                shape.append(struct.unpack("<Q", f.read(8))[0])
-            
-            t_type = struct.unpack("<I", f.read(4))[0]
-            f.read(8)  # skip offset bytes
-            
-            # Strict GGUF tensor type mapping
-            dtype = GGML_TYPE_MAP.get(t_type, "Unknown")
-                
-            tensors[name] = {"shape": shape, "dtype": dtype}
+    tensors["__metadata__"] = metadata
+        
+    for _ in range(tensor_count):
+        name_len = struct.unpack("<Q", f.read(8))[0]
+        name = f.read(name_len).decode("utf-8")
+        
+        n_dims = struct.unpack("<I", f.read(4))[0]
+        shape = []
+        for _ in range(n_dims):
+            shape.append(struct.unpack("<Q", f.read(8))[0])
+        
+        t_type = struct.unpack("<I", f.read(4))[0]
+        f.read(8)  # skip offset bytes
+        
+        # Strict GGUF tensor type mapping
+        dtype = GGML_TYPE_MAP.get(t_type, "Unknown")
             
+        tensors[name] = {"shape": shape, "dtype": dtype}
+        
     return tensors
+
diff --git a/src/modelinfo/parsers/huggingface.py b/src/modelinfo/parsers/huggingface.py
index a070e41..00f09e3 100644
--- a/src/modelinfo/parsers/huggingface.py
+++ b/src/modelinfo/parsers/huggingface.py
@@ -5,7 +5,7 @@
 import urllib.error
 import urllib.parse
 import urllib.request
-from typing import Any, Dict, Tuple
+from typing import Any, Dict, List, Tuple
 
 def _get_hf_endpoint() -> str:
     endpoint = os.environ.get("HF_ENDPOINT", "https://huggingface.co").strip()
@@ -101,19 +101,205 @@ def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0
         
     return json.loads(json_bytes)
 
+def _get_remote_file_size_fallback(repo_id: str, filename: str, timeout: float = 10.0) -> float:
+    """Return the Content-Length reported for a HEAD request on the file, or 0.0 on any failure."""
+    # quote(): a file name with spaces or reserved characters would otherwise corrupt the URL.
+    url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/{urllib.parse.quote(filename)}"
+    token = _get_hf_token()
+    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"} if token else {}, method="HEAD")
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as response:
+            return float(response.headers.get("Content-Length", 0))
+    except Exception:  # deliberate best effort: 0.0 stands for "size unknown"
+        return 0.0
+
+
+class RemoteFileStream:
+    """Read-only, file-like view of a remote file fetched lazily with HTTP Range requests.
+
+    Bytes are downloaded front-to-back in ``chunk_size`` pieces and cached in
+    ``buffer``; ``position`` is a cursor into that cache. Reads may not reach past
+    ``max_bytes``, so a bogus length field cannot trigger an unbounded download.
+    """
+    def __init__(self, url: str, chunk_size: int = 1024*1024, timeout: float = 10.0, max_bytes: int = 50 * 1024 * 1024):
+        self.url = url
+        self.chunk_size = chunk_size
+        self.timeout = timeout
+        self.max_bytes = max_bytes
+        self.buffer = b""
+        self.position = 0
+
+    def read(self, size: int = -1) -> bytes:
+        """Return up to ``size`` bytes from the cursor; fewer if the server has no more data."""
+        if size is None or size < 0:
+            raise NotImplementedError("Unlimited remote read is not supported.")
+        end_pos = self.position + size
+        if end_pos > self.max_bytes:
+            raise ValueError(f"Remote header read limit exceeded ({self.max_bytes // (1024 * 1024)}MB). File might be invalid or too large.")
+        while end_pos > len(self.buffer):
+            start_bytes = len(self.buffer)
+            headers = {"Range": f"bytes={start_bytes}-{start_bytes + self.chunk_size - 1}"}
+            try:
+                chunk = _make_request(self.url, headers=headers, limit=self.chunk_size, timeout=self.timeout)
+            except urllib.error.HTTPError as e:
+                if e.code == 416:  # Range Not Satisfiable: nothing left beyond the cached bytes
+                    break
+                raise
+            if not chunk:
+                break
+            self.buffer += chunk
+        result = self.buffer[self.position:end_pos]
+        self.position += len(result)
+        return result
+
+    def seek(self, offset: int, whence: int = 0) -> int:
+        """Move the cursor relative to the start (0) or the current position (1)."""
+        if whence == 0:
+            target = offset
+        elif whence == 1:
+            target = self.position + offset
+        else:
+            raise NotImplementedError("Seek from end is not supported.")
+        if target < 0:  # a negative cursor would silently slice from the end of the cache
+            raise ValueError(f"Negative seek position {target}")
+        self.position = target
+        return self.position
+
+    def tell(self) -> int:
+        return self.position
+
+    def close(self) -> None:
+        pass  # nothing to release: no connection is held between reads
+
+
+def _fetch_remote_gguf_single(real_repo_id: str, filename: str, fallback_size: float | None, timeout: float) -> Tuple[Dict[str, Any], float]:
+    """Parse one remote GGUF header; returns ``(tensors, size)``, probing the size via HEAD if unknown."""
+    from modelinfo.parsers.gguf import parse_gguf_header
+    # quote(): a file name with spaces or reserved characters would otherwise corrupt the URL.
+    url = f"{_get_hf_endpoint()}/{real_repo_id}/resolve/main/{urllib.parse.quote(filename)}"
+    tensors = parse_gguf_header(RemoteFileStream(url, timeout=timeout))
+    size = fallback_size or 0.0  # None and 0 both mean the caller had no usable size
+    if size == 0.0:
+        size = _get_remote_file_size_fallback(real_repo_id, filename, timeout)
+    return tensors, size
+
+
+def _fetch_remote_gguf_group(real_repo_id: str, gguf_files: List[Dict[str, Any]], timeout: float) -> Dict[str, Any]:
+    """Describe a repository that holds several GGUF files.
+
+    Only one header is parsed - that of the smallest file with a known size, or the
+    first file when no sizes are known. Every file is then listed with its size
+    under ``__metadata__["gguf_variants"]``, next to ``__metadata__["repo_id"]``.
+    ``gguf_files`` must be non-empty; entries carry ``"filename"`` and ``"size"`` keys.
+    """
+    from modelinfo.parsers.gguf import parse_gguf_header
+
+    sized = [g for g in gguf_files if g["size"] is not None and g["size"] > 0]
+    header_target = min(sized, key=lambda g: g["size"]) if sized else gguf_files[0]
+    # quote(): a file name with spaces or reserved characters would otherwise corrupt the URL.
+    header_name = urllib.parse.quote(header_target["filename"])
+    url = f"{_get_hf_endpoint()}/{real_repo_id}/resolve/main/{header_name}"
+    tensors = parse_gguf_header(RemoteFileStream(url, timeout=timeout))
+
+    variants = []
+    for g in gguf_files:
+        # A missing or zero size is replaced by a best-effort HEAD lookup for that file.
+        v_size = g["size"] or _get_remote_file_size_fallback(real_repo_id, g["filename"], timeout)
+        variants.append({"filename": g["filename"], "size": float(v_size)})
+
+    metadata = tensors.setdefault("__metadata__", {})
+    metadata["gguf_variants"] = variants
+    metadata["repo_id"] = real_repo_id
+    return tensors
+
+
+def _fetch_shards_concurrently(real_repo_id: str, unique_shards: List[str], timeout: float) -> Dict[str, Any]:
+    """Download every shard header on a thread pool (at most 8 workers) and merge the tensor entries."""
+    pool_size = max(1, min(8, len(unique_shards)))
+    merged = {}
+    with concurrent.futures.ThreadPoolExecutor(max_workers=pool_size) as pool:
+        pending = [
+            pool.submit(_fetch_safetensors_header, real_repo_id, name, timeout=timeout)
+            for name in unique_shards
+        ]
+        for done in concurrent.futures.as_completed(pending):
+            # Each shard's own "__metadata__" entry is dropped; only tensor entries are kept.
+            merged.update((k, v) for k, v in done.result().items() if k != "__metadata__")
+    return merged
+
+
+def _fetch_remote_safetensors_sharded(
+    real_repo_id: str,
+    config: Dict[str, Any] | None,
+    fetch_tensors: bool,
+    timeout: float
+) -> Tuple[Dict[str, Any], float]:
+    """Load tensor info for a sharded SafeTensors repo from ``model.safetensors.index.json``.
+
+    With a config available, ``fetch_tensors`` off and a positive ``total_size`` in the
+    index, no shard is contacted: every tensor gets a placeholder entry and the
+    metadata is flagged ``lazy_fetch``. Otherwise all shard headers are downloaded.
+
+    Returns:
+        ``(tensors, total_size)`` with ``total_size`` taken from the index metadata.
+    """
+    raw_index = _make_request(
+        f"{_get_hf_endpoint()}/{real_repo_id}/resolve/main/model.safetensors.index.json",
+        timeout=timeout,
+    )
+    index_data = json.loads(raw_index.decode("utf-8"))
+    weight_map = index_data.get("weight_map", {})
+    unique_shards = list(set(weight_map.values()))
+    total_size = index_data.get("metadata", {}).get("total_size", 0.0)
+    shard_info = {"missing_shards": 0, "total_shards": len(unique_shards), "is_sharded": True}
+
+    if config and not fetch_tensors and total_size > 0:
+        # Placeholder entries: the shape is left empty and the dtype is recorded as BF16.
+        tensors = {name: {"shape": [], "dtype": "BF16"} for name in weight_map}
+        tensors["__metadata__"] = {**shard_info, "lazy_fetch": True, "total_size": total_size}
+    else:
+        tensors = _fetch_shards_concurrently(real_repo_id, unique_shards, timeout)
+        tensors["__metadata__"] = shard_info
+    return tensors, float(total_size)
+
+
+
+def _fetch_remote_safetensors_single(real_repo_id: str, timeout: float) -> Tuple[Dict[str, Any], float]:
+    """Fetch the header and on-disk size of a repo's single ``model.safetensors``.
+
+    The size comes from the best-effort HEAD request made by
+    ``_get_remote_file_size_fallback`` (0.0 when it cannot be determined);
+    reusing that helper avoids a second copy of the same request logic.
+
+    Returns:
+        ``(header, size_in_bytes)`` where ``header`` is whatever
+        ``_fetch_safetensors_header`` returns for ``model.safetensors``.
+    """
+    total_size = _get_remote_file_size_fallback(real_repo_id, "model.safetensors", timeout)
+    header = _fetch_safetensors_header(real_repo_id, "model.safetensors", timeout=timeout)
+    return header, float(total_size)
+
+
 def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
     """
     Fetches the metadata directly from the Hugging Face Hub over the network.
     Returns: (tensors, config, format_name, disk_size)
     """
-    api_url = f"{_get_hf_endpoint()}/api/models/{repo_id}"
+    target_filename = None
+    parts = repo_id.split("/")
+    if len(parts) >= 3 and parts[-1].lower().endswith(".gguf"):
+        real_repo_id = "/".join(parts[:2])
+        target_filename = "/".join(parts[2:])
+    else:
+        real_repo_id = repo_id
+
+    api_url = f"{_get_hf_endpoint()}/api/models/{real_repo_id}"
     try:
         api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8"))
     except urllib.error.HTTPError as e:
         if e.code == 401:
-            raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}")
+            raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {real_repo_id}")
         if e.code == 404:
-             raise FileNotFoundError(f"Could not find repository on Hugging Face (404 Not Found): {repo_id}")
+             raise FileNotFoundError(f"Could not find repository on Hugging Face (404 Not Found): {real_repo_id}")
         raise
         
     siblings = api_data.get("siblings", [])
@@ -121,73 +307,44 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: f
     
     config = None
     if "config.json" in filenames:
-        config_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/config.json"
+        config_url = f"{_get_hf_endpoint()}/{real_repo_id}/resolve/main/config.json"
         config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8"))
-        
-    tensors = {}
-    total_size = 0.0
-    
+
+    # Find GGUF siblings
+    gguf_files = []
+    for s in siblings:
+        fname = s.get("rfilename", "")
+        if fname.lower().endswith(".gguf"):
+            gguf_files.append({
+                "filename": fname,
+                "size": s.get("size")
+            })
+
+    if target_filename:
+        target_sibling = next((g for g in gguf_files if g["filename"] == target_filename), None)
+        if not target_sibling:
+            raise FileNotFoundError(f"Could not find file '{target_filename}' in Hugging Face repository '{real_repo_id}'.")
+        tensors, size = _fetch_remote_gguf_single(real_repo_id, target_filename, target_sibling["size"], timeout)
+        return tensors, config, "GGUF", float(size)
+
+    # Fallback to SafeTensors checks if no specific file is target
     if "model.safetensors.index.json" in filenames:
-        # Sharded SafeTensors
-        index_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors.index.json"
-        index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8"))
-        
-        weight_map = index_data.get("weight_map", {})
-        unique_shards = list(set(weight_map.values()))
-        
-        total_size = index_data.get("metadata", {}).get("total_size", 0.0)
-        
-        if config and not fetch_tensors and total_size > 0:
-            # Lazy Fetch Paradigm
-            for tensor_name in weight_map.keys():
-                tensors[tensor_name] = {"shape": [], "dtype": "BF16"}
-                
-            tensors["__metadata__"] = {
-                "missing_shards": 0,
-                "total_shards": len(unique_shards),
-                "is_sharded": True,
-                "lazy_fetch": True,
-                "total_size": total_size
-            }
-        else:
-            def fetch_shard(shard: str):
-                return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout)
-                
-            with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor:
-                future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards}
-                for future in concurrent.futures.as_completed(future_to_shard):
-                    shard, shard_header = future.result()
-                    for k, v in shard_header.items():
-                        if k != "__metadata__":
-                            tensors[k] = v
-                            
-            tensors["__metadata__"] = {
-                "missing_shards": 0,
-                "total_shards": len(unique_shards),
-                "is_sharded": True
-            }
-        format_name = "SafeTensors"
+        tensors, total_size = _fetch_remote_safetensors_sharded(real_repo_id, config, fetch_tensors, timeout)
+        return tensors, config, "SafeTensors", total_size
         
     elif "model.safetensors" in filenames:
-        # Single SafeTensors
-        
-        # Determine total size first
-        req = urllib.request.Request(f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors", method="HEAD")
-        token = _get_hf_token()
-        if token:
-            req.add_header("Authorization", f"Bearer {token}")
-        try:
-            with urllib.request.urlopen(req, timeout=timeout) as response:
-                total_size = int(response.headers.get("Content-Length", 0))
-        except Exception:
-            pass
+        header, total_size = _fetch_remote_safetensors_single(real_repo_id, timeout)
+        return header, config, "SafeTensors", total_size
+
+    elif gguf_files:
+        if len(gguf_files) == 1:
+            single_file = gguf_files[0]["filename"]
+            tensors, size = _fetch_remote_gguf_single(real_repo_id, single_file, gguf_files[0]["size"], timeout)
+            return tensors, config, "GGUF", float(size)
+        else:
+            tensors = _fetch_remote_gguf_group(real_repo_id, gguf_files, timeout)
+            return tensors, config, "GGUF_group", 0.0
 
-        header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout)
-        tensors = header
-            
-        format_name = "SafeTensors"
-        
     else:
-        raise ValueError(f"Repository {repo_id} does not contain SafeTensors weights.")
-        
-    return tensors, config, format_name, float(total_size)
+        raise ValueError(f"Repository {real_repo_id} does not contain SafeTensors or GGUF weights.")
+
diff --git a/src/modelinfo/ui.py b/src/modelinfo/ui.py
index f69ce28..11431e1 100644
--- a/src/modelinfo/ui.py
+++ b/src/modelinfo/ui.py
@@ -56,6 +56,63 @@ def print_model_info(
     gpu_vram_gb: float = 0.0,
     gpu_util: float = 0.9
 ) -> None:
+    if format_name == "GGUF_group":
+        metadata = tensors.get("__metadata__", {})
+        variants = metadata.get("gguf_variants", [])
+        repo_id = metadata.get("repo_id", "")
+        
+        console.print(f"[bold]Repository:[/bold]      {repo_id}")
+        console.print("[bold]Format:[/bold]          GGUF (Multiple Quantizations)")
+        console.print(f"[bold]Architecture:[/bold]    {arch_name}")
+        if max_context:
+            console.print(f"[bold]Context Limit:[/bold]   {max_context:,} tokens")
+        console.print()
+        
+        table = Table(box=None, show_header=True, header_style="bold", pad_edge=False, padding=(0, 2))
+        table.add_column("Quantization File")
+        table.add_column("File Size", justify="right")
+        table.add_column("KV Cache", justify="right")
+        table.add_column("Total VRAM", justify="right")
+        
+        show_fits = gpu_name is not None
+        if show_fits:
+            table.add_column("Fits", justify="left")
+            
+        kv_cache_bytes = footprint["kv_cache_bytes"]
+        penalty_percentage = footprint.get("penalty_percentage", 0.0)
+        cuda_overhead = 600 * 1024 * 1024 * gpu_count
+        
+        sorted_variants = sorted(variants, key=lambda x: x["size"], reverse=True)
+        for var in sorted_variants:
+            filename = var["filename"]
+            size_bytes = var["size"]
+            variant_overhead = cuda_overhead + (size_bytes * penalty_percentage)
+            total_vram_bytes = size_bytes + kv_cache_bytes + variant_overhead
+            
+            file_size_str = format_bytes(size_bytes)
+            kv_cache_str = format_bytes(kv_cache_bytes)
+            
+            vram_color = get_vram_color(total_vram_bytes, max_vram_gb)
+            total_vram_str = f"[{vram_color}]~{format_bytes(total_vram_bytes)}[/{vram_color}]"
+            
+            row_data = [filename, file_size_str, kv_cache_str, total_vram_str]
+            if show_fits:
+                utilization = total_vram_bytes / (max_vram_gb * 1024**3) if max_vram_gb > 0 else 2.0
+                if utilization <= gpu_util:
+                    fit_text = "[green]✓ Yes[/green]"
+                elif utilization <= 0.99:
+                    fit_text = "[yellow]⚠ Warning[/yellow]"
+                else:
+                    fit_text = "[red]✗ No[/red]"
+                row_data.append(fit_text)
+                
+            table.add_row(*row_data)
+            
+        console.print(table)
+        console.print()
+        console.print(f"[dim]Tip: To view details for a specific quantization, run: modelinfo {repo_id}/{sorted_variants[0]['filename']}[/dim]")
+        return
+
     summary = Table(box=None, show_header=False, pad_edge=False, padding=(0, 2))
     summary.add_column("Property", style="bold")
     summary.add_column("Value")
@@ -143,7 +200,7 @@ def print_model_info(
         summary.add_row("VRAM (est):", vram_display)
         if gpu_name:
             utilization = vram_bytes / (max_vram_gb * 1024**3) if max_vram_gb > 0 else 2.0
-            if utilization <= 0.90:
+            if utilization <= gpu_util:
                 fit_text = f"[green]✓ Fits comfortably in {gpu_name} ({max_vram_gb:.1f} GB)[/green]"
             elif utilization <= 0.99:
                 fit_text = f"[yellow]⚠ Warning: Extreme hardware limit on {gpu_name}. High risk of fragmentation OOM.[/yellow]"
diff --git a/tests/test_cli.py b/tests/test_cli.py
index a5792d3..857a225 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -177,3 +177,131 @@ def fake_calculate_footprint(tensors, *, context_length, batch_size, **kwargs):
         "fetch_tensors": True,
         "timeout": 22.5,
     }
+
+
+def test_analyze_model_gguf_group(monkeypatch):
+    """Test that analyze_model correctly handles and propagates GGUF groups."""
+    from modelinfo.parsers import huggingface
+    # Report every path as missing so analyze_model treats the argument as a remote repository.
+    def fake_exists(path):
+        return False
+    # Stand-in for the Hub fetch: canned GGUF metadata that lists two variants.
+    def fake_fetch(repo_id, *, fetch_tensors, timeout):
+        tensors = {
+            "__metadata__": {
+                "general.architecture": "llama",
+                "llama.block_count": 32,
+                "llama.attention.head_count_kv": 8,
+                "llama.attention.key_length": 128,
+                "gguf_variants": [
+                    {"filename": "model-q4.gguf", "size": 1000000000},
+                    {"filename": "model-q8.gguf", "size": 2000000000}
+                ],
+                "repo_id": "org/model-gguf"
+            }
+        }
+        return tensors, None, "GGUF_group", 0.0
+    # Swap the filesystem check and the fetcher for the stand-ins above.
+    monkeypatch.setattr(cli.os.path, "exists", fake_exists)
+    monkeypatch.setattr(huggingface, "fetch_huggingface_repo", fake_fetch)
+    # Fixed footprint numbers: only the pass-through of the group data is asserted below.
+    def fake_calculate_footprint(*args, **kwargs):
+        return {
+            "total_params": 1000000,
+            "base_memory_bytes": 2000000.0,
+            "kv_cache_bytes": 1000000.0,
+            "overhead_bytes": 600000.0,
+            "total_memory_bytes": 3600000.0,
+            "num_layers": 32,
+            "kv_dim": 1024,
+            "primary_dtype": "Q4_0",
+            "kv_is_estimate": False,
+            "penalty_percentage": 0.0,
+            "vllm_metrics": {}
+        }
+    monkeypatch.setattr(cli, "calculate_footprint", fake_calculate_footprint)
+    
+    info = cli.analyze_model("org/model-gguf", context_override=128)
+    
+    assert info["format_name"] == "GGUF_group"
+    assert info["tensors"]["__metadata__"]["repo_id"] == "org/model-gguf"
+    assert len(info["tensors"]["__metadata__"]["gguf_variants"]) == 2
+
+
+def _get_mock_gguf_group_data():  # shared (tensors, footprint) fixture for the GGUF_group rendering tests
+    tensors = {
+        "__metadata__": {
+            "general.architecture": "llama",
+            "llama.block_count": 32,
+            "llama.attention.head_count_kv": 8,
+            "llama.attention.key_length": 128,
+            "gguf_variants": [  # rendered as one table row per entry
+                {"filename": "model-q4.gguf", "size": 1000000000},
+                {"filename": "model-q8.gguf", "size": 2000000000}
+            ],
+            "repo_id": "org/model-gguf"
+        }
+    }
+    footprint = {
+        "total_params": 8000000000,
+        "base_memory_bytes": 4000000000.0,
+        "kv_cache_bytes": 1000000000.0,  # read by the group table for every row
+        "overhead_bytes": 600000000.0,
+        "total_memory_bytes": 5600000000.0,
+        "num_layers": 32,
+        "kv_dim": 1024,
+        "primary_dtype": "Q4_0",
+        "kv_is_estimate": False,
+        "penalty_percentage": 0.0,  # zero, so the per-variant overhead has no size-proportional part
+        "vllm_metrics": {}
+    }
+    return tensors, footprint
+
+
+def test_print_model_info_gguf_group_no_gpu(capsys):
+    """Test print_model_info renders comparison table without Fits column when no GPU target."""
+    from modelinfo.ui import print_model_info
+    tensors, footprint = _get_mock_gguf_group_data()
+    print_model_info(
+        format_name="GGUF_group",
+        arch_name="Llama (32 layers)",
+        tensor_count=0,
+        footprint=footprint,
+        disk_size=0.0,
+        context_length=8192,
+        is_default_context=True,
+        tensors=tensors,
+        max_context=32768,
+        max_vram_gb=8.0,
+        gpu_name=None  # no GPU target: the "Fits" column is only added when gpu_name is set
+    )
+    out, _ = capsys.readouterr()
+    assert "model-q4.gguf" in out
+    assert "model-q8.gguf" in out
+    assert "Fits" not in out
+    assert "Tip:" in out  # closing hint that shows how to inspect a single file
+
+
+def test_print_model_info_gguf_group_with_gpu(capsys):
+    """Test print_model_info renders comparison table with Fits column when GPU target exists."""
+    from modelinfo.ui import print_model_info
+    tensors, footprint = _get_mock_gguf_group_data()
+    print_model_info(
+        format_name="GGUF_group",
+        arch_name="Llama (32 layers)",
+        tensor_count=0,
+        footprint=footprint,
+        disk_size=0.0,
+        context_length=8192,
+        is_default_context=True,
+        tensors=tensors,
+        max_context=32768,
+        max_vram_gb=8.0,
+        gpu_name="RTX4080"  # any non-None name switches the "Fits" column on
+    )
+    out, _ = capsys.readouterr()
+    assert "model-q4.gguf" in out
+    assert "model-q8.gguf" in out
+    assert "Fits" in out  # column header of the per-variant fit verdict
+
+
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
index 10ebc91..c1d0b6e 100644
--- a/tests/test_parsers.py
+++ b/tests/test_parsers.py
@@ -82,3 +82,123 @@ def test_hf_endpoint_rejects_no_hostname(monkeypatch):
     monkeypatch.setenv("HF_ENDPOINT", "https:///repo")
     with pytest.raises(ValueError, match="must include a valid hostname"):
         _get_hf_endpoint()
+
+
+def test_remote_gguf_parsing_single(monkeypatch):
+    """Test remote GGUF parsing when a single GGUF is found in the repository."""
+    import json
+    from modelinfo.parsers import huggingface
+    # Serves the repository listing and a minimal GGUF header in place of real HTTP calls.
+    def fake_make_request(url, headers=None, limit=None, timeout=10.0):
+        if "/api/models/" in url:
+            return json.dumps({
+                "siblings": [
+                    {"rfilename": "model-q4.gguf", "size": 1000000000}
+                ]
+            }).encode("utf-8")
+        elif "model-q4.gguf" in url:
+            import struct
+            header = b"GGUF" + struct.pack("<IQQ", 2, 0, 0)  # magic, version 2, 0 tensors, 0 metadata entries
+            return header
+        raise ValueError(f"Unexpected url: {url}")
+        
+    monkeypatch.setattr(huggingface, "_make_request", fake_make_request)
+    
+    tensors, config, format_name, disk_size = huggingface.fetch_huggingface_repo("org/model-gguf")
+    # The listing already carries a size, so it is reported unchanged.
+    assert format_name == "GGUF"
+    assert disk_size == 1000000000.0
+    assert tensors.get("__metadata__") == {}
+
+
+def test_remote_gguf_parsing_group(monkeypatch):
+    """Test remote GGUF parsing when multiple GGUF files are present in the repository."""
+    import json
+    from modelinfo.parsers import huggingface
+    # Only the q4 file is served: it is the smallest, and any other URL raises ValueError.
+    def fake_make_request(url, headers=None, limit=None, timeout=10.0):
+        if "/api/models/" in url:
+            return json.dumps({
+                "siblings": [
+                    {"rfilename": "model-q4.gguf", "size": 1000000000},
+                    {"rfilename": "model-q8.gguf", "size": 2000000000}
+                ]
+            }).encode("utf-8")
+        elif "model-q4.gguf" in url:
+            import struct
+            header = b"GGUF" + struct.pack("<IQQ", 2, 0, 0)  # magic, version 2, 0 tensors, 0 metadata entries
+            return header
+        raise ValueError(f"Unexpected url: {url}")
+        
+    monkeypatch.setattr(huggingface, "_make_request", fake_make_request)
+    
+    tensors, config, format_name, disk_size = huggingface.fetch_huggingface_repo("org/model-gguf")
+    # A group has no single disk size; per-file sizes live in the variants list.
+    assert format_name == "GGUF_group"
+    assert disk_size == 0.0
+    assert "gguf_variants" in tensors["__metadata__"]
+    assert len(tensors["__metadata__"]["gguf_variants"]) == 2
+
+
+def test_remote_gguf_parsing_explicit(monkeypatch):
+    """Test remote GGUF parsing when the user targets a specific GGUF file in the repo id."""
+    import json
+    from modelinfo.parsers import huggingface
+    # Records each download of the q8 file; a request for q4 would raise ValueError below.
+    called_gguf = []
+    def fake_make_request(url, headers=None, limit=None, timeout=10.0):
+        if "/api/models/" in url:
+            return json.dumps({
+                "siblings": [
+                    {"rfilename": "model-q4.gguf", "size": 1000000000},
+                    {"rfilename": "model-q8.gguf", "size": 2000000000}
+                ]
+            }).encode("utf-8")
+        elif "model-q8.gguf" in url:
+            called_gguf.append("q8")
+            import struct
+            header = b"GGUF" + struct.pack("<IQQ", 2, 0, 0)  # magic, version 2, 0 tensors, 0 metadata entries
+            return header
+        raise ValueError(f"Unexpected url: {url}")
+        
+    monkeypatch.setattr(huggingface, "_make_request", fake_make_request)
+    
+    tensors, config, format_name, disk_size = huggingface.fetch_huggingface_repo("org/model-gguf/model-q8.gguf")
+    # Exactly one request must have hit the targeted file.
+    assert format_name == "GGUF"
+    assert disk_size == 2000000000.0
+    assert called_gguf == ["q8"]
+
+
+def test_remote_gguf_parsing_unauthorized(monkeypatch):
+    """Test remote parsing raises PermissionError for gated/unauthorized (401) model repositories."""
+    import urllib.error
+    from modelinfo.parsers import huggingface
+    # Every request fails with HTTP 401, starting with the repository lookup.
+    def fake_make_request(url, headers=None, limit=None, timeout=10.0):
+        raise urllib.error.HTTPError(url, 401, "Unauthorized", {}, None)
+        
+    monkeypatch.setattr(huggingface, "_make_request", fake_make_request)
+    
+    import pytest
+    with pytest.raises(PermissionError) as exc_info:
+        huggingface.fetch_huggingface_repo("org/gated-model")
+    assert "Gated/Private Model" in str(exc_info.value)
+
+
+def test_remote_gguf_parsing_not_found(monkeypatch):
+    """Test remote parsing raises FileNotFoundError for missing (404) model repositories."""
+    import urllib.error
+    from modelinfo.parsers import huggingface
+    # Every request fails with HTTP 404, starting with the repository lookup.
+    def fake_make_request(url, headers=None, limit=None, timeout=10.0):
+        raise urllib.error.HTTPError(url, 404, "Not Found", {}, None)
+        
+    monkeypatch.setattr(huggingface, "_make_request", fake_make_request)
+    
+    import pytest
+    with pytest.raises(FileNotFoundError) as exc_info:
+        huggingface.fetch_huggingface_repo("org/nonexistent-model")
+    assert "Could not find repository on Hugging Face" in str(exc_info.value)
+
+