From 822fcf70d5c45d2a5bacee65f7ca66a26178c79a Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Fri, 29 May 2026 11:23:54 +0200 Subject: [PATCH] Handle text-mode file objects in parse_wpc_surface_bulletin parse_wpc_surface_bulletin documents accepting a file-like object, but it unconditionally called .decode('utf-8') on the result of file.read(). For a text-mode object such as io.StringIO, read() returns str, so this raised AttributeError: 'str' object has no attribute 'decode'. Only decode when read() returns bytes, so both binary (file paths opened in 'rb', BytesIO) and text (StringIO, files opened in text mode) sources work as the docstring promises. Add a regression test asserting a StringIO parses identically to the equivalent BytesIO, and move the misplaced 'year' entry from the Returns section to Parameters in the docstring. Closes #3923 --- src/metpy/io/text.py | 15 ++++++++++----- tests/io/test_text.py | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/metpy/io/text.py b/src/metpy/io/text.py index 262cf0ec8a5..c79f111c7b7 100644 --- a/src/metpy/io/text.py +++ b/src/metpy/io/text.py @@ -83,23 +83,28 @@ def parse_wpc_surface_bulletin(bulletin, year=None): bulletin : str or file-like object If str, the name of the file to be opened. If `bulletin` is a file-like object, this will be read from directly. + year : int, optional + Year to assume when parsing the timestamp from the bulletin. Defaults to `None`, + which results in the parser trying to find a year in the product header; if this + search fails, the current year is assumed. Returns ------- dataframe : pandas.DataFrame A `DataFrame` where each row represents a pressure center or front. The `DataFrame` has four columns: 'valid', 'feature', 'strength', and 'geometry'. - year : int - Year to assume when parsing the timestamp from the bulletin. Defaults to `None`, - which results in the parser trying to find a year in the product header; if this - search fails, the current year is assumed. """ from shapely.geometry import LineString, Point # Create list with lines of text from file with contextlib.closing(open_as_needed(bulletin)) as file: - text = file.read().decode('utf-8') + text = file.read() + # ``open_as_needed`` opens filename paths in binary mode, but a file-like object + # passed in directly (e.g. ``StringIO``) may already be text. Only decode bytes so + # that both binary and text sources work, as the docstring promises. + if isinstance(text, bytes): + text = text.decode('utf-8') parsed_text = [] valid_time = datetime.now(UTC).replace(tzinfo=None) diff --git a/tests/io/test_text.py b/tests/io/test_text.py index 14b6ee5c03f..eb5bd7ef52f 100644 --- a/tests/io/test_text.py +++ b/tests/io/test_text.py @@ -5,6 +5,7 @@ from datetime import datetime import numpy as np +import pandas as pd from metpy.cbook import get_test_data from metpy.io import parse_wpc_surface_bulletin @@ -98,3 +99,22 @@ def test_negative_lat(): """) df = parse_wpc_surface_bulletin(sample) assert df.geometry[0] == sgeom.Point([-51, -3]) + + +@needs_module('shapely') +def test_parse_wpc_surface_bulletin_text_file_object(): + """Test parsing a text-mode file-like object (e.g. StringIO), not just bytes.""" + from io import BytesIO, StringIO + + text = """VALID 062818Z +HIGHS 1022 3961069 1020 3851069 1026 3750773 +LOWS 1016 4510934 1002 3441145 1003 4271229 +TROF 2971023 2831018 2691008 + """ + # A text-mode file object (read() -> str) must parse identically to a binary one + df_text = parse_wpc_surface_bulletin(StringIO(text), year=2000) + df_bytes = parse_wpc_surface_bulletin(BytesIO(text.encode('utf-8')), year=2000) + + pd.testing.assert_frame_equal(df_text, df_bytes) + assert len(df_text) == 7 + assert all(df_text.valid == datetime(2000, 6, 28, 18, 0, 0))