4141)
4242from pyarrow.csv import write_csv
4343
44-
44+ MB = 1024* 1024
4545@pytest.fixture
4646def ctx():
4747 return SessionContext()
@@ -116,6 +116,30 @@ def clean_formatter_state():
116116 """Reset the HTML formatter after each test."""
117117 reset_formatter()
118118
119+ # custom style for testing with html formatter
120+ class CustomStyleProvider:
121+ def get_cell_style(self) -> str:
122+ return (
123+ "background-color: #f5f5f5; color: #333; padding: 8px; border: "
124+ "1px solid #ddd;"
125+ )
126+
127+ def get_header_style(self) -> str:
128+ return (
129+ "background-color: #4285f4; color: white; font-weight: bold; "
130+ "padding: 10px; border: 1px solid #3367d6;"
131+ )
132+
133+ def count_table_rows(html_content: str) -> int:
134+ """Count the number of table rows in HTML content.
135+
136+ Args:
137+ html_content: HTML string to analyze
138+
139+ Returns:
140+ Number of table rows found (number of <tr> tags)
141+ """
142+ return len(re.findall(r"<tr", html_content))
119143
120144def test_select(df):
121145 df_1 = df.select(
@@ -671,11 +695,10 @@ def test_window_frame_defaults_match_postgres(partitioned_df):
671695 assert df_2.sort(col_a).to_pydict() == expected
672696
673697
674- def test_html_formatter_configuration (df, clean_formatter_state):
698+ def test_html_formatter_cell_dimension (df, clean_formatter_state):
675699 """Test configuring the HTML formatter with different options."""
676700 # Configure with custom settings
677701 configure_formatter(
678- max_cell_length=5,
679702 max_width=500,
680703 max_height=200,
681704 enable_cell_expansion=False,
@@ -693,19 +716,6 @@ def test_html_formatter_configuration(df, clean_formatter_state):
693716def test_html_formatter_custom_style_provider(df, clean_formatter_state):
694717 """Test using custom style providers with the HTML formatter."""
695718
696- class CustomStyleProvider:
697- def get_cell_style(self) -> str:
698- return (
699- "background-color: #f5f5f5; color: #333; padding: 8px; border: "
700- "1px solid #ddd;"
701- )
702-
703- def get_header_style(self) -> str:
704- return (
705- "background-color: #4285f4; color: white; font-weight: bold; "
706- "padding: 10px; border: 1px solid #3367d6;"
707- )
708-
709719 # Configure with custom style provider
710720 configure_formatter(style_provider=CustomStyleProvider())
711721
@@ -917,37 +927,67 @@ def get_header_style(self) -> str:
917927 assert "color: #5af" in html_output # Even numbers
918928
919929
920- def test_html_formatter_memory_and_rows( ):
930+ def test_html_formatter_memory(df, clean_formatter_state ):
921931 """Test the memory and row control parameters in DataFrameHtmlFormatter."""
922-
923- # Test default values
924- formatter = DataFrameHtmlFormatter()
925- assert formatter.max_memory_bytes == 2 * 1024 * 1024 # 2 MB
926- assert formatter.min_rows_display == 20
927- assert formatter.repr_rows == 10
928-
929- # Test custom values
930- formatter = DataFrameHtmlFormatter(
931- max_memory_bytes=1024 * 1024, # 1 MB
932- min_rows_display=10,
933- repr_rows=5
932+ configure_formatter(
933+ max_memory_bytes = 10,
934+ min_rows_display = 1
934935 )
935- assert formatter.max_memory_bytes == 1024 * 1024
936- assert formatter.min_rows_display == 10
937- assert formatter.repr_rows == 5
938-
939- # Test extremely large values and tiny values (edge cases)
940- # These should not raise exceptions
941- extreme_formatter = DataFrameHtmlFormatter(
942- max_memory_bytes=10 * 1024 * 1024 * 1024, # 10 GB
943- min_rows_display=1,
944- repr_rows=1
936+ html_output = df._repr_html_()
937+
938+ # Count the number of table rows in the output
939+ tr_count = count_table_rows(html_output)
940+ # With a tiny memory limit of 10 bytes, the formatter should display
941+ # the minimum number of rows (1) plus a message about truncation
942+ assert tr_count == 2 # 1 for header row, 1 for data row
943+ assert "data truncated" in html_output.lower()
944+
945+ configure_formatter(
946+ max_memory_bytes = 10*MB,
947+ min_rows_display = 1
945948 )
946- assert extreme_formatter.max_memory_bytes == 10 * 1024 * 1024 * 1024
947- assert extreme_formatter.min_rows_display == 1
948- assert extreme_formatter.repr_rows == 1
949-
949+ html_output = df._repr_html_()
949+ # With larger memory limit and min_rows_display=1, should display all rows
951+ tr_count = count_table_rows(html_output)
952+ # Table should have header row (1) + 3 data rows = 4 rows
953+ assert tr_count == 4
954+ # No truncation message should appear
955+ assert "data truncated" not in html_output.lower()
956+
957+ def test_html_formatter_repr_rows(df, clean_formatter_state):
958+ configure_formatter(
959+ min_rows_display = 2,
960+ repr_rows = 2
961+ )
962+ html_output = df._repr_html_()
963+
964+ tr_count = count_table_rows(html_output)
965+ # Table should have header row (1) + 2 data rows = 3 rows
966+ assert tr_count == 3
967+
968+ configure_formatter(
969+ min_rows_display = 2,
970+ repr_rows = 3
971+ )
972+ html_output = df._repr_html_()
973+
974+ tr_count = count_table_rows(html_output)
975+ # Table should have header row (1) + 3 data rows = 4 rows
976+ assert tr_count == 4
977+
978+
979+ def test_html_formatter_validation():
950980 # Test validation for invalid parameters
981+
982+ with pytest.raises(ValueError, match="max_cell_length must be a positive integer"):
983+ DataFrameHtmlFormatter(max_cell_length=0)
984+
985+ with pytest.raises(ValueError, match="max_width must be a positive integer"):
986+ DataFrameHtmlFormatter(max_width=0)
987+
988+ with pytest.raises(ValueError, match="max_height must be a positive integer"):
989+ DataFrameHtmlFormatter(max_height=0)
990+
951991 with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"):
952992 DataFrameHtmlFormatter(max_memory_bytes=0)
953993
@@ -967,55 +1007,56 @@ def test_html_formatter_memory_and_rows():
9671007 DataFrameHtmlFormatter(repr_rows=-10)
9681008
9691009
970- def test_custom_style_provider_html_formatter (df, clean_formatter_state):
1010+ def test_configure_formatter (df, clean_formatter_state):
9711011 """Test that configure_formatter applies non-default parameters, as seen via
9721012 get_formatter()."""
9731013
974- class CustomStyleProvider:
975- def get_cell_style(self) -> str:
976- return (
977- "background-color: #f5f5f5; color: #333; padding: 8px; border: "
978- "1px solid #ddd;"
979- )
980-
981- def get_header_style(self) -> str:
982- return (
983- "background-color: #4285f4; color: white; font-weight: bold; "
984- "padding: 10px; border: 1px solid #3367d6;"
985- )
986-
987- # Configure with custom style provider
988- configure_formatter(style_provider=CustomStyleProvider())
989-
990- html_output = df._repr_html_()
991-
992- # Verify our custom styles were applied
993- assert "background-color: #4285f4" in html_output
994- assert "color: white" in html_output
995- assert "background-color: #f5f5f5" in html_output
996-
997- # Reset for the next part of the test
1014+ # these are non-default values
1015+ MAX_CELL_LENGTH = 10
1016+ MAX_WIDTH = 500
1017+ MAX_HEIGHT = 30
1018+ MAX_MEMORY_BYTES = 3*MB
1019+ MIN_ROWS_DISPLAY=2
1020+ REPR_ROWS = 2
1021+ ENABLE_CELL_EXPANSION = False
1022+ SHOW_TRUNCATION_MESSAGE = False
1023+ USE_SHARED_STYLES = False
1024+
9981025 reset_formatter()
1026+ formatter_default = get_formatter()
1027+
1028+ assert formatter_default.max_cell_length != MAX_CELL_LENGTH
1029+ assert formatter_default.max_width != MAX_WIDTH
1030+ assert formatter_default.max_height != MAX_HEIGHT
1031+ assert formatter_default.max_memory_bytes != MAX_MEMORY_BYTES
1032+ assert formatter_default.min_rows_display != MIN_ROWS_DISPLAY
1033+ assert formatter_default.repr_rows != REPR_ROWS
1034+ assert formatter_default.enable_cell_expansion != ENABLE_CELL_EXPANSION
1035+ assert formatter_default.show_truncation_message != SHOW_TRUNCATION_MESSAGE
1036+ assert formatter_default.use_shared_styles != USE_SHARED_STYLES
1037+
9991038 # Configure the formatter with the non-default values defined above
10001039 configure_formatter(
1001- style_provider=CustomStyleProvider(),
1002- max_memory_bytes=3 * 1024 * 1024, # 3 MB
1003- min_rows_display=15,
1004- repr_rows=7
1040+ max_cell_length = MAX_CELL_LENGTH,
1041+ max_width = MAX_WIDTH,
1042+ max_height= MAX_HEIGHT,
1043+ max_memory_bytes=MAX_MEMORY_BYTES,
1044+ min_rows_display=MIN_ROWS_DISPLAY,
1045+ repr_rows=REPR_ROWS,
1046+ enable_cell_expansion = ENABLE_CELL_EXPANSION,
1047+ show_truncation_message = SHOW_TRUNCATION_MESSAGE,
1048+ use_shared_styles = USE_SHARED_STYLES
10051049 )
1006-
1007- html_output = df._repr_html_()
1008-
1009- # Verify our custom styles were applied
1010- assert "background-color: #4285f4" in html_output
1011- assert "color: white" in html_output
1012- assert "background-color: #f5f5f5" in html_output
1013-
1014- # Test memory and row parameters were properly set
1015- formatter = get_formatter()
1016- assert formatter.max_memory_bytes == 3 * 1024 * 1024 # 3 MB
1017- assert formatter.min_rows_display == 15
1018- assert formatter.repr_rows == 7
1050+ formatter_custom = get_formatter()
1051+ assert formatter_custom.max_cell_length == MAX_CELL_LENGTH
1052+ assert formatter_custom.max_width == MAX_WIDTH
1053+ assert formatter_custom.max_height == MAX_HEIGHT
1054+ assert formatter_custom.max_memory_bytes == MAX_MEMORY_BYTES
1055+ assert formatter_custom.min_rows_display == MIN_ROWS_DISPLAY
1056+ assert formatter_custom.repr_rows == REPR_ROWS
1057+ assert formatter_custom.enable_cell_expansion == ENABLE_CELL_EXPANSION
1058+ assert formatter_custom.show_truncation_message == SHOW_TRUNCATION_MESSAGE
1059+ assert formatter_custom.use_shared_styles == USE_SHARED_STYLES
10191060
10201061
10211062def test_get_dataframe(tmp_path):
@@ -1606,9 +1647,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame:
16061647 assert result["new_col"] == [3 for _i in range(3)]
16071648
16081649
1609- def test_dataframe_repr_html_structure(df) -> None:
1650+ def test_dataframe_repr_html_structure(df, clean_formatter_state ) -> None:
16101651 """Test that DataFrame._repr_html_ produces expected HTML output structure."""
1611- import re
16121652
16131653 output = df._repr_html_()
16141654
@@ -1638,13 +1678,13 @@ def test_dataframe_repr_html_structure(df) -> None:
16381678 assert len(body_matches) == 1, "Expected pattern of values not found in HTML output"
16391679
16401680
1641- def test_dataframe_repr_html_values(df):
1681+ def test_dataframe_repr_html_values(df, clean_formatter_state ):
16421682 """Test that DataFrame._repr_html_ contains the expected data values."""
16431683 html = df._repr_html_()
16441684 assert html is not None
16451685
16461686 # Create a more flexible pattern that handles values being wrapped in spans
1647- # This pattern will match the sequence of values 1,4,8,2,5,5,3,6,8 regardless
1687+ # This pattern will match the sequence of values 1,4,8,2,5,5 regardless
16481688 # of formatting
16491689 pattern = re.compile(
16501690 r"<td[^>]*?>(?:<span[^>]*?>)?1(?:</span>)?</td>.*?"
@@ -1748,4 +1788,4 @@ def test_html_formatter_manual_format_html(clean_formatter_state):
17481788
17491789 assert "<style>" in local_html_1
17501790 assert "<style>" in local_html_2
1751-
1791+
0 commit comments