From 7fbb73cbdcc3f980748cf4067dd4232e5b66d500 Mon Sep 17 00:00:00 2001 From: patchwright <292882882+patchwright@users.noreply.github.com> Date: Sun, 14 Jun 2026 20:13:58 +0200 Subject: [PATCH 1/3] Fix leading space in surnames after capitalize() with empty middle name capitalize() split each attribute with str.split(' '), which returns [''] (not []) for an empty string. cap_piece() returns '' for an empty part, so an empty middle name produced middle_list = [''], which leaked into surnames_list (middle_list + last_list) and yielded a leading space in the surnames property: >>> hn = HumanName('john doe'); hn.capitalize(); hn.surnames ' Doe' # leading space (should be 'Doe') The same spurious '' element also appeared in title_list/first_list/last_list for empty attributes. Using str.split() instead returns [] for empty strings and is equivalent for the already-whitespace-collapsed pieces cap_piece() returns. The suffix split (', ') is intentionally left unchanged in this patch because suffixes are comma-delimited rather than space-delimited; the analogous [''] result for an empty suffix is fixed separately in PATCH 3/3. Added a regression test in HumanNameCapitalizationTestCase. 
--- nameparser/parser.py | 8 ++++---- tests/test_capitalization.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/nameparser/parser.py b/nameparser/parser.py index 9edc905..e53cd50 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -984,10 +984,10 @@ def capitalize(self, force: bool | None = None) -> None: if not force and not (name == name.upper() or name == name.lower()): return - self.title_list = self.cap_piece(self.title, 'title').split(' ') - self.first_list = self.cap_piece(self.first, 'first').split(' ') - self.middle_list = self.cap_piece(self.middle, 'middle').split(' ') - self.last_list = self.cap_piece(self.last, 'last').split(' ') + self.title_list = self.cap_piece(self.title, 'title').split() + self.first_list = self.cap_piece(self.first, 'first').split() + self.middle_list = self.cap_piece(self.middle, 'middle').split() + self.last_list = self.cap_piece(self.last, 'last').split() self.suffix_list = self.cap_piece(self.suffix, 'suffix').split(', ') def handle_capitalization(self) -> None: diff --git a/tests/test_capitalization.py b/tests/test_capitalization.py index a281b81..9e4befd 100644 --- a/tests/test_capitalization.py +++ b/tests/test_capitalization.py @@ -40,6 +40,28 @@ def test_capitization_middle_initial_is_also_a_conjunction(self) -> None: hn.capitalize() self.m(str(hn), 'Scott E. Werner', hn) + def test_capitalize_empty_name_part_has_no_leading_space_in_surnames(self) -> None: + # capitalize() split each attribute with str.split(' '), which returns + # [''] (rather than []) for an empty string. That spurious element + # leaked into surnames_list (middle_list + last_list) and produced a + # leading space in the surnames property, e.g. ' Doe' instead of 'Doe'. 
+ hn = HumanName('john doe') + hn.capitalize() + self.m(hn.surnames, 'Doe', hn) + self.assertEqual(hn.middle_list, []) + self.assertEqual(hn.surnames_list, ['Doe']) + + # force=True on a mixed-case name hits the same code path + hn = HumanName('Jane Doe') + hn.capitalize(force=True) + self.m(hn.surnames, 'Doe', hn) + self.assertEqual(hn.middle_list, []) + + # other empty attribute lists are also free of the spurious '' element + self.assertEqual(hn.title_list, []) + self.assertEqual(hn.first_list, ['Jane']) + self.assertEqual(hn.last_list, ['Doe']) + # Leaving already-capitalized names alone def test_no_change_to_mixed_chase(self) -> None: hn = HumanName('Shirley Maclaine') From 665f148e8b1d7f4bc7f5e334a32488eeb2cc7183 Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Sat, 27 Jun 2026 12:03:21 -0700 Subject: [PATCH 2/3] tests: split capitalize regression test and clarify comments - Split test_capitalize_empty_name_part_has_no_leading_space_in_surnames into three focused tests (normal path, force=True path, list invariants) so failures self-localize to the scenario - Added test_capitalize_title_and_last_only_no_spurious_tokens covering a name with empty first and middle simultaneously - Sharpened inline comments to accurately describe root cause and why force=True matters (early-return guard) - Added comment to suffix_list line explaining the intentional split(', ') asymmetry vs the space-delimited attributes Co-Authored-By: Claude Sonnet 4.6 --- nameparser/parser.py | 1 + tests/test_capitalization.py | 28 +++++++++++++++++++++------- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/nameparser/parser.py b/nameparser/parser.py index e53cd50..19a2ed1 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -988,6 +988,7 @@ def capitalize(self, force: bool | None = None) -> None: self.first_list = self.cap_piece(self.first, 'first').split() self.middle_list = self.cap_piece(self.middle, 'middle').split() self.last_list = 
self.cap_piece(self.last, 'last').split() + # suffix is stored comma-separated ("Ph.D., J.D."), not space-separated self.suffix_list = self.cap_piece(self.suffix, 'suffix').split(', ') def handle_capitalization(self) -> None: diff --git a/tests/test_capitalization.py b/tests/test_capitalization.py index 9e4befd..a308da8 100644 --- a/tests/test_capitalization.py +++ b/tests/test_capitalization.py @@ -40,28 +40,42 @@ def test_capitization_middle_initial_is_also_a_conjunction(self) -> None: hn.capitalize() self.m(str(hn), 'Scott E. Werner', hn) - def test_capitalize_empty_name_part_has_no_leading_space_in_surnames(self) -> None: - # capitalize() split each attribute with str.split(' '), which returns - # [''] (rather than []) for an empty string. That spurious element - # leaked into surnames_list (middle_list + last_list) and produced a - # leading space in the surnames property, e.g. ' Doe' instead of 'Doe'. + def test_capitalize_empty_middle_produces_no_leading_space_in_surnames(self) -> None: + # str.split(' ') on an empty string returns [''] rather than [], so an + # absent middle produced a spurious token that leaked into surnames_list + # and caused a leading space in the surnames property (' Doe' not 'Doe'). hn = HumanName('john doe') hn.capitalize() self.m(hn.surnames, 'Doe', hn) self.assertEqual(hn.middle_list, []) self.assertEqual(hn.surnames_list, ['Doe']) - # force=True on a mixed-case name hits the same code path + def test_capitalize_force_empty_middle_produces_no_leading_space_in_surnames(self) -> None: + # Without force=True, capitalize() exits early for mixed-case names and + # never reaches the split lines. Confirm the fix covers that path too. 
hn = HumanName('Jane Doe') hn.capitalize(force=True) self.m(hn.surnames, 'Doe', hn) self.assertEqual(hn.middle_list, []) - # other empty attribute lists are also free of the spurious '' element + def test_capitalize_empty_attributes_produce_no_spurious_tokens(self) -> None: + # Confirm the fix extends beyond surnames: empty attribute lists are [] + # not [''], and non-empty ones contain only real tokens. + hn = HumanName('Jane Doe') + hn.capitalize(force=True) self.assertEqual(hn.title_list, []) self.assertEqual(hn.first_list, ['Jane']) self.assertEqual(hn.last_list, ['Doe']) + def test_capitalize_title_and_last_only_no_spurious_tokens(self) -> None: + # title+last with no first or middle leaves first_list and middle_list + # both empty. All-caps triggers capitalize() without force=True. + hn = HumanName('DR DOE') + hn.capitalize() + self.assertEqual(hn.first_list, []) + self.assertEqual(hn.middle_list, []) + self.m(str(hn), 'Dr Doe', hn) + # Leaving already-capitalized names alone def test_no_change_to_mixed_chase(self) -> None: hn = HumanName('Shirley Maclaine') From 06cd54398fd47456ea0eb9a601e67047d47fb6bd Mon Sep 17 00:00:00 2001 From: Derek Gulbranson Date: Sat, 27 Jun 2026 12:13:31 -0700 Subject: [PATCH 3/3] Fix spurious '' token in suffix_list after capitalize() with no suffix ''.split(', ') returns [''] just like ''.split(' ') did for the other attributes. Use a filtered list comprehension to preserve the comma delimiter while dropping empty tokens, making suffix_list consistent with the [] invariant the rest of the codebase relies on. 
Co-Authored-By: Claude Sonnet 4.6 --- nameparser/parser.py | 2 +- tests/test_capitalization.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/nameparser/parser.py b/nameparser/parser.py index 19a2ed1..27e350c 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -989,7 +989,7 @@ def capitalize(self, force: bool | None = None) -> None: self.middle_list = self.cap_piece(self.middle, 'middle').split() self.last_list = self.cap_piece(self.last, 'last').split() # suffix is stored comma-separated ("Ph.D., J.D."), not space-separated - self.suffix_list = self.cap_piece(self.suffix, 'suffix').split(', ') + self.suffix_list = [s for s in self.cap_piece(self.suffix, 'suffix').split(', ') if s] def handle_capitalization(self) -> None: """ diff --git a/tests/test_capitalization.py b/tests/test_capitalization.py index a308da8..0b23e4e 100644 --- a/tests/test_capitalization.py +++ b/tests/test_capitalization.py @@ -76,6 +76,23 @@ def test_capitalize_title_and_last_only_no_spurious_tokens(self) -> None: self.assertEqual(hn.middle_list, []) self.m(str(hn), 'Dr Doe', hn) + def test_capitalize_empty_suffix_produces_no_spurious_tokens(self) -> None: + # ''.split(', ') returns [''] just like ''.split(' ') did for the other + # attributes — an absent suffix should produce suffix_list == [], not ['']. + hn = HumanName('JOHN DOE') + hn.capitalize() + self.assertEqual(hn.suffix_list, []) + + def test_capitalize_single_suffix_still_works(self) -> None: + hn = HumanName('JOHN DOE PHD') + hn.capitalize() + self.assertEqual(hn.suffix_list, ['Ph.D.']) + + def test_capitalize_multiple_suffixes_still_split_correctly(self) -> None: + hn = HumanName('JOHN DOE PHD MD') + hn.capitalize() + self.assertEqual(hn.suffix_list, ['Ph.D.', 'M.D.']) + # Leaving already-capitalized names alone def test_no_change_to_mixed_chase(self) -> None: hn = HumanName('Shirley Maclaine')