diff --git a/nameparser/parser.py b/nameparser/parser.py index 9edc905..27e350c 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -984,11 +984,12 @@ def capitalize(self, force: bool | None = None) -> None: if not force and not (name == name.upper() or name == name.lower()): return - self.title_list = self.cap_piece(self.title, 'title').split(' ') - self.first_list = self.cap_piece(self.first, 'first').split(' ') - self.middle_list = self.cap_piece(self.middle, 'middle').split(' ') - self.last_list = self.cap_piece(self.last, 'last').split(' ') - self.suffix_list = self.cap_piece(self.suffix, 'suffix').split(', ') + self.title_list = self.cap_piece(self.title, 'title').split() + self.first_list = self.cap_piece(self.first, 'first').split() + self.middle_list = self.cap_piece(self.middle, 'middle').split() + self.last_list = self.cap_piece(self.last, 'last').split() + # suffix is stored comma-separated ("Ph.D., J.D."), not space-separated + self.suffix_list = [s for s in self.cap_piece(self.suffix, 'suffix').split(', ') if s] def handle_capitalization(self) -> None: """ diff --git a/tests/test_capitalization.py b/tests/test_capitalization.py index a281b81..0b23e4e 100644 --- a/tests/test_capitalization.py +++ b/tests/test_capitalization.py @@ -40,6 +40,59 @@ def test_capitization_middle_initial_is_also_a_conjunction(self) -> None: hn.capitalize() self.m(str(hn), 'Scott E. Werner', hn) + def test_capitalize_empty_middle_produces_no_leading_space_in_surnames(self) -> None: + # str.split(' ') on an empty string returns [''] rather than [], so an + # absent middle produced a spurious token that leaked into surnames_list + # and caused a leading space in the surnames property (' Doe' not 'Doe'). + hn = HumanName('john doe') + hn.capitalize() + self.m(hn.surnames, 'Doe', hn) + self.assertEqual(hn.middle_list, []) + self.assertEqual(hn.surnames_list, ['Doe']) + + def test_capitalize_force_empty_middle_produces_no_leading_space_in_surnames(self) -> None: + # Without force=True, capitalize() exits early for mixed-case names and + # never reaches the split lines. Confirm the fix covers that path too. + hn = HumanName('Jane Doe') + hn.capitalize(force=True) + self.m(hn.surnames, 'Doe', hn) + self.assertEqual(hn.middle_list, []) + + def test_capitalize_empty_attributes_produce_no_spurious_tokens(self) -> None: + # Confirm the fix extends beyond surnames: empty attribute lists are [] + # not [''], and non-empty ones contain only real tokens. + hn = HumanName('Jane Doe') + hn.capitalize(force=True) + self.assertEqual(hn.title_list, []) + self.assertEqual(hn.first_list, ['Jane']) + self.assertEqual(hn.last_list, ['Doe']) + + def test_capitalize_title_and_last_only_no_spurious_tokens(self) -> None: + # title+last with no first or middle leaves first_list and middle_list + # both empty. All-caps triggers capitalize() without force=True. + hn = HumanName('DR DOE') + hn.capitalize() + self.assertEqual(hn.first_list, []) + self.assertEqual(hn.middle_list, []) + self.m(str(hn), 'Dr Doe', hn) + + def test_capitalize_empty_suffix_produces_no_spurious_tokens(self) -> None: + # ''.split(', ') returns [''] just like ''.split(' ') did for the other + # attributes — an absent suffix should produce suffix_list == [], not ['']. + hn = HumanName('JOHN DOE') + hn.capitalize() + self.assertEqual(hn.suffix_list, []) + + def test_capitalize_single_suffix_still_works(self) -> None: + hn = HumanName('JOHN DOE PHD') + hn.capitalize() + self.assertEqual(hn.suffix_list, ['Ph.D.']) + + def test_capitalize_multiple_suffixes_still_split_correctly(self) -> None: + hn = HumanName('JOHN DOE PHD MD') + hn.capitalize() + self.assertEqual(hn.suffix_list, ['Ph.D.', 'M.D.']) + # Leaving already-capitalized names alone def test_no_change_to_mixed_chase(self) -> None: hn = HumanName('Shirley Maclaine')