diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 721acaa..0cc23c2 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -4,6 +4,7 @@ name: Test the Python package on: + workflow_dispatch: push: branches: [ master ] pull_request: @@ -16,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 @@ -27,7 +28,12 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + python -m pip install twine + python -m pip install sphinx if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi - name: Run Tests run: | python tests.py + python setup.py sdist + twine check dist/* + sphinx-build -b html docs dist/docs diff --git a/README.rst b/README.rst index b347593..eebde5b 100644 --- a/README.rst +++ b/README.rst @@ -13,6 +13,7 @@ individual components. * hn.suffix * hn.nickname * hn.surnames *(middle + last)* +* hn.initials *(first initial of each name part)* Supported Name Structures ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -135,9 +136,8 @@ https://github.com/derek73/python-nameparser .. _Start a New Issue: https://github.com/derek73/python-nameparser/issues .. _click here to propose changes to the titles: https://github.com/derek73/python-nameparser/edit/master/nameparser/config/titles.py - -.. |Build Status| image:: https://travis-ci.org/derek73/python-nameparser.svg?branch=master - :target: https://travis-ci.org/derek73/python-nameparser +.. |Build Status| image:: https://github.com/derek73/python-nameparser/actions/workflows/python-package.yml/badge.svg + :target: https://github.com/derek73/python-nameparser/actions/workflows/python-package.yml .. |PyPI| image:: https://img.shields.io/pypi/v/nameparser.svg :target: https://pypi.org/project/nameparser/ .. |Documentation| image:: https://readthedocs.org/projects/nameparser/badge/?version=latest diff --git a/docs/modules.rst b/docs/modules.rst index eaf3240..2056330 100644 --- a/docs/modules.rst +++ b/docs/modules.rst @@ -7,6 +7,7 @@ HumanName.parser .. py:module:: nameparser.parser .. py:class:: HumanName + :noindex: .. autoclass:: HumanName :members: diff --git a/docs/release_log.rst b/docs/release_log.rst index 57b7bf5..a0ab7ee 100644 --- a/docs/release_log.rst +++ b/docs/release_log.rst @@ -1,5 +1,13 @@ Release Log =========== +* 1.1.3 - September 20, 2023 + - Fix case when we have two same prefixes in the name ()#147) +* 1.1.2 - November 13, 2022 + - Add support for attributes in constructor (#140) + - Make HumanName instances hashable (#138) + - Update repr for names with single quotes (#137) +* 1.1.1 - January 28, 2022 + - Fix bug in is_suffix handling of lists (#129) * 1.1.0 - January 3, 2022 - Add initials support (#128) - Add more titles and prefixes (#120, #127, #128, #119) diff --git a/docs/resources.rst b/docs/resources.rst index 6cc28e8..8934aae 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -7,6 +7,8 @@ Naming Practices and Resources * Wikipedia_Anthroponymy_ * Wikipedia_Naming_conventions_ * Wikipedia_List_Of_Titles_ + * Tussenvoegsel_ + * Family_Name_Affixes_ .. _US_Census_Surname_Data_2000: https://www.census.gov/data/developers/data-sets/surnames/2000.html .. _US_Social_Security_Administration_Baby_Names_Index: https://www.ssa.gov/oact/babynames/limits.html @@ -14,3 +16,5 @@ Naming Practices and Resources .. _Wikipedia_Anthroponymy: https://en.wikipedia.org/wiki/Anthroponymy .. _Wikipedia_Naming_conventions: http://en.wikipedia.org/wiki/Wikipedia:Naming_conventions_(people) .. _Wikipedia_List_Of_Titles: https://en.wikipedia.org/wiki/Title +.. _Tussenvoegsel: https://en.wikipedia.org/wiki/Tussenvoegsel +.. _Family_Name_Affixes : https://en.wikipedia.org/wiki/List_of_family_name_affixes diff --git a/nameparser/__init__.py b/nameparser/__init__.py index a9ee753..ab914e9 100644 --- a/nameparser/__init__.py +++ b/nameparser/__init__.py @@ -1,4 +1,4 @@ -VERSION = (1, 1, 0) +VERSION = (1, 1, 3) __version__ = '.'.join(map(str, VERSION)) __author__ = "Derek Gulbranson" __author_email__ = 'derek73@gmail.com' diff --git a/nameparser/parser.py b/nameparser/parser.py index 35f4135..a5eb352 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -36,7 +36,10 @@ class HumanName(object): Instantiation assigns to ``full_name``, and assignment to :py:attr:`full_name` triggers :py:func:`parse_full_name`. After parsing the - name, these instance attributes are available. + name, these instance attributes are available. Alternatively, you can pass + any of the instance attributes to the constructor method and skip the parsing + process. If any of the the instance attributes are passed to the constructor + as keywords, :py:func:`parse_full_name` will not be performed. **HumanName Instance Attributes** @@ -56,6 +59,12 @@ class HumanName(object): :param str string_format: python string formatting :param str initials_format: python initials string formatting :param str initials_delimter: string delimiter for initials + :param str first: first name + :param str middle: middle name + :param str last: last name + :param str title: The title or prenominal + :param str suffix: The suffix or postnominal + :param str nickname: Nicknames """ C = CONSTANTS @@ -77,7 +86,9 @@ class HumanName(object): _full_name = '' def __init__(self, full_name="", constants=CONSTANTS, encoding=DEFAULT_ENCODING, - string_format=None, initials_format=None, initials_delimiter=None): + string_format=None, initials_format=None, initials_delimiter=None, + first=None, middle=None, last=None, title=None, suffix=None, + nickname=None): self.C = constants if type(self.C) is not type(CONSTANTS): self.C = Constants() @@ -86,8 +97,17 @@ def __init__(self, full_name="", constants=CONSTANTS, encoding=DEFAULT_ENCODING, self.string_format = string_format or self.C.string_format self.initials_format = initials_format or self.C.initials_format self.initials_delimiter = initials_delimiter or self.C.initials_delimiter - # full_name setter triggers the parse - self.full_name = full_name + if (first or middle or last or title or suffix or nickname): + self.first = first + self.middle = middle + self.last = last + self.title = title + self.suffix = suffix + self.nickname = nickname + self.unparsable = False + else: + # full_name setter triggers the parse + self.full_name = full_name def __iter__(self): return self @@ -141,6 +161,9 @@ def __unicode__(self): return self.collapse_whitespace(_s).strip(', ') return " ".join(self) + def __hash__(self): + return hash(str(self)) + def __str__(self): if sys.version_info[0] >= 3: return self.__unicode__() @@ -150,7 +173,7 @@ def __repr__(self): if self.unparsable: _string = "<%(class)s : [ Unparsable ] >" % {'class': self.__class__.__name__, } else: - _string = "<%(class)s : [\n\ttitle: '%(title)s' \n\tfirst: '%(first)s' \n\tmiddle: '%(middle)s' \n\tlast: '%(last)s' \n\tsuffix: '%(suffix)s'\n\tnickname: '%(nickname)s'\n]>" % { + _string = "<%(class)s : [\n\ttitle: %(title)r \n\tfirst: %(first)r \n\tmiddle: %(middle)r \n\tlast: %(last)r \n\tsuffix: %(suffix)r\n\tnickname: %(nickname)r\n]>" % { 'class': self.__class__.__name__, 'title': self.title or '', 'first': self.first or '', @@ -191,7 +214,7 @@ def as_dict(self, include_empty=True): def __process_initial__(self, name_part, firstname=False): """ - Name parts may include prefixes or conjuctions. This function filters these from the name unless it is + Name parts may include prefixes or conjunctions. This function filters these from the name unless it is a first name, since first names cannot be conjunctions or prefixes. """ parts = name_part.split(" ") @@ -416,8 +439,8 @@ def is_suffix(self, piece): """ # suffixes may have periods inside them like "M.D." if isinstance(piece, list): - for piece in pieces: - if self.is_suffix(piece): + for item in piece: + if self.is_suffix(item): return True else: return ((lc(piece).replace('.', '') in self.C.suffix_acronyms) @@ -513,9 +536,9 @@ def parse_nicknames(self): Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`; `quoted_word`, `double_quotes` and `parenthesis`. """ - + empty_re = re.compile("") - + re_quoted_word = self.C.regexes.quoted_word or empty_re re_double_quotes = self.C.regexes.double_quotes or empty_re re_parenthesis = self.C.regexes.parenthesis or empty_re @@ -591,9 +614,9 @@ def parse_full_name(self): nxt = None # title must have a next piece, unless it's just a title - if self.is_title(piece) \ + if not self.first \ and (nxt or p_len == 1) \ - and not self.first: + and self.is_title(piece): self.title_list.append(piece) continue if not self.first: @@ -641,9 +664,9 @@ def parse_full_name(self): except IndexError: nxt = None - if self.is_title(piece) \ + if not self.first \ and (nxt or len(pieces) == 1) \ - and not self.first: + and self.is_title(piece): self.title_list.append(piece) continue if not self.first: @@ -681,9 +704,9 @@ def parse_full_name(self): except IndexError: nxt = None - if self.is_title(piece) \ + if not self.first \ and (nxt or len(post_comma_pieces) == 1) \ - and not self.first: + and self.is_title(piece): self.title_list.append(piece) continue if not self.first: @@ -883,7 +906,7 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0): # If it's the first piece and there are more than 1 rootnames, assume it's a first name continue next_prefix = next(iter(filter(self.is_prefix, pieces[i + 1:]))) - j = pieces.index(next_prefix) + j = pieces.index(next_prefix, i + 1) if j == i + 1: # if there are two prefixes in sequence, join to the following piece j += 1 diff --git a/setup.py b/setup.py index ba0cc5a..2067716 100755 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ def read(fname): packages = ['nameparser','nameparser.config'], description = 'A simple Python module for parsing human names into their individual components.', long_description = README, + long_description_content_type = "text/x-rst", version = nameparser.__version__, url = nameparser.__url__, author = nameparser.__author__, diff --git a/tests.py b/tests.py index 91917a4..2cdd526 100644 --- a/tests.py +++ b/tests.py @@ -2071,6 +2071,11 @@ def test_multiple_prefixes(self): self.m(hn.first, "Mike", hn) self.m(hn.last, "van der Velt", hn) + def test_2_same_prefixes_in_the_name(self): + hh = HumanName("Vincent van Gogh van Beethoven") + self.m(hh.first, "Vincent", hh) + self.m(hh.middle, "van Gogh", hh) + self.m(hh.last, "van Beethoven", hh) class HumanNameCapitalizationTestCase(HumanNameTestBase): def test_capitalization_exception_for_III(self): @@ -2344,6 +2349,43 @@ def test_initials_with_prefix(self): hn = HumanName("Alex van Johnson") self.m(hn.initials_list(), ["A", "J"], hn) + def test_constructor_first(self): + hn = HumanName(first="TheName") + self.assertFalse(hn.unparsable) + self.m(hn.first, "TheName", hn) + + def test_constructor_middle(self): + hn = HumanName(middle="TheName") + self.assertFalse(hn.unparsable) + self.m(hn.middle, "TheName", hn) + + def test_constructor_last(self): + hn = HumanName(last="TheName") + self.assertFalse(hn.unparsable) + self.m(hn.last, "TheName", hn) + + def test_constructor_title(self): + hn = HumanName(title="TheName") + self.assertFalse(hn.unparsable) + self.m(hn.title, "TheName", hn) + + def test_constructor_suffix(self): + hn = HumanName(suffix="TheName") + self.assertFalse(hn.unparsable) + self.m(hn.suffix, "TheName", hn) + + def test_constructor_nickname(self): + hn = HumanName(nickname="TheName") + self.assertFalse(hn.unparsable) + self.m(hn.nickname, "TheName", hn) + + def test_constructor_multiple(self): + hn = HumanName(first="TheName", last="lastname", title="mytitle", full_name="donotparse") + self.assertFalse(hn.unparsable) + self.m(hn.first, "TheName", hn) + self.m(hn.last, "lastname", hn) + self.m(hn.title, "mytitle", hn) + TEST_NAMES = ( "John Doe",