Skip to content

Commit 415f1bf

Browse files
committed
v0.3.8, use regex to check for roman numerals at end of name (#36)
1 parent 15a54dc commit 415f1bf

File tree

5 files changed

+30
-14
lines changed

5 files changed

+30
-14
lines changed

docs/release_log.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
Release Log
22
===========
3+
* 0.3.8 - September 2, 2015
4+
- Use regex to check for Roman numerals at end of name (#36)
5+
- Add DVM to suffixes
36
* 0.3.7 - August 30, 2015
4-
- Make HumanName instances pickleable
57
- Speed improvement, 3x faster
8+
- Make HumanName instances pickleable
69
* 0.3.6 - August 6, 2015
710
- Fix strings that start with conjunctions (#20)
811
- Handle assigning lists of names to a name attribute

nameparser/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION = (0, 3, 7)
1+
VERSION = (0, 3, 8)
22
__version__ = '.'.join(map(str, VERSION))
33
__author__ = "Derek Gulbranson"
44
__author_email__ = '[email protected]'

nameparser/config/regexes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
("mac", re.compile(r'^(ma?c)(\w+)', re.I | re.U)),
99
("initial", re.compile(r'^(\w\.|[A-Z])?$', re.U)),
1010
("nickname", re.compile(r'\s*?[\("](.+?)[\)"]', re.U)),
11-
("roman_numeral", re.compile(r'^(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$', re.I | re.U)),
11+
("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
1212
("no_vowels",re.compile(r'^[^aeyiuo]+$', re.I | re.U))
1313
])
1414
"""

nameparser/parser.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,13 @@ def is_conjunction(self, piece):
270270
def is_prefix(self, piece):
271271
"""Is in the prefixes set and not :py:func:`is_an_initial()`."""
272272
return lc(piece) in self.C.prefixes and not self.is_an_initial(piece)
273+
274+
def is_roman_numeral(self, value):
275+
"""
276+
Matches the ``roman_numeral`` regular expression in
277+
:py:data:`~nameparser.config.regexes.REGEXES`.
278+
"""
279+
return bool(self.C.regexes.roman_numeral.match(value))
273280

274281
def is_suffix(self, piece):
275282
"""Is in the suffixes set and not :py:func:`is_an_initial()`."""
@@ -295,7 +302,7 @@ def is_an_initial(self, value):
295302
Matches the ``initial`` regular expression in
296303
:py:data:`~nameparser.config.regexes.REGEXES`.
297304
"""
298-
return self.C.regexes.initial.match(value) or False
305+
return bool(self.C.regexes.initial.match(value))
299306

300307
# def is_a_roman_numeral(value):
301308
# return re_roman_numeral.match(value) or False
@@ -397,21 +404,25 @@ def parse_full_name(self):
397404
# part[0]
398405

399406
pieces = self.parse_pieces(parts)
400-
407+
p_len = len(pieces)
401408
for i, piece in enumerate(pieces):
402409
try:
403410
nxt = pieces[i + 1]
404411
except IndexError:
405412
nxt = None
406413

407414
# title must have a next piece, unless it's just a title
408-
if self.is_title(piece) and (nxt or len(pieces) == 1):
415+
if self.is_title(piece) and (nxt or p_len == 1):
409416
self.title_list.append(piece)
410417
continue
411418
if not self.first:
412419
self.first_list.append(piece)
413420
continue
414-
if self.are_suffixes(pieces[i+1:]):
421+
if self.are_suffixes(pieces[i+1:]) or \
422+
(self.is_roman_numeral(nxt) and i == p_len - 2 \
423+
and not self.is_an_initial(piece)):
424+
# if the next piece is the last piece and a roman numeral
425+
# but this piece is not an initial
415426
self.last_list.append(piece)
416427
self.suffix_list += pieces[i+1:]
417428
break

tests.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,7 +1135,8 @@ def test_title_with_three_part_name_last_initial_is_suffix_uppercase_no_period(s
11351135
hn = HumanName("King John Alexander V")
11361136
self.m(hn.title, "King", hn)
11371137
self.m(hn.first, "John", hn)
1138-
self.m(hn.last, "V", hn)
1138+
self.m(hn.last, "Alexander", hn)
1139+
self.m(hn.suffix, "V", hn)
11391140

11401141
def test_four_name_parts_with_suffix_that_could_be_initial_lowercase_no_period(self):
11411142
hn = HumanName("larry james edward johnson v")
@@ -1144,19 +1145,20 @@ def test_four_name_parts_with_suffix_that_could_be_initial_lowercase_no_period(s
11441145
self.m(hn.last, "johnson", hn)
11451146
self.m(hn.suffix, "v", hn)
11461147

1147-
@unittest.expectedFailure
11481148
def test_four_name_parts_with_suffix_that_could_be_initial_uppercase_no_period(self):
11491149
hn = HumanName("Larry James Johnson I")
11501150
self.m(hn.first, "Larry", hn)
11511151
self.m(hn.middle, "James", hn)
11521152
self.m(hn.last, "Johnson", hn)
1153-
# if it's in upper case, we currently assume it's an initial
1154-
# it's not really clear if we can assume it's one or the other.
1155-
# If they really are the "first", they are probably used to using a
1156-
# comma to avoid confusion. Humans know that "Johnson" is a last name,
1157-
# but that wouldn't really be a "simple" nameparser.
11581153
self.m(hn.suffix, "I", hn)
11591154

1155+
def test_roman_numeral_initials(self):
1156+
hn = HumanName("Larry V I")
1157+
self.m(hn.first, "Larry", hn)
1158+
self.m(hn.middle, "V", hn)
1159+
self.m(hn.last, "I", hn)
1160+
self.m(hn.suffix, "", hn)
1161+
11601162
# tests for Rev. title (Reverend)
11611163
def test124(self):
11621164
hn = HumanName("Rev. John A. Kenneth Doe")

0 commit comments

Comments
 (0)