@@ -439,41 +439,28 @@ def str_extract(arr, pat, flags=0):
439
439
440
440
"""
441
441
regex = re.compile(pat, flags=flags)
442
-
443
442
# just to be safe, check this
444
443
if regex.groups == 0:
445
444
raise ValueError("This pattern contains no groups to capture.")
446
- elif regex.groups == 1:
447
- def f(x):
448
- if not isinstance(x, compat.string_types):
449
- return None
450
- m = regex.search(x)
451
- if m:
452
- return m.groups()[0] # may be None
453
- else:
454
- return None
445
+ empty_row = [np.nan]*regex.groups
446
+ def f(x):
447
+ if not isinstance(x, compat.string_types):
448
+ return empty_row
449
+ m = regex.search(x)
450
+ if m:
451
+ return [np.nan if item is None else item for item in m.groups()]
452
+ else:
453
+ return empty_row
454
+ if regex.groups == 1:
455
+ result = Series([f(val)[0] for val in arr], name=regex.groupindex.get(1))
455
456
else:
456
- empty_row = Series(regex.groups * [None])
457
-
458
- def f(x):
459
- if not isinstance(x, compat.string_types):
460
- return empty_row
461
- m = regex.search(x)
462
- if m:
463
- return Series(list(m.groups())) # may contain None
464
- else:
465
- return empty_row
466
- result = arr.apply(f)
467
- result.replace({None: np.nan}, inplace=True)
468
- if regex.groups > 1:
469
- result = DataFrame(result) # Don't rely on the wrapper; name columns.
470
457
names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
471
- result.columns = [names.get(1 + i, i) for i in range(regex.groups)]
472
- else:
473
- result.name = regex.groupindex.get(0)
458
+ columns = [names.get(1 + i, i) for i in range(regex.groups)]
459
+ result = DataFrame([f(val) for val in arr], columns=columns)
474
460
return result
475
461
476
462
463
+
477
464
def str_join(arr, sep):
478
465
"""
479
466
Join lists contained as elements in array, a la str.join
0 commit comments