Skip to content

Commit 738a2e5

Browse files
authored
Uses f strings (#29)
1 parent 5cd722d commit 738a2e5

File tree

11 files changed: 39 additions (+39) and 48 deletions (-48).

_unittests/ut_df/test_connex_split.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ def test_split_connex(self):
110110
s2 = set(test[col])
111111
if s1 & s2:
112112
raise Exception(
113-
'Non empty intersection {0} & {1}\n{2}\n{3}'.format(s1, s2, train, test))
113+
f'Non empty intersection {s1} & {s2}\n{train}\n{test}')
114114

115115
df['connex'] = 'ole'
116116
train, test = train_test_connex_split( # pylint: disable=W0632
@@ -148,7 +148,7 @@ def test_split_connex2(self):
148148
if s1 & s2:
149149
rows = []
150150
for k, v in sorted(stats[0].items()):
151-
rows.append("{0}={1}".format(k, v))
151+
rows.append(f"{k}={v}")
152152
raise Exception(
153153
'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(s1, s2, train, test, "\n".join(rows)))
154154

@@ -180,7 +180,7 @@ def test_split_connex_missing(self):
180180
if s1 & s2:
181181
rows = []
182182
for k, v in sorted(stats[0].items()):
183-
rows.append("{0}={1}".format(k, v))
183+
rows.append(f"{k}={v}")
184184
raise Exception(
185185
'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(s1, s2, train, test, "\n".join(rows)))
186186

_unittests/ut_df/test_pandas_groupbynan.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def test_pandas_groupbynan(self):
3131
try:
3232
self.assertIsInstance(li[-1], float)
3333
except AssertionError as e:
34-
raise AssertionError("Issue with {0}".format(ty)) from e
34+
raise AssertionError(f"Issue with {ty}") from e
3535
try:
3636
self.assertTrue(numpy.isnan(li[-1]))
3737
except AssertionError as e:

pandas_streaming/data/dummy.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,10 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
2020
"""
2121
if asfloat:
2222
df = DataFrame(dict(cfloat=[_ + 0.1 for _ in range(0, n)], cstr=[
23-
"s{0}".format(i) for i in range(0, n)]))
23+
f"s{i}" for i in range(0, n)]))
2424
else:
2525
df = DataFrame(dict(cint=list(range(0, n)), cstr=[
26-
"s{0}".format(i) for i in range(0, n)]))
26+
f"s{i}" for i in range(0, n)]))
2727
for k, v in cols.items():
2828
df[k] = v
2929
return StreamingDataFrame.read_df(df, chunksize=chunksize)

pandas_streaming/df/connex_split.py

Lines changed: 7 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
4848
if weights is None:
4949
if test_size == 0 or train_size == 0:
5050
raise ValueError(
51-
"test_size={0} or train_size={1} cannot be null (1)."
52-
"".format(test_size, train_size))
51+
f"test_size={test_size} or train_size={train_size} cannot be null (1).")
5352
return train_test_split(df, test_size=test_size,
5453
train_size=train_size,
5554
random_state=random_state)
@@ -69,8 +68,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
6968
test_size = 1 - p
7069
if p is None or min(test_size, p) <= 0:
7170
raise ValueError(
72-
"test_size={0} or train_size={1} cannot be null (2)."
73-
"".format(test_size, train_size))
71+
f"test_size={test_size} or train_size={train_size} cannot be null (2).")
7472
ratio = test_size / p
7573

7674
if random_state is None:
@@ -330,11 +328,9 @@ def do_connex_components(dfrows, local_groups, kb, sib):
330328
grsum = dfids[[name, one]].groupby(name, as_index=False).sum()
331329
if fLOG:
332330
for g in groups:
333-
fLOG("[train_test_connex_split] #nb in '{0}': {1}".format(
334-
g, len(set(dfids[g]))))
331+
fLOG(f"[train_test_connex_split] #nb in '{g}': {len(set(dfids[g]))}")
335332
fLOG(
336-
"[train_test_connex_split] #connex {0}/{1}".format(
337-
grsum.shape[0], dfids.shape[0]))
333+
f"[train_test_connex_split] #connex {grsum.shape[0]}/{dfids.shape[0]}")
338334
if grsum.shape[0] <= 1:
339335
raise ValueError( # pragma: no cover
340336
"Every element is in the same connected components.")
@@ -358,10 +354,9 @@ def do_connex_components(dfrows, local_groups, kb, sib):
358354
"={2}/{3}".format(k, v, cum, len(elements)))
359355

360356
# Most important component
361-
fLOG('[train_test_connex_split] first row of the biggest component '
362-
'{0}'.format(maxc))
357+
fLOG(f'[train_test_connex_split] first row of the biggest component {maxc}')
363358
tdf = dfids[dfids[name] == maxc[0]]
364-
fLOG('[train_test_connex_split] \n{0}'.format(tdf.head(n=10)))
359+
fLOG(f'[train_test_connex_split] \n{tdf.head(n=10)}')
365360

366361
# Splits.
367362
train, test = train_test_split_weights(
@@ -457,8 +452,7 @@ def train_test_apart_stratify(df, group, test_size=0.25, train_size=None,
457452
test_size = 1 - p
458453
if p is None or min(test_size, p) <= 0:
459454
raise ValueError( # pragma: no cover
460-
"test_size={0} or train_size={1} cannot be null".format(
461-
test_size, train_size))
455+
f"test_size={test_size} or train_size={train_size} cannot be null")
462456

463457
couples = df[[group, stratify]].itertuples(name=None, index=False)
464458
hist = Counter(df[stratify])

pandas_streaming/df/dataframe.py

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -355,14 +355,13 @@ def read_df(df, chunksize=None, check_schema=True) -> 'StreamingDataFrame':
355355
chunksize = df.shape[0]
356356
else:
357357
raise NotImplementedError(
358-
"Cannot retrieve size to infer chunksize for type={0}"
359-
".".format(type(df)))
358+
f"Cannot retrieve size to infer chunksize for type={type(df)}.")
360359

361360
if hasattr(df, 'shape'):
362361
size = df.shape[0]
363362
else:
364363
raise NotImplementedError( # pragma: no cover
365-
"Cannot retrieve size for type={0}.".format(type(df)))
364+
f"Cannot retrieve size for type={type(df)}.")
366365

367366
def local_iterator():
368367
"local iterator"
@@ -696,10 +695,10 @@ def iterator_concat(this, lothers):
696695
if check:
697696
if list(columns) != list(df.columns):
698697
raise ValueError(
699-
"Frame others[{0}] do not have the same column names or the same order.".format(i))
698+
f"Frame others[{i}] do not have the same column names or the same order.")
700699
if list(dtypes) != list(df.dtypes):
701700
raise ValueError(
702-
"Frame others[{0}] do not have the same column types.".format(i))
701+
f"Frame others[{i}] do not have the same column types.")
703702
check = False
704703
yield df
705704

@@ -896,7 +895,7 @@ def iterate_streaming():
896895
return StreamingDataFrame(lambda: iterate_streaming(), **self.get_kwargs())
897896

898897
raise ValueError( # pragma: no cover
899-
"Unknown strategy '{0}'".format(strategy))
898+
f"Unknown strategy '{strategy}'")
900899

901900
def ensure_dtype(self, df, dtypes):
902901
"""
@@ -951,7 +950,7 @@ def __setitem__(self, index, value):
951950
"""
952951
if not isinstance(index, str):
953952
raise ValueError(
954-
"Only column affected are supported but index=%r." % index)
953+
f"Only column affected are supported but index={index!r}.")
955954
if isinstance(value, (int, float, numpy.number, str)):
956955
# Is is equivalent to add_column.
957956
iter_creation = self.iter_creation
@@ -1145,7 +1144,7 @@ def sort_values(self, by, axis=0, ascending=True, kind='quicksort',
11451144
"""
11461145
if not isinstance(by, str):
11471146
raise NotImplementedError(
1148-
"Only one column can be used to sort not %r." % by)
1147+
f"Only one column can be used to sort not {by!r}.")
11491148
keys = {}
11501149
nans = []
11511150
indices = []
@@ -1224,7 +1223,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
12241223
self, iter_creation, check_schema=check_schema, stable=stable)
12251224
if len(self.columns) != 1:
12261225
raise RuntimeError(
1227-
"A series can contain only one column not %r." % len(self.columns))
1226+
f"A series can contain only one column not {len(self.columns)!r}.")
12281227

12291228
def apply(self, *args, **kwargs) -> 'StreamingDataFrame':
12301229
"""

pandas_streaming/df/dataframe_helpers.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def hash_str(c, hash_length):
4949
if isinstance(c, float):
5050
if numpy.isnan(c):
5151
return c
52-
raise ValueError("numpy.nan expected, not {0}".format(c))
52+
raise ValueError(f"numpy.nan expected, not {c}")
5353
m = hashlib.sha256()
5454
m.update(c.encode("utf-8"))
5555
r = m.hexdigest()
@@ -70,7 +70,7 @@ def hash_int(c, hash_length):
7070
if numpy.isnan(c):
7171
return c
7272
else:
73-
raise ValueError("numpy.nan expected, not {0}".format(c))
73+
raise ValueError(f"numpy.nan expected, not {c}")
7474
else:
7575
b = struct.pack("i", c)
7676
m = hashlib.sha256()
@@ -167,7 +167,7 @@ def hash_floatl(c):
167167
df[c] = df[c].apply(hash_strl)
168168
else:
169169
raise NotImplementedError(
170-
"Conversion of type {0} in column '{1}' is not implemented".format(t, c))
170+
f"Conversion of type {t} in column '{c}' is not implemented")
171171

172172
return df
173173

@@ -413,7 +413,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
413413
df.columns, df.dtypes)} # pylint: disable=R1721
414414
if typ[by[0]] != do:
415415
warnings.warn( # pragma: no cover
416-
"[pandas_groupby_nan] NaN value: {0}".format(rep))
416+
f"[pandas_groupby_nan] NaN value: {rep}")
417417
return res
418418
for b in by:
419419
fnan = rep[b]
@@ -468,7 +468,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
468468
for b in by:
469469
if typ[b] != do:
470470
warnings.warn( # pragma: no cover
471-
"[pandas_groupby_nan] NaN values: {0}".format(rep))
471+
f"[pandas_groupby_nan] NaN values: {rep}")
472472
break
473473
return res
474474
raise NotImplementedError(

pandas_streaming/df/dataframe_io.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -79,22 +79,22 @@ def to_zip(df, zipfilename, zname="df.csv", **kwargs):
7979
numpy.save(stb, df, **kwargs)
8080
else:
8181
raise TypeError( # pragma: no cover
82-
"Type not handled {0}".format(type(df)))
82+
f"Type not handled {type(df)}")
8383
text = stb.getvalue()
8484

8585
if isinstance(zipfilename, str):
8686
ext = os.path.splitext(zipfilename)[-1]
8787
if ext != '.zip':
8888
raise NotImplementedError( # pragma: no cover
89-
"Only zip file are implemented not '{0}'.".format(ext))
89+
f"Only zip file are implemented not '{ext}'.")
9090
zf = zipfile.ZipFile(zipfilename, 'w') # pylint: disable=R1732
9191
close = True
9292
elif isinstance(zipfilename, zipfile.ZipFile):
9393
zf = zipfilename
9494
close = False
9595
else:
9696
raise TypeError( # pragma: no cover
97-
"No implementation for type '{0}'".format(type(zipfilename)))
97+
f"No implementation for type '{type(zipfilename)}'")
9898

9999
zf.writestr(zname, text)
100100
if close:
@@ -115,15 +115,15 @@ def read_zip(zipfilename, zname=None, **kwargs):
115115
ext = os.path.splitext(zipfilename)[-1]
116116
if ext != '.zip':
117117
raise NotImplementedError( # pragma: no cover
118-
"Only zip files are supported not '{0}'.".format(ext))
118+
f"Only zip files are supported not '{ext}'.")
119119
zf = zipfile.ZipFile(zipfilename, 'r') # pylint: disable=R1732
120120
close = True
121121
elif isinstance(zipfilename, zipfile.ZipFile):
122122
zf = zipfilename
123123
close = False
124124
else:
125125
raise TypeError( # pragma: no cover
126-
"No implementation for type '{0}'".format(type(zipfilename)))
126+
f"No implementation for type '{type(zipfilename)}'")
127127

128128
if zname is None:
129129
zname = zf.namelist()[0]

pandas_streaming/df/dataframe_io_helpers.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -131,11 +131,11 @@ def _flatten(obj, key):
131131
if not isinstance(k, str):
132132
raise TypeError(
133133
"All keys must a string.") # pragma: no cover
134-
k2 = k if key is None else "{0}{1}{2}".format(key, sep, k)
134+
k2 = k if key is None else f"{key}{sep}{k}"
135135
_flatten(v, k2)
136136
elif isinstance(obj, (list, set)):
137137
for index, item in enumerate(obj):
138-
k2 = k if key is None else "{0}{1}{2}".format(key, sep, index)
138+
k2 = k if key is None else f"{key}{sep}{index}"
139139
_flatten(item, k2)
140140
else:
141141
flattened_dict[key] = obj
@@ -266,15 +266,14 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
266266
for i, (_, event, value) in enumerate(parser):
267267
if i % 1000000 == 0 and fLOG is not None:
268268
fLOG( # pragma: no cover
269-
"[enumerate_json_items] i={0} yielded={1}"
270-
"".format(i, nbyield))
269+
f"[enumerate_json_items] i={i} yielded={nbyield}")
271270
if event == "start_array":
272271
if curkey is None:
273272
current = []
274273
else:
275274
if not isinstance(current, dict):
276275
raise RuntimeError( # pragma: no cover
277-
"Type issue {0}".format(type(current)))
276+
f"Type issue {type(current)}")
278277
c = []
279278
current[curkey] = c # pylint: disable=E1137
280279
current = c
@@ -324,8 +323,7 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
324323
current[curkey] = None # pylint: disable=E1137
325324
curkey = None
326325
else:
327-
raise ValueError("Unknown event '{0}'".format(
328-
event)) # pragma: no cover
326+
raise ValueError(f"Unknown event '{event}'") # pragma: no cover
329327

330328

331329
class JsonIterator2Stream:

pandas_streaming/df/dataframe_split.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,7 @@ def iterator_internal(part_requested):
255255
part = cache.get(h)
256256
if part is None:
257257
raise ValueError( # pragma: no cover
258-
"Second iteration. A row was never met in the first one\n{0}".format(obs))
258+
f"Second iteration. A row was never met in the first one\n{obs}")
259259
if part == part_requested:
260260
accumul.append(obs)
261261
if len(accumul) >= static_schema[2]:

pandas_streaming/exc/exc_streaming.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,4 +19,4 @@ def __init__(self, meth):
1919
:param meth: inefficient method
2020
"""
2121
Exception.__init__(
22-
self, "{0} should not be done in streaming mode.".format(meth))
22+
self, f"{meth} should not be done in streaming mode.")

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
#########
1111

1212
project_var_name = "pandas_streaming"
13-
versionPython = "%s.%s" % (sys.version_info.major, sys.version_info.minor)
13+
versionPython = f"{sys.version_info.major}.{sys.version_info.minor}"
1414
path = "Lib/site-packages/" + project_var_name
1515
readme = 'README.rst'
1616
history = "HISTORY.rst"

0 commit comments

Comments
 (0)