Uses f strings #29

Merged

merged 1 commit on Jul 19, 2022
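This PR replaces str.format calls with f-strings (PEP 498, Python 3.6+) throughout the package. A minimal sketch of the equivalence (the values are illustrative, not taken from the codebase):

s1, s2 = {1, 2}, {2, 3}

# Before: positional placeholders filled by str.format
old = 'Non empty intersection {0} & {1}'.format(s1, s2)

# After: the same message as an f-string
new = f'Non empty intersection {s1} & {s2}'

assert old == new  # identical output, evaluated inline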
6 changes: 3 additions & 3 deletions _unittests/ut_df/test_connex_split.py
@@ -110,7 +110,7 @@ def test_split_connex(self):
s2 = set(test[col])
if s1 & s2:
raise Exception(
- 'Non empty intersection {0} & {1}\n{2}\n{3}'.format(s1, s2, train, test))
+ f'Non empty intersection {s1} & {s2}\n{train}\n{test}')

df['connex'] = 'ole'
train, test = train_test_connex_split( # pylint: disable=W0632
@@ -148,7 +148,7 @@ def test_split_connex2(self):
if s1 & s2:
rows = []
for k, v in sorted(stats[0].items()):
rows.append("{0}={1}".format(k, v))
rows.append(f"{k}={v}")
raise Exception(
'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(s1, s2, train, test, "\n".join(rows)))

@@ -180,7 +180,7 @@ def test_split_connex_missing(self):
if s1 & s2:
rows = []
for k, v in sorted(stats[0].items()):
rows.append("{0}={1}".format(k, v))
rows.append(f"{k}={v}")
raise Exception(
'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(s1, s2, train, test, "\n".join(rows)))

2 changes: 1 addition & 1 deletion _unittests/ut_df/test_pandas_groupbynan.py
@@ -31,7 +31,7 @@ def test_pandas_groupbynan(self):
try:
self.assertIsInstance(li[-1], float)
except AssertionError as e:
raise AssertionError("Issue with {0}".format(ty)) from e
raise AssertionError(f"Issue with {ty}") from e
try:
self.assertTrue(numpy.isnan(li[-1]))
except AssertionError as e:
4 changes: 2 additions & 2 deletions pandas_streaming/data/dummy.py
@@ -20,10 +20,10 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
"""
if asfloat:
df = DataFrame(dict(cfloat=[_ + 0.1 for _ in range(0, n)], cstr=[
"s{0}".format(i) for i in range(0, n)]))
f"s{i}" for i in range(0, n)]))
else:
df = DataFrame(dict(cint=list(range(0, n)), cstr=[
"s{0}".format(i) for i in range(0, n)]))
f"s{i}" for i in range(0, n)]))
for k, v in cols.items():
df[k] = v
return StreamingDataFrame.read_df(df, chunksize=chunksize)
20 changes: 7 additions & 13 deletions pandas_streaming/df/connex_split.py
@@ -48,8 +48,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
if weights is None:
if test_size == 0 or train_size == 0:
raise ValueError(
"test_size={0} or train_size={1} cannot be null (1)."
"".format(test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null (1).")
return train_test_split(df, test_size=test_size,
train_size=train_size,
random_state=random_state)
@@ -69,8 +68,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
test_size = 1 - p
if p is None or min(test_size, p) <= 0:
raise ValueError(
"test_size={0} or train_size={1} cannot be null (2)."
"".format(test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null (2).")
ratio = test_size / p

if random_state is None:
@@ -330,11 +328,9 @@ def do_connex_components(dfrows, local_groups, kb, sib):
grsum = dfids[[name, one]].groupby(name, as_index=False).sum()
if fLOG:
for g in groups:
fLOG("[train_test_connex_split] #nb in '{0}': {1}".format(
g, len(set(dfids[g]))))
fLOG(f"[train_test_connex_split] #nb in '{g}': {len(set(dfids[g]))}")
fLOG(
"[train_test_connex_split] #connex {0}/{1}".format(
grsum.shape[0], dfids.shape[0]))
f"[train_test_connex_split] #connex {grsum.shape[0]}/{dfids.shape[0]}")
if grsum.shape[0] <= 1:
raise ValueError( # pragma: no cover
"Every element is in the same connected components.")
@@ -358,10 +354,9 @@ def do_connex_components(dfrows, local_groups, kb, sib):
"={2}/{3}".format(k, v, cum, len(elements)))

# Most important component
- fLOG('[train_test_connex_split] first row of the biggest component '
- '{0}'.format(maxc))
+ fLOG(f'[train_test_connex_split] first row of the biggest component {maxc}')
tdf = dfids[dfids[name] == maxc[0]]
- fLOG('[train_test_connex_split] \n{0}'.format(tdf.head(n=10)))
+ fLOG(f'[train_test_connex_split] \n{tdf.head(n=10)}')

# Splits.
train, test = train_test_split_weights(
@@ -457,8 +452,7 @@ def train_test_apart_stratify(df, group, test_size=0.25, train_size=None,
test_size = 1 - p
if p is None or min(test_size, p) <= 0:
raise ValueError( # pragma: no cover
"test_size={0} or train_size={1} cannot be null".format(
test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null")

couples = df[[group, stratify]].itertuples(name=None, index=False)
hist = Counter(df[stratify])
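Several conversions in this file also collapse two implicitly concatenated string literals into a single f-string, which is why the file loses more lines than it gains. A sketch of that pattern, with illustrative values:

test_size, train_size = 0.25, None

# Before: adjacent literals joined by implicit concatenation, then formatted
old = ("test_size={0} or train_size={1} cannot be null (1)."
       "".format(test_size, train_size))

# After: one f-string carries the whole message
new = f"test_size={test_size} or train_size={train_size} cannot be null (1)."

assert old == new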
17 changes: 8 additions & 9 deletions pandas_streaming/df/dataframe.py
@@ -355,14 +355,13 @@ def read_df(df, chunksize=None, check_schema=True) -> 'StreamingDataFrame':
chunksize = df.shape[0]
else:
raise NotImplementedError(
"Cannot retrieve size to infer chunksize for type={0}"
".".format(type(df)))
f"Cannot retrieve size to infer chunksize for type={type(df)}.")

if hasattr(df, 'shape'):
size = df.shape[0]
else:
raise NotImplementedError( # pragma: no cover
"Cannot retrieve size for type={0}.".format(type(df)))
f"Cannot retrieve size for type={type(df)}.")

def local_iterator():
"local iterator"
@@ -696,10 +695,10 @@ def iterator_concat(this, lothers):
if check:
if list(columns) != list(df.columns):
raise ValueError(
"Frame others[{0}] do not have the same column names or the same order.".format(i))
f"Frame others[{i}] do not have the same column names or the same order.")
if list(dtypes) != list(df.dtypes):
raise ValueError(
"Frame others[{0}] do not have the same column types.".format(i))
f"Frame others[{i}] do not have the same column types.")
check = False
yield df

@@ -896,7 +895,7 @@ def iterate_streaming():
return StreamingDataFrame(lambda: iterate_streaming(), **self.get_kwargs())

raise ValueError( # pragma: no cover
"Unknown strategy '{0}'".format(strategy))
f"Unknown strategy '{strategy}'")

def ensure_dtype(self, df, dtypes):
"""
@@ -951,7 +950,7 @@ def __setitem__(self, index, value):
"""
if not isinstance(index, str):
raise ValueError(
"Only column affected are supported but index=%r." % index)
f"Only column affected are supported but index={index!r}.")
if isinstance(value, (int, float, numpy.number, str)):
# Is is equivalent to add_column.
iter_creation = self.iter_creation
@@ -1145,7 +1144,7 @@ def sort_values(self, by, axis=0, ascending=True, kind='quicksort',
"""
if not isinstance(by, str):
raise NotImplementedError(
"Only one column can be used to sort not %r." % by)
f"Only one column can be used to sort not {by!r}.")
keys = {}
nans = []
indices = []
@@ -1224,7 +1223,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
self, iter_creation, check_schema=check_schema, stable=stable)
if len(self.columns) != 1:
raise RuntimeError(
"A series can contain only one column not %r." % len(self.columns))
f"A series can contain only one column not {len(self.columns)!r}.")

def apply(self, *args, **kwargs) -> 'StreamingDataFrame':
"""
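This file also converts %-style formatting. %r corresponds to the f-string !r conversion, which interpolates repr() of the value. A sketch with an illustrative variable:

index = "col"

# Before: %-style formatting, %r inserts repr(index)
old = "Only column affected are supported but index=%r." % index

# After: !r requests repr() inside the f-string
new = f"Only column affected are supported but index={index!r}."

assert old == new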
10 changes: 5 additions & 5 deletions pandas_streaming/df/dataframe_helpers.py
@@ -49,7 +49,7 @@ def hash_str(c, hash_length):
if isinstance(c, float):
if numpy.isnan(c):
return c
raise ValueError("numpy.nan expected, not {0}".format(c))
raise ValueError(f"numpy.nan expected, not {c}")
m = hashlib.sha256()
m.update(c.encode("utf-8"))
r = m.hexdigest()
@@ -70,7 +70,7 @@ def hash_int(c, hash_length):
if numpy.isnan(c):
return c
else:
raise ValueError("numpy.nan expected, not {0}".format(c))
raise ValueError(f"numpy.nan expected, not {c}")
else:
b = struct.pack("i", c)
m = hashlib.sha256()
@@ -167,7 +167,7 @@ def hash_floatl(c):
df[c] = df[c].apply(hash_strl)
else:
raise NotImplementedError(
"Conversion of type {0} in column '{1}' is not implemented".format(t, c))
f"Conversion of type {t} in column '{c}' is not implemented")

return df

@@ -413,7 +413,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
df.columns, df.dtypes)} # pylint: disable=R1721
if typ[by[0]] != do:
warnings.warn( # pragma: no cover
"[pandas_groupby_nan] NaN value: {0}".format(rep))
f"[pandas_groupby_nan] NaN value: {rep}")
return res
for b in by:
fnan = rep[b]
@@ -468,7 +468,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
for b in by:
if typ[b] != do:
warnings.warn( # pragma: no cover
"[pandas_groupby_nan] NaN values: {0}".format(rep))
f"[pandas_groupby_nan] NaN values: {rep}")
break
return res
raise NotImplementedError(
10 changes: 5 additions & 5 deletions pandas_streaming/df/dataframe_io.py
@@ -79,22 +79,22 @@ def to_zip(df, zipfilename, zname="df.csv", **kwargs):
numpy.save(stb, df, **kwargs)
else:
raise TypeError( # pragma: no cover
"Type not handled {0}".format(type(df)))
f"Type not handled {type(df)}")
text = stb.getvalue()

if isinstance(zipfilename, str):
ext = os.path.splitext(zipfilename)[-1]
if ext != '.zip':
raise NotImplementedError( # pragma: no cover
"Only zip file are implemented not '{0}'.".format(ext))
f"Only zip file are implemented not '{ext}'.")
zf = zipfile.ZipFile(zipfilename, 'w') # pylint: disable=R1732
close = True
elif isinstance(zipfilename, zipfile.ZipFile):
zf = zipfilename
close = False
else:
raise TypeError( # pragma: no cover
"No implementation for type '{0}'".format(type(zipfilename)))
f"No implementation for type '{type(zipfilename)}'")

zf.writestr(zname, text)
if close:
@@ -115,15 +115,15 @@ def read_zip(zipfilename, zname=None, **kwargs):
ext = os.path.splitext(zipfilename)[-1]
if ext != '.zip':
raise NotImplementedError( # pragma: no cover
"Only zip files are supported not '{0}'.".format(ext))
f"Only zip files are supported not '{ext}'.")
zf = zipfile.ZipFile(zipfilename, 'r') # pylint: disable=R1732
close = True
elif isinstance(zipfilename, zipfile.ZipFile):
zf = zipfilename
close = False
else:
raise TypeError( # pragma: no cover
"No implementation for type '{0}'".format(type(zipfilename)))
f"No implementation for type '{type(zipfilename)}'")

if zname is None:
zname = zf.namelist()[0]
12 changes: 5 additions & 7 deletions pandas_streaming/df/dataframe_io_helpers.py
@@ -131,11 +131,11 @@ def _flatten(obj, key):
if not isinstance(k, str):
raise TypeError(
"All keys must a string.") # pragma: no cover
k2 = k if key is None else "{0}{1}{2}".format(key, sep, k)
k2 = k if key is None else f"{key}{sep}{k}"
_flatten(v, k2)
elif isinstance(obj, (list, set)):
for index, item in enumerate(obj):
k2 = k if key is None else "{0}{1}{2}".format(key, sep, index)
k2 = k if key is None else f"{key}{sep}{index}"
_flatten(item, k2)
else:
flattened_dict[key] = obj
@@ -266,15 +266,14 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
for i, (_, event, value) in enumerate(parser):
if i % 1000000 == 0 and fLOG is not None:
fLOG( # pragma: no cover
"[enumerate_json_items] i={0} yielded={1}"
"".format(i, nbyield))
f"[enumerate_json_items] i={i} yielded={nbyield}")
if event == "start_array":
if curkey is None:
current = []
else:
if not isinstance(current, dict):
raise RuntimeError( # pragma: no cover
"Type issue {0}".format(type(current)))
f"Type issue {type(current)}")
c = []
current[curkey] = c # pylint: disable=E1137
current = c
@@ -324,8 +323,7 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
current[curkey] = None # pylint: disable=E1137
curkey = None
else:
raise ValueError("Unknown event '{0}'".format(
event)) # pragma: no cover
raise ValueError(f"Unknown event '{event}'") # pragma: no cover


class JsonIterator2Stream:
2 changes: 1 addition & 1 deletion pandas_streaming/df/dataframe_split.py
@@ -255,7 +255,7 @@ def iterator_internal(part_requested):
part = cache.get(h)
if part is None:
raise ValueError( # pragma: no cover
"Second iteration. A row was never met in the first one\n{0}".format(obs))
f"Second iteration. A row was never met in the first one\n{obs}")
if part == part_requested:
accumul.append(obs)
if len(accumul) >= static_schema[2]:
2 changes: 1 addition & 1 deletion pandas_streaming/exc/exc_streaming.py
@@ -19,4 +19,4 @@ def __init__(self, meth):
:param meth: inefficient method
"""
Exception.__init__(
self, "{0} should not be done in streaming mode.".format(meth))
self, f"{meth} should not be done in streaming mode.")
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@
#########

project_var_name = "pandas_streaming"
versionPython = "%s.%s" % (sys.version_info.major, sys.version_info.minor)
versionPython = f"{sys.version_info.major}.{sys.version_info.minor}"
path = "Lib/site-packages/" + project_var_name
readme = 'README.rst'
history = "HISTORY.rst"