Uses f strings #29

Merged

merged 1 commit on Jul 19, 2022
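This PR replaces str.format calls with f-strings (PEP 498, Python 3.6+) throughout the package. A minimal sketch of the equivalence (the values are illustrative, not taken from the codebase):

s1, s2 = {1, 2}, {2, 3}

# Before: positional placeholders filled by str.format
old = 'Non empty intersection {0} & {1}'.format(s1, s2)

# After: the same message as an f-string
new = f'Non empty intersection {s1} & {s2}'

assert old == new  # identical output, evaluated inline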
6 changes: 3 additions & 3 deletions _unittests/ut_df/test_connex_split.py
@@ -110,7 +110,7 @@ def test_split_connex(self):
s2 = set(test[col])
if s1 & s2:
raise Exception(
- 'Non empty intersection {0} & {1}\n{2}\n{3}'.format(s1, s2, train, test))
+ f'Non empty intersection {s1} & {s2}\n{train}\n{test}')

df['connex'] = 'ole'
train, test = train_test_connex_split( # pylint: disable=W0632
@@ -148,7 +148,7 @@ def test_split_connex2(self):
if s1 & s2:
rows = []
for k, v in sorted(stats[0].items()):
rows.append("{0}={1}".format(k, v))
rows.append(f"{k}={v}")
raise Exception(
'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(s1, s2, train, test, "\n".join(rows)))

@@ -180,7 +180,7 @@ def test_split_connex_missing(self):
if s1 & s2:
rows = []
for k, v in sorted(stats[0].items()):
rows.append("{0}={1}".format(k, v))
rows.append(f"{k}={v}")
raise Exception(
'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(s1, s2, train, test, "\n".join(rows)))

2 changes: 1 addition & 1 deletion _unittests/ut_df/test_pandas_groupbynan.py
@@ -31,7 +31,7 @@ def test_pandas_groupbynan(self):
try:
self.assertIsInstance(li[-1], float)
except AssertionError as e:
raise AssertionError("Issue with {0}".format(ty)) from e
raise AssertionError(f"Issue with {ty}") from e
try:
self.assertTrue(numpy.isnan(li[-1]))
except AssertionError as e:
4 changes: 2 additions & 2 deletions pandas_streaming/data/dummy.py
@@ -20,10 +20,10 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
"""
if asfloat:
df = DataFrame(dict(cfloat=[_ + 0.1 for _ in range(0, n)], cstr=[
"s{0}".format(i) for i in range(0, n)]))
f"s{i}" for i in range(0, n)]))
else:
df = DataFrame(dict(cint=list(range(0, n)), cstr=[
"s{0}".format(i) for i in range(0, n)]))
f"s{i}" for i in range(0, n)]))
for k, v in cols.items():
df[k] = v
return StreamingDataFrame.read_df(df, chunksize=chunksize)
20 changes: 7 additions & 13 deletions pandas_streaming/df/connex_split.py
@@ -48,8 +48,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
if weights is None:
if test_size == 0 or train_size == 0:
raise ValueError(
"test_size={0} or train_size={1} cannot be null (1)."
"".format(test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null (1).")
return train_test_split(df, test_size=test_size,
train_size=train_size,
random_state=random_state)
@@ -69,8 +68,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
test_size = 1 - p
if p is None or min(test_size, p) <= 0:
raise ValueError(
"test_size={0} or train_size={1} cannot be null (2)."
"".format(test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null (2).")
ratio = test_size / p

if random_state is None:
@@ -330,11 +328,9 @@ def do_connex_components(dfrows, local_groups, kb, sib):
grsum = dfids[[name, one]].groupby(name, as_index=False).sum()
if fLOG:
for g in groups:
fLOG("[train_test_connex_split] #nb in '{0}': {1}".format(
g, len(set(dfids[g]))))
fLOG(f"[train_test_connex_split] #nb in '{g}': {len(set(dfids[g]))}")
fLOG(
"[train_test_connex_split] #connex {0}/{1}".format(
grsum.shape[0], dfids.shape[0]))
f"[train_test_connex_split] #connex {grsum.shape[0]}/{dfids.shape[0]}")
if grsum.shape[0] <= 1:
raise ValueError( # pragma: no cover
"Every element is in the same connected components.")
@@ -358,10 +354,9 @@ def do_connex_components(dfrows, local_groups, kb, sib):
"={2}/{3}".format(k, v, cum, len(elements)))

# Most important component
- fLOG('[train_test_connex_split] first row of the biggest component '
- '{0}'.format(maxc))
+ fLOG(f'[train_test_connex_split] first row of the biggest component {maxc}')
tdf = dfids[dfids[name] == maxc[0]]
- fLOG('[train_test_connex_split] \n{0}'.format(tdf.head(n=10)))
+ fLOG(f'[train_test_connex_split] \n{tdf.head(n=10)}')

# Splits.
train, test = train_test_split_weights(
@@ -457,8 +452,7 @@ def train_test_apart_stratify(df, group, test_size=0.25, train_size=None,
test_size = 1 - p
if p is None or min(test_size, p) <= 0:
raise ValueError( # pragma: no cover
"test_size={0} or train_size={1} cannot be null".format(
test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null")

couples = df[[group, stratify]].itertuples(name=None, index=False)
hist = Counter(df[stratify])
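Several conversions in this file also collapse two implicitly concatenated string literals into a single f-string, which is why the file loses more lines than it gains. A sketch of that pattern, with illustrative values:

test_size, train_size = 0.25, None

# Before: adjacent literals joined by implicit concatenation, then formatted
old = ("test_size={0} or train_size={1} cannot be null (1)."
       "".format(test_size, train_size))

# After: one f-string carries the whole message
new = f"test_size={test_size} or train_size={train_size} cannot be null (1)."

assert old == new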
17 changes: 8 additions & 9 deletions pandas_streaming/df/dataframe.py
@@ -355,14 +355,13 @@ def read_df(df, chunksize=None, check_schema=True) -> 'StreamingDataFrame':
chunksize = df.shape[0]
else:
raise NotImplementedError(
"Cannot retrieve size to infer chunksize for type={0}"
".".format(type(df)))
f"Cannot retrieve size to infer chunksize for type={type(df)}.")

if hasattr(df, 'shape'):
size = df.shape[0]
else:
raise NotImplementedError( # pragma: no cover
"Cannot retrieve size for type={0}.".format(type(df)))
f"Cannot retrieve size for type={type(df)}.")

def local_iterator():
"local iterator"
@@ -696,10 +695,10 @@ def iterator_concat(this, lothers):
if check:
if list(columns) != list(df.columns):
raise ValueError(
"Frame others[{0}] do not have the same column names or the same order.".format(i))
f"Frame others[{i}] do not have the same column names or the same order.")
if list(dtypes) != list(df.dtypes):
raise ValueError(
"Frame others[{0}] do not have the same column types.".format(i))
f"Frame others[{i}] do not have the same column types.")
check = False
yield df

@@ -896,7 +895,7 @@ def iterate_streaming():
return StreamingDataFrame(lambda: iterate_streaming(), **self.get_kwargs())

raise ValueError( # pragma: no cover
"Unknown strategy '{0}'".format(strategy))
f"Unknown strategy '{strategy}'")

def ensure_dtype(self, df, dtypes):
"""
@@ -951,7 +950,7 @@ def __setitem__(self, index, value):
"""
if not isinstance(index, str):
raise ValueError(
"Only column affected are supported but index=%r." % index)
f"Only column affected are supported but index={index!r}.")
if isinstance(value, (int, float, numpy.number, str)):
# Is is equivalent to add_column.
iter_creation = self.iter_creation
@@ -1145,7 +1144,7 @@ def sort_values(self, by, axis=0, ascending=True, kind='quicksort',
"""
if not isinstance(by, str):
raise NotImplementedError(
"Only one column can be used to sort not %r." % by)
f"Only one column can be used to sort not {by!r}.")
keys = {}
nans = []
indices = []
@@ -1224,7 +1223,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
self, iter_creation, check_schema=check_schema, stable=stable)
if len(self.columns) != 1:
raise RuntimeError(
"A series can contain only one column not %r." % len(self.columns))
f"A series can contain only one column not {len(self.columns)!r}.")

def apply(self, *args, **kwargs) -> 'StreamingDataFrame':
"""
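This file also converts %-style formatting. %r corresponds to the f-string !r conversion, which interpolates repr() of the value. A sketch with an illustrative variable:

index = "col"

# Before: %-style formatting, %r inserts repr(index)
old = "Only column affected are supported but index=%r." % index

# After: !r requests repr() inside the f-string
new = f"Only column affected are supported but index={index!r}."

assert old == new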
10 changes: 5 additions & 5 deletions pandas_streaming/df/dataframe_helpers.py
@@ -49,7 +49,7 @@ def hash_str(c, hash_length):
if isinstance(c, float):
if numpy.isnan(c):
return c
raise ValueError("numpy.nan expected, not {0}".format(c))
raise ValueError(f"numpy.nan expected, not {c}")
m = hashlib.sha256()
m.update(c.encode("utf-8"))
r = m.hexdigest()
@@ -70,7 +70,7 @@ def hash_int(c, hash_length):
if numpy.isnan(c):
return c
else:
raise ValueError("numpy.nan expected, not {0}".format(c))
raise ValueError(f"numpy.nan expected, not {c}")
else:
b = struct.pack("i", c)
m = hashlib.sha256()
@@ -167,7 +167,7 @@ def hash_floatl(c):
df[c] = df[c].apply(hash_strl)
else:
raise NotImplementedError(
"Conversion of type {0} in column '{1}' is not implemented".format(t, c))
f"Conversion of type {t} in column '{c}' is not implemented")

return df

@@ -413,7 +413,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
df.columns, df.dtypes)} # pylint: disable=R1721
if typ[by[0]] != do:
warnings.warn( # pragma: no cover
"[pandas_groupby_nan] NaN value: {0}".format(rep))
f"[pandas_groupby_nan] NaN value: {rep}")
return res
for b in by:
fnan = rep[b]
@@ -468,7 +468,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
for b in by:
if typ[b] != do:
warnings.warn( # pragma: no cover
"[pandas_groupby_nan] NaN values: {0}".format(rep))
f"[pandas_groupby_nan] NaN values: {rep}")
break
return res
raise NotImplementedError(
10 changes: 5 additions & 5 deletions pandas_streaming/df/dataframe_io.py
@@ -79,22 +79,22 @@ def to_zip(df, zipfilename, zname="df.csv", **kwargs):
numpy.save(stb, df, **kwargs)
else:
raise TypeError( # pragma: no cover
"Type not handled {0}".format(type(df)))
f"Type not handled {type(df)}")
text = stb.getvalue()

if isinstance(zipfilename, str):
ext = os.path.splitext(zipfilename)[-1]
if ext != '.zip':
raise NotImplementedError( # pragma: no cover
"Only zip file are implemented not '{0}'.".format(ext))
f"Only zip file are implemented not '{ext}'.")
zf = zipfile.ZipFile(zipfilename, 'w') # pylint: disable=R1732
close = True
elif isinstance(zipfilename, zipfile.ZipFile):
zf = zipfilename
close = False
else:
raise TypeError( # pragma: no cover
"No implementation for type '{0}'".format(type(zipfilename)))
f"No implementation for type '{type(zipfilename)}'")

zf.writestr(zname, text)
if close:
@@ -115,15 +115,15 @@ def read_zip(zipfilename, zname=None, **kwargs):
ext = os.path.splitext(zipfilename)[-1]
if ext != '.zip':
raise NotImplementedError( # pragma: no cover
"Only zip files are supported not '{0}'.".format(ext))
f"Only zip files are supported not '{ext}'.")
zf = zipfile.ZipFile(zipfilename, 'r') # pylint: disable=R1732
close = True
elif isinstance(zipfilename, zipfile.ZipFile):
zf = zipfilename
close = False
else:
raise TypeError( # pragma: no cover
"No implementation for type '{0}'".format(type(zipfilename)))
f"No implementation for type '{type(zipfilename)}'")

if zname is None:
zname = zf.namelist()[0]
12 changes: 5 additions & 7 deletions pandas_streaming/df/dataframe_io_helpers.py
@@ -131,11 +131,11 @@ def _flatten(obj, key):
if not isinstance(k, str):
raise TypeError(
"All keys must a string.") # pragma: no cover
k2 = k if key is None else "{0}{1}{2}".format(key, sep, k)
k2 = k if key is None else f"{key}{sep}{k}"
_flatten(v, k2)
elif isinstance(obj, (list, set)):
for index, item in enumerate(obj):
k2 = k if key is None else "{0}{1}{2}".format(key, sep, index)
k2 = k if key is None else f"{key}{sep}{index}"
_flatten(item, k2)
else:
flattened_dict[key] = obj
@@ -266,15 +266,14 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
for i, (_, event, value) in enumerate(parser):
if i % 1000000 == 0 and fLOG is not None:
fLOG( # pragma: no cover
"[enumerate_json_items] i={0} yielded={1}"
"".format(i, nbyield))
f"[enumerate_json_items] i={i} yielded={nbyield}")
if event == "start_array":
if curkey is None:
current = []
else:
if not isinstance(current, dict):
raise RuntimeError( # pragma: no cover
"Type issue {0}".format(type(current)))
f"Type issue {type(current)}")
c = []
current[curkey] = c # pylint: disable=E1137
current = c
@@ -324,8 +323,7 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
current[curkey] = None # pylint: disable=E1137
curkey = None
else:
raise ValueError("Unknown event '{0}'".format(
event)) # pragma: no cover
raise ValueError(f"Unknown event '{event}'") # pragma: no cover


class JsonIterator2Stream:
2 changes: 1 addition & 1 deletion pandas_streaming/df/dataframe_split.py
@@ -255,7 +255,7 @@ def iterator_internal(part_requested):
part = cache.get(h)
if part is None:
raise ValueError( # pragma: no cover
"Second iteration. A row was never met in the first one\n{0}".format(obs))
f"Second iteration. A row was never met in the first one\n{obs}")
if part == part_requested:
accumul.append(obs)
if len(accumul) >= static_schema[2]:
2 changes: 1 addition & 1 deletion pandas_streaming/exc/exc_streaming.py
@@ -19,4 +19,4 @@ def __init__(self, meth):
:param meth: inefficient method
"""
Exception.__init__(
self, "{0} should not be done in streaming mode.".format(meth))
self, f"{meth} should not be done in streaming mode.")
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@
#########

project_var_name = "pandas_streaming"
versionPython = "%s.%s" % (sys.version_info.major, sys.version_info.minor)
versionPython = f"{sys.version_info.major}.{sys.version_info.minor}"
path = "Lib/site-packages/" + project_var_name
readme = 'README.rst'
history = "HISTORY.rst"