sdpython · sdpython · Jul 27, 2023 · Jul 26, 2023 · Jul 26, 2023 · Jul 26, 2023
diff --git a/.github/workflows/check-urls.yml b/.github/workflows/check-urls.yml
@@ -43,5 +43,5 @@ jobs:
         timeout: 2
         retry_count# : 2
         # exclude_urls: https://hal.archives-ouvertes.fr/hal-00990252/document
-        # exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c,https://dev.azure.com/
+        exclude_patterns: https://circleci.com/gh/sdpython/pandas_streaming/
         # force_pass : true
diff --git a/.local.jenkins.lin.yml b/.local.jenkins.lin.yml
@@ -9,7 +9,7 @@ virtualenv:
 
 install:
   - $PYINT -m pip install --upgrade pip
-  - $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/
+  - $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/
   - $PYINT -m pip install -r requirements.txt
   - $PYINT -m pip install -r requirements-dev.txt
   - $PYINT --version

diff --git a/README.rst b/README.rst
@@ -12,8 +12,8 @@ pandas-streaming: streaming API over pandas
     :target: https://ci.appveyor.com/project/sdpython/pandas-streaming
     :alt: Build Status Windows
 
-.. image:: https://circleci.com/gh/sdpython/pandas_streaming/tree/main.svg?style=svg
-    :target: https://circleci.com/gh/sdpython/pandas_streaming/tree/main
+.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
+    :target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
 
 .. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
     :target: https://dev.azure.com/xavierdupre3/pandas_streaming/

diff --git a/_doc/conf.py b/_doc/conf.py
@@ -61,7 +61,7 @@
 
 # The following is used by sphinx.ext.linkcode to provide links to github
 linkcode_resolve = make_linkcode_resolve(
-    "pandas_streaming",
+    "pandas-streaming",
     (
         "https://github.com/sdpython/pandas-streaming/"
         "blob/{revision}/{package}/"

diff --git a/_doc/index.rst b/_doc/index.rst
@@ -16,8 +16,8 @@ pandas-streaming: streaming API over pandas
     :target: https://ci.appveyor.com/project/sdpython/pandas-streaming
     :alt: Build Status Windows
 
-.. image:: https://circleci.com/gh/sdpython/pandas_streaming/tree/main.svg?style=svg
-    :target: https://circleci.com/gh/sdpython/pandas_streaming/tree/main
+.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
+    :target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
 
 .. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
     :target: https://dev.azure.com/xavierdupre3/pandas_streaming/

diff --git a/_unittests/ut_df/test_connex_split.py b/_unittests/ut_df/test_connex_split.py
@@ -1,6 +1,6 @@
 import unittest
 import pandas
-from pyquickhelper.pycode import ExtTestCase
+from pandas_streaming.ext_test_case import ExtTestCase
 from pandas_streaming.df import (
     dataframe_shuffle,
     train_test_split_weights,

diff --git a/_unittests/ut_df/test_connex_split_big.py b/_unittests/ut_df/test_connex_split_big.py
@@ -3,7 +3,7 @@
 import unittest
 from collections import Counter
 import pandas
-from pyquickhelper.pycode import ExtTestCase
+from pandas_streaming.ext_test_case import ExtTestCase
 from pandas_streaming.df import train_test_connex_split
 
 

diff --git a/_unittests/ut_df/test_connex_split_cat.py b/_unittests/ut_df/test_connex_split_cat.py
@@ -3,7 +3,7 @@
 import unittest
 from collections import Counter
 import pandas
-from pyquickhelper.pycode import ExtTestCase
+from pandas_streaming.ext_test_case import ExtTestCase
 from pandas_streaming.df import train_test_apart_stratify
 
 

diff --git a/_unittests/ut_df/test_dataframe_helpers.py b/_unittests/ut_df/test_dataframe_helpers.py
@@ -2,7 +2,7 @@
 import unittest
 import numpy
 import pandas
-from pyquickhelper.pycode import ExtTestCase
+from pandas_streaming.ext_test_case import ExtTestCase
 from pandas_streaming.df import dataframe_hash_columns
 
 

diff --git a/_unittests/ut_df/test_dataframe_helpers_simple.py b/_unittests/ut_df/test_dataframe_helpers_simple.py
@@ -1,7 +1,7 @@
 import unittest
 import pandas
 import numpy
-from pyquickhelper.pycode import ExtTestCase
+from pandas_streaming.ext_test_case import ExtTestCase
 from pandas_streaming.df import dataframe_unfold
 from pandas_streaming.df.dataframe_helpers import hash_int, hash_str, hash_float
 

diff --git a/_unittests/ut_df/test_dataframe_io.py b/_unittests/ut_df/test_dataframe_io.py
@@ -1,10 +1,11 @@
 import os
+import tempfile
 import unittest
 import io
 import zipfile
 import numpy
 import pandas
-from pyquickhelper.pycode import ExtTestCase, get_temp_folder
+from pandas_streaming.ext_test_case import ExtTestCase
 from pandas_streaming.df import to_zip, read_zip
 
 
@@ -20,43 +21,43 @@ def test_zip_dataframe(self):
             ]
         )
 
-        temp = get_temp_folder(__file__, "temp_zip")
-        name = os.path.join(temp, "df.zip")
-        to_zip(df, name, encoding="utf-8", index=False)
-        df2 = read_zip(name, encoding="utf-8")
-        self.assertEqualDataFrame(df, df2)
+        with tempfile.TemporaryDirectory() as temp:
+            name = os.path.join(temp, "df.zip")
+            to_zip(df, name, encoding="utf-8", index=False)
+            df2 = read_zip(name, encoding="utf-8")
+            self.assertEqualDataFrame(df, df2)
 
-        st = io.BytesIO()
-        zp = zipfile.ZipFile(st, "w")
-        to_zip(df, zp, encoding="utf-8", index=False)
-        zp.close()
+            st = io.BytesIO()
+            zp = zipfile.ZipFile(st, "w")
+            to_zip(df, zp, encoding="utf-8", index=False)
+            zp.close()
 
-        st = io.BytesIO(st.getvalue())
-        zp = zipfile.ZipFile(st, "r")
-        df3 = read_zip(zp, encoding="utf-8")
-        zp.close()
-        self.assertEqualDataFrame(df, df3)
+            st = io.BytesIO(st.getvalue())
+            zp = zipfile.ZipFile(st, "r")
+            df3 = read_zip(zp, encoding="utf-8")
+            zp.close()
+            self.assertEqualDataFrame(df, df3)
 
     def test_zip_numpy(self):
         df = numpy.zeros((3, 4))
         df[2, 3] = 1
 
-        temp = get_temp_folder(__file__, "temp_zip")
-        name = os.path.join(temp, "df.zip")
-        to_zip(df, name, "arr.npy")
-        df2 = read_zip(name, "arr.npy")
-        self.assertEqualArray(df, df2)
-
-        st = io.BytesIO()
-        zp = zipfile.ZipFile(st, "w")
-        to_zip(df, zp, "arr.npy")
-        zp.close()
-
-        st = io.BytesIO(st.getvalue())
-        zp = zipfile.ZipFile(st, "r")
-        df3 = read_zip(zp, "arr.npy")
-        zp.close()
-        self.assertEqualArray(df, df3)
+        with tempfile.TemporaryDirectory() as temp:
+            name = os.path.join(temp, "df.zip")
+            to_zip(df, name, "arr.npy")
+            df2 = read_zip(name, "arr.npy")
+            self.assertEqualArray(df, df2)
+
+            st = io.BytesIO()
+            zp = zipfile.ZipFile(st, "w")
+            to_zip(df, zp, "arr.npy")
+            zp.close()
+
+            st = io.BytesIO(st.getvalue())
+            zp = zipfile.ZipFile(st, "r")
+            df3 = read_zip(zp, "arr.npy")
+            zp.close()
+            self.assertEqualArray(df, df3)
 
 
 if __name__ == "__main__":

diff --git a/_unittests/ut_df/test_dataframe_io_helpers.py b/_unittests/ut_df/test_dataframe_io_helpers.py
@@ -2,7 +2,7 @@
 from io import StringIO, BytesIO
 from json import loads
 import pandas
-from pyquickhelper.pycode import ExtTestCase
+from pandas_streaming.ext_test_case import ExtTestCase
 from pandas_streaming.df.dataframe_io_helpers import (
     enumerate_json_items,
     JsonPerRowsStream,

diff --git a/_unittests/ut_df/test_dataframe_sort.py b/_unittests/ut_df/test_dataframe_sort.py
@@ -1,104 +1,105 @@
 import os
+import tempfile
 import unittest
 import pandas
-from pyquickhelper.pycode import ExtTestCase, get_temp_folder
+from pandas_streaming.ext_test_case import ExtTestCase
 from pandas_streaming.df import StreamingDataFrame
 
 
 class TestDataFrameSort(ExtTestCase):
     def test_sort_values(self):
-        temp = get_temp_folder(__file__, "temp_sort_values")
-        name = os.path.join(temp, "_data_")
-        df = pandas.DataFrame(
-            [
-                dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
-                dict(a=5, b="f", c=5.7, ind="a2", ai=2),
-                dict(a=4, b="g", ind="a3", ai=3),
-                dict(a=8, b="h", c=5.9, ai=4),
-                dict(a=16, b="i", c=6.2, ind="a5", ai=5),
-            ]
-        )
-        sdf = StreamingDataFrame.read_df(df, chunksize=2)
-        sorted_df = df.sort_values(by="a")
-        res = sdf.sort_values(by="a", temp_file=name)
-        res_df = res.to_df()
-        self.assertEqualDataFrame(sorted_df, res_df)
+        with tempfile.TemporaryDirectory() as temp:
+            name = os.path.join(temp, "_data_")
+            df = pandas.DataFrame(
+                [
+                    dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
+                    dict(a=5, b="f", c=5.7, ind="a2", ai=2),
+                    dict(a=4, b="g", ind="a3", ai=3),
+                    dict(a=8, b="h", c=5.9, ai=4),
+                    dict(a=16, b="i", c=6.2, ind="a5", ai=5),
+                ]
+            )
+            sdf = StreamingDataFrame.read_df(df, chunksize=2)
+            sorted_df = df.sort_values(by="a")
+            res = sdf.sort_values(by="a", temp_file=name)
+            res_df = res.to_df()
+            self.assertEqualDataFrame(sorted_df, res_df)
 
     def test_sort_values_twice(self):
-        temp = get_temp_folder(__file__, "temp_sort_values_twice")
-        name = os.path.join(temp, "_data_")
-        df = pandas.DataFrame(
-            [
-                dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
-                dict(a=5, b="f", c=5.7, ind="a2", ai=2),
-                dict(a=4, b="g", ind="a3", ai=3),
-                dict(a=8, b="h", c=5.9, ai=4),
-                dict(a=16, b="i", c=6.2, ind="a5", ai=5),
-            ]
-        )
-        sdf = StreamingDataFrame.read_df(df, chunksize=2)
-        sorted_df = df.sort_values(by="a")
-        res = sdf.sort_values(by="a", temp_file=name)
-        res_df = res.to_df()
-        self.assertEqualDataFrame(sorted_df, res_df)
-        res_df = res.to_df()
-        self.assertEqualDataFrame(sorted_df, res_df)
+        with tempfile.TemporaryDirectory() as temp:
+            name = os.path.join(temp, "_data_")
+            df = pandas.DataFrame(
+                [
+                    dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
+                    dict(a=5, b="f", c=5.7, ind="a2", ai=2),
+                    dict(a=4, b="g", ind="a3", ai=3),
+                    dict(a=8, b="h", c=5.9, ai=4),
+                    dict(a=16, b="i", c=6.2, ind="a5", ai=5),
+                ]
+            )
+            sdf = StreamingDataFrame.read_df(df, chunksize=2)
+            sorted_df = df.sort_values(by="a")
+            res = sdf.sort_values(by="a", temp_file=name)
+            res_df = res.to_df()
+            self.assertEqualDataFrame(sorted_df, res_df)
+            res_df = res.to_df()
+            self.assertEqualDataFrame(sorted_df, res_df)
 
     def test_sort_values_reverse(self):
-        temp = get_temp_folder(__file__, "temp_sort_values_reverse")
-        name = os.path.join(temp, "_data_")
-        df = pandas.DataFrame(
-            [
-                dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
-                dict(a=5, b="f", c=5.7, ind="a2", ai=2),
-                dict(a=4, b="g", ind="a3", ai=3),
-                dict(a=8, b="h", c=5.9, ai=4),
-                dict(a=16, b="i", c=6.2, ind="a5", ai=5),
-            ]
-        )
-        sdf = StreamingDataFrame.read_df(df, chunksize=2)
-        sorted_df = df.sort_values(by="a", ascending=False)
-        res = sdf.sort_values(by="a", temp_file=name, ascending=False)
-        res_df = res.to_df()
-        self.assertEqualDataFrame(sorted_df, res_df)
+        with tempfile.TemporaryDirectory() as temp:
+            name = os.path.join(temp, "_data_")
+            df = pandas.DataFrame(
+                [
+                    dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
+                    dict(a=5, b="f", c=5.7, ind="a2", ai=2),
+                    dict(a=4, b="g", ind="a3", ai=3),
+                    dict(a=8, b="h", c=5.9, ai=4),
+                    dict(a=16, b="i", c=6.2, ind="a5", ai=5),
+                ]
+            )
+            sdf = StreamingDataFrame.read_df(df, chunksize=2)
+            sorted_df = df.sort_values(by="a", ascending=False)
+            res = sdf.sort_values(by="a", temp_file=name, ascending=False)
+            res_df = res.to_df()
+            self.assertEqualDataFrame(sorted_df, res_df)
 
     def test_sort_values_nan_last(self):
-        temp = get_temp_folder(__file__, "temp_sort_values_nan_last")
-        name = os.path.join(temp, "_data_")
-        df = pandas.DataFrame(
-            [
-                dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
-                dict(b="f", c=5.7, ind="a2", ai=2),
-                dict(b="f", c=5.8, ind="a2", ai=2),
-                dict(a=4, b="g", ind="a3", ai=3),
-                dict(a=8, b="h", c=5.9, ai=4),
-                dict(a=16, b="i", c=6.2, ind="a5", ai=5),
-            ]
-        )
-        sdf = StreamingDataFrame.read_df(df, chunksize=2)
-        sorted_df = df.sort_values(by="a", na_position="last")
-        res = sdf.sort_values(by="a", temp_file=name, na_position="last")
-        res_df = res.to_df()
-        self.assertEqualDataFrame(sorted_df, res_df)
+        with tempfile.TemporaryDirectory() as temp:
+            name = os.path.join(temp, "_data_")
+            df = pandas.DataFrame(
+                [
+                    dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
+                    dict(b="f", c=5.7, ind="a2", ai=2),
+                    dict(b="f", c=5.8, ind="a2", ai=2),
+                    dict(a=4, b="g", ind="a3", ai=3),
+                    dict(a=8, b="h", c=5.9, ai=4),
+                    dict(a=16, b="i", c=6.2, ind="a5", ai=5),
+                ]
+            )
+            sdf = StreamingDataFrame.read_df(df, chunksize=2)
+            sorted_df = df.sort_values(by="a", na_position="last")
+            res = sdf.sort_values(by="a", temp_file=name, na_position="last")
+            res_df = res.to_df()
+            self.assertEqualDataFrame(sorted_df, res_df)
 
     def test_sort_values_nan_first(self):
-        temp = get_temp_folder(__file__, "temp_sort_values_nan_first")
-        name = os.path.join(temp, "_data_")
-        df = pandas.DataFrame(
-            [
-                dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
-                dict(b="f", c=5.7, ind="a2", ai=2),
-                dict(b="f", c=5.8, ind="a2", ai=2),
-                dict(a=4, b="g", ind="a3", ai=3),
-                dict(a=8, b="h", c=5.9, ai=4),
-                dict(a=16, b="i", c=6.2, ind="a5", ai=5),
-            ]
-        )
-        sdf = StreamingDataFrame.read_df(df, chunksize=2)
-        sorted_df = df.sort_values(by="a", na_position="first")
-        res = sdf.sort_values(by="a", temp_file=name, na_position="first")
-        res_df = res.to_df()
-        self.assertEqualDataFrame(sorted_df, res_df)
+        with tempfile.TemporaryDirectory() as temp:
+            name = os.path.join(temp, "_data_")
+            df = pandas.DataFrame(
+                [
+                    dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
+                    dict(b="f", c=5.7, ind="a2", ai=2),
+                    dict(b="f", c=5.8, ind="a2", ai=2),
+                    dict(a=4, b="g", ind="a3", ai=3),
+                    dict(a=8, b="h", c=5.9, ai=4),
+                    dict(a=16, b="i", c=6.2, ind="a5", ai=5),
+                ]
+            )
+            sdf = StreamingDataFrame.read_df(df, chunksize=2)
+            sorted_df = df.sort_values(by="a", na_position="first")
+            res = sdf.sort_values(by="a", temp_file=name, na_position="first")
+            res_df = res.to_df()
+            self.assertEqualDataFrame(sorted_df, res_df)
 
 
 if __name__ == "__main__":

diff --git a/_unittests/ut_df/test_pandas_groupbynan.py b/_unittests/ut_df/test_pandas_groupbynan.py
@@ -2,7 +2,7 @@
 import pandas
 import numpy
 from scipy.sparse.linalg import lsqr as sparse_lsqr
-from pyquickhelper.pycode import ExtTestCase, ignore_warnings
+from pandas_streaming.ext_test_case import ExtTestCase, ignore_warnings
 from pandas_streaming.df import pandas_groupby_nan, numpy_types