Skip to content

PERF: lazify IO imports #52421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 83 additions & 37 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@
from pandas.tseries.api import infer_freq
from pandas.tseries import offsets

from pandas.core.computation.api import eval

from pandas.core.reshape.api import (
concat,
lreshape,
Expand All @@ -135,43 +133,8 @@
)

from pandas import api, arrays, errors, io, plotting, tseries
from pandas import testing
from pandas.util._print_versions import show_versions

from pandas.io.api import (
# excel
ExcelFile,
ExcelWriter,
read_excel,
# parsers
read_csv,
read_fwf,
read_table,
# pickle
read_pickle,
to_pickle,
# pytables
HDFStore,
read_hdf,
# sql
read_sql,
read_sql_query,
read_sql_table,
# misc
read_clipboard,
read_parquet,
read_orc,
read_feather,
read_gbq,
read_html,
read_xml,
read_json,
read_stata,
read_sas,
read_spss,
)

from pandas.io.json._normalize import json_normalize

from pandas.util._tester import test

Expand All @@ -184,6 +147,89 @@
del get_versions, v


def __getattr__(key: str):
    """
    Resolve lazily imported top-level attributes of the ``pandas`` module.

    The imports below are deferred until first attribute access so that
    ``import pandas as pd`` does not pay for them up front.

    Parameters
    ----------
    key : str
        Name of the attribute being looked up on the module.

    Returns
    -------
    object
        The object of that name from the module that provides it.

    Raises
    ------
    AttributeError
        If ``key`` is not one of the lazily provided names.
    """
    # Names that are re-exported from pandas.io.api.
    io_api_names = {
        # excel
        "ExcelFile",
        "ExcelWriter",
        "read_excel",
        # parsers
        "read_csv",
        "read_fwf",
        "read_table",
        # pickle
        "read_pickle",
        "to_pickle",
        # pytables
        "HDFStore",
        "read_hdf",
        # sql
        "read_sql",
        "read_sql_query",
        "read_sql_table",
        # misc
        "read_clipboard",
        "read_parquet",
        "read_orc",
        "read_feather",
        "read_gbq",
        "read_html",
        "read_xml",
        "read_json",
        "read_stata",
        "read_sas",
        "read_spss",
    }

    if key in io_api_names:
        import pandas.io.api as io_api

        return getattr(io_api, key)

    if key == "eval":
        # Aliased so the builtin ``eval`` is not shadowed inside this function.
        from pandas.core.computation.api import eval as pandas_eval

        return pandas_eval

    if key == "testing":
        import pandas.testing as testing_module

        return testing_module

    if key == "json_normalize":
        from pandas.io.json._normalize import json_normalize as normalize

        return normalize

    raise AttributeError(f"module 'pandas' has no attribute '{key}'")


def __dir__() -> list[str]:
    """
    List the module's attribute names, including the lazily imported ones.

    The lazily provided names are not in ``globals()`` until something binds
    them, so they are appended explicitly to keep ``dir()`` and tab-completion
    complete.

    Returns
    -------
    list of str
        The current ``globals()`` keys followed by any lazy names not already
        present, with no name repeated.
    """
    lazy_names = [
        # excel
        "ExcelFile",
        "ExcelWriter",
        "read_excel",
        # parsers
        "read_csv",
        "read_fwf",
        "read_table",
        # pickle
        "read_pickle",
        "to_pickle",
        # pytables
        "HDFStore",
        "read_hdf",
        # sql
        "read_sql",
        "read_sql_query",
        "read_sql_table",
        # misc
        "read_clipboard",
        "read_parquet",
        "read_orc",
        "read_feather",
        "read_gbq",
        "read_html",
        "read_xml",
        "read_json",
        "read_stata",
        "read_sas",
        "read_spss",
        # non-IO lazy attributes
        "eval",
        "json_normalize",
        "testing",
    ]
    # A lazy name can already be a real module global (e.g. a submodule that
    # has since been imported and bound on the package). Plain concatenation
    # would then list it twice; dict.fromkeys drops repeats while keeping
    # first-seen order.
    return list(dict.fromkeys([*globals(), *lazy_names]))


# module level doc-string
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
Expand Down
15 changes: 13 additions & 2 deletions pandas/api/interchange/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,18 @@
Public API for DataFrame interchange protocol.
"""

from pandas.core.interchange.dataframe_protocol import DataFrame
from pandas.core.interchange.from_dataframe import from_dataframe

def __getattr__(key: str):
    """
    Resolve the lazily imported public names of ``pandas.api.interchange``.

    The imports are deferred until first access to speed up
    ``import pandas as pd``.

    Parameters
    ----------
    key : str
        Name of the attribute being looked up on the module.

    Returns
    -------
    object
        ``DataFrame`` from ``pandas.core.interchange.dataframe_protocol`` or
        ``from_dataframe`` from ``pandas.core.interchange.from_dataframe``.

    Raises
    ------
    AttributeError
        If ``key`` is neither of the two names above.
    """
    if key == "DataFrame":
        from pandas.core.interchange.dataframe_protocol import DataFrame

        return DataFrame
    if key == "from_dataframe":
        from pandas.core.interchange.from_dataframe import from_dataframe

        return from_dataframe
    # Use the standard "module ... has no attribute ..." wording rather than a
    # bare attribute name, so the error says which module the lookup failed on.
    raise AttributeError(
        f"module 'pandas.api.interchange' has no attribute '{key}'"
    )


__all__ = ["from_dataframe", "DataFrame"]
35 changes: 25 additions & 10 deletions pandas/util/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
# pyright: reportUnusedImport = false
from pandas.util._decorators import ( # noqa:F401
Appender,
Substitution,
cache_readonly,
)

from pandas.core.util.hashing import ( # noqa:F401
hash_array,
hash_pandas_object,
)


def __getattr__(key: str):
    """
    Resolve the lazily imported public names of ``pandas.util``.

    The imports are deferred until first access to speed up
    ``import pandas as pd``.

    Parameters
    ----------
    key : str
        Name of the attribute being looked up on the module.

    Returns
    -------
    object
        The decorator helper or hashing function of that name.

    Raises
    ------
    AttributeError
        If ``key`` is not one of the lazily provided names.
    """
    # Group the names by the module that provides them.
    decorator_names = ("Appender", "Substitution", "cache_readonly")
    hashing_names = ("hash_array", "hash_pandas_object")

    if key in decorator_names:
        import pandas.util._decorators as provider
    elif key in hashing_names:
        import pandas.core.util.hashing as provider
    else:
        raise AttributeError(f"module 'pandas.util' has no attribute '{key}'")

    return getattr(provider, key)