-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
CI, ENH: Check each minimum dependency is enforced in *.yaml and environment.yml #51189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
4a15e02
f71a869
e4b90a2
527bd37
cbd87e5
c0c4ffc
e4789d2
1c8c8b4
6d6c391
7b3f50a
a4b4bdb
e2bb536
8397c94
3a73573
ed4827b
0968c10
6d30e63
f923b98
49e00c6
536450f
1431ae8
d2d5d73
1af6771
3b6123f
2c8c746
4af5291
6e8b0ef
5bd4908
78114fb
04dda2e
db0feae
4bd6e7a
bec1dd1
3c4e800
964e201
e89dd4d
47179f0
2c39a81
554ed85
b2a672b
985730f
0b25f3a
07c68b3
74965c2
635c055
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,9 +14,12 @@ | |
""" | ||
from __future__ import annotations | ||
|
||
import os | ||
import pathlib | ||
import sys | ||
|
||
import yaml | ||
|
||
if sys.version_info >= (3, 11): | ||
import tomllib | ||
else: | ||
|
@@ -28,6 +31,8 @@ | |
) | ||
CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve() | ||
SETUP_PATH = pathlib.Path("pyproject.toml").resolve() | ||
YAML_PATH = pathlib.Path("ci/deps") | ||
ENV_PATH = pathlib.Path("environment.yml") | ||
EXCLUDE_DEPS = {"tzdata", "blosc"} | ||
# pandas package is not available | ||
# in pre-commit environment | ||
|
@@ -41,6 +46,84 @@ | |
import _optional | ||
|
||
|
||
def pin_min_versions_to_ci_deps(): | ||
exclusion_list = { | ||
"python=3.8[build=*_pypy]": None, | ||
} | ||
toml_dependencies = get_versions_from_toml() | ||
all_yaml_files = list(YAML_PATH.iterdir()) | ||
all_yaml_files.append(ENV_PATH) | ||
for curr_file in all_yaml_files: | ||
with open(curr_file) as yaml_f: | ||
data = yaml_f.read() | ||
yaml_file = yaml.safe_load(data) | ||
yaml_deps = yaml_file["dependencies"] | ||
res = [] | ||
[res.append(x) for x in yaml_deps if x not in res] | ||
yaml_deps = res | ||
for dep_line in yaml_deps: | ||
replace_text = operator = yaml_left = "" | ||
search_text = str(dep_line) | ||
if str(dep_line) in exclusion_list: | ||
continue | ||
if ">=" in dep_line: | ||
operator = ">=" | ||
elif "==" in dep_line: | ||
operator = "==" | ||
elif "=" in dep_line: | ||
operator = "=" | ||
elif "<" in dep_line: | ||
operator = "<" | ||
elif ">" in dep_line: | ||
operator = ">" | ||
else: | ||
operator = "" | ||
if operator == "": | ||
yaml_package, yaml_version = str(dep_line).strip(), None | ||
yaml_left = yaml_package | ||
else: | ||
yaml_package, yaml_version = str(dep_line).strip().split(operator) | ||
if operator == "<" or operator == ">": | ||
if yaml_package in toml_dependencies: | ||
if version.parse(yaml_version) <= version.parse( | ||
toml_dependencies[yaml_package] | ||
): | ||
yaml_left = yaml_package | ||
else: | ||
yaml_left = str(dep_line) + ", " | ||
else: | ||
yaml_left = yaml_package + operator | ||
if yaml_package in toml_dependencies: | ||
if ">" in yaml_left or "<" in yaml_left: | ||
if "," in yaml_left: | ||
# ex: "numpy<1.24.0," + ">=" + "1.2" | ||
replace_text = ( | ||
yaml_left + ">=" + toml_dependencies[yaml_package] | ||
) | ||
else: | ||
replace_text = yaml_left + toml_dependencies[yaml_package] | ||
# update yaml package version to TOML min version | ||
elif yaml_version is not None: | ||
if version.parse( | ||
toml_dependencies[yaml_package] | ||
) > version.parse(yaml_version): | ||
# ex: "hypothesis>=" + "6.34.2" | ||
replace_text = yaml_left + toml_dependencies[yaml_package] | ||
elif version.parse( | ||
toml_dependencies[yaml_package] | ||
) == version.parse(yaml_version): | ||
replace_text = dep_line | ||
else: | ||
# ex: "hypothesis + ">=" + 6.34.2" | ||
replace_text = ( | ||
yaml_package + ">=" + toml_dependencies[yaml_package] | ||
) | ||
data = data.replace(search_text, replace_text) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is great, but it's getting quite complicated 😄 how about taking this part out to its own function (which accepts There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good! I've separated this portion of code into its own function. I'm writing unit tests now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks! BTW if you notice some CI failures for some unrelated jobs, that's probably exactly what they are (i.e. unrelated) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wrote a new unit test file at It comes with 3 additional files under scripts/tests/:
The .toml file can be adjusted to change the baselines for the minimum dependencies. The I was thinking of creating more sets of expected/unmodified YAML files so that each set can test its own edge case like duplicates, same version, no version, range, etc. That way, each unit test can be smaller and more meaningful. Currently, I think the dataset is too large to provide meaningful test assertion results. @MarcoGorelli What are your thoughts? Feedback appreciated! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice! this isn't user-facing, so arguably doesn't need too extensive testing. but yes, smaller test files would make it easier to reason about what the script is doing |
||
os.remove(curr_file) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is necessary (the next line will overwrite the file) |
||
with open(curr_file, "w") as f: | ||
f.write(data) | ||
|
||
|
||
def get_versions_from_code() -> dict[str, str]: | ||
"""Min versions for checking within pandas code.""" | ||
install_map = _optional.INSTALL_MAPPING | ||
|
@@ -108,11 +191,11 @@ def get_versions_from_toml() -> dict[str, str]: | |
|
||
for item in EXCLUDE_DEPS: | ||
optional_dependencies.pop(item, None) | ||
|
||
return optional_dependencies | ||
|
||
|
||
def main(): | ||
pin_min_versions_to_ci_deps() | ||
with open(CI_PATH, encoding="utf-8") as f: | ||
_, ci_optional = get_versions_from_ci(f.readlines()) | ||
code_optional = get_versions_from_code() | ||
|
Uh oh!
There was an error while loading. Please reload this page.