Skip to content

Commit fb75910

Browse files
committed
Pushing the docs to dev/ for branch: main, commit bac46762be1d2926663f99de306d9794e63e1fb2
1 parent f77fb5a commit fb75910

File tree

1,551 files changed

+6331
-6130
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,551 files changed

+6331
-6130
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

dev/_downloads/0b601219a14824c971bbf8bb797e8973/plot_logistic_path.ipynb

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,25 @@
3333
},
3434
"outputs": [],
3535
"source": [
36-
"from sklearn import datasets\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\nX = X[y != 2]\ny = y[y != 2]\n\nX /= X.max() # Normalize X to speed-up convergence"
36+
"from sklearn import datasets\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\nfeature_names = iris.feature_names"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"metadata": {},
42+
"source": [
43+
"Here we remove the third class to turn this into a binary classification problem.\n\n"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"metadata": {
50+
"collapsed": false
51+
},
52+
"outputs": [],
53+
"source": [
54+
"X = X[y != 2]\ny = y[y != 2]"
3755
]
3856
},
3957
{
@@ -51,7 +69,25 @@
5169
},
5270
"outputs": [],
5371
"source": [
54-
"import numpy as np\n\nfrom sklearn import linear_model\nfrom sklearn.svm import l1_min_c\n\ncs = l1_min_c(X, y, loss=\"log\") * np.logspace(0, 10, 16)\n\nclf = linear_model.LogisticRegression(\n penalty=\"l1\",\n solver=\"liblinear\",\n tol=1e-6,\n max_iter=int(1e6),\n warm_start=True,\n intercept_scaling=10000.0,\n)\ncoefs_ = []\nfor c in cs:\n clf.set_params(C=c)\n clf.fit(X, y)\n coefs_.append(clf.coef_.ravel().copy())\n\ncoefs_ = np.array(coefs_)"
72+
"import numpy as np\n\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import l1_min_c\n\ncs = l1_min_c(X, y, loss=\"log\") * np.logspace(0, 1, 16)"
73+
]
74+
},
75+
{
76+
"cell_type": "markdown",
77+
"metadata": {},
78+
"source": [
79+
"Create a pipeline with `StandardScaler` and `LogisticRegression`, to normalize\nthe data before fitting a linear model, in order to speed up convergence and\nmake the coefficients comparable. Also, as a side effect, since the data is now\ncentered around 0, we don't need to fit an intercept.\n\n"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"metadata": {
86+
"collapsed": false
87+
},
88+
"outputs": [],
89+
"source": [
90+
"clf = make_pipeline(\n StandardScaler(),\n LogisticRegression(\n penalty=\"l1\",\n solver=\"liblinear\",\n tol=1e-6,\n max_iter=int(1e6),\n warm_start=True,\n fit_intercept=False,\n ),\n)\ncoefs_ = []\nfor c in cs:\n clf.set_params(logisticregression__C=c)\n clf.fit(X, y)\n coefs_.append(clf[\"logisticregression\"].coef_.ravel().copy())\n\ncoefs_ = np.array(coefs_)"
5591
]
5692
},
5793
{
@@ -69,7 +105,7 @@
69105
},
70106
"outputs": [],
71107
"source": [
72-
"import matplotlib.pyplot as plt\n\nplt.plot(np.log10(cs), coefs_, marker=\"o\")\nymin, ymax = plt.ylim()\nplt.xlabel(\"log(C)\")\nplt.ylabel(\"Coefficients\")\nplt.title(\"Logistic Regression Path\")\nplt.axis(\"tight\")\nplt.show()"
108+
"import matplotlib.pyplot as plt\n\n# Colorblind-friendly palette (IBM Color Blind Safe palette)\ncolors = [\"#648FFF\", \"#785EF0\", \"#DC267F\", \"#FE6100\"]\n\nplt.figure(figsize=(10, 6))\nfor i in range(coefs_.shape[1]):\n plt.semilogx(cs, coefs_[:, i], marker=\"o\", color=colors[i], label=feature_names[i])\n\nymin, ymax = plt.ylim()\nplt.xlabel(\"C\")\nplt.ylabel(\"Coefficients\")\nplt.title(\"Logistic Regression Path\")\nplt.legend()\nplt.axis(\"tight\")\nplt.show()"
73109
]
74110
}
75111
],
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

dev/_downloads/33d1c05f40549996ff7b58dfd3eb9d23/plot_logistic_path.py

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,36 +37,47 @@
3737
iris = datasets.load_iris()
3838
X = iris.data
3939
y = iris.target
40+
feature_names = iris.feature_names
4041

42+
# %%
43+
# Here we remove the third class to turn this into a binary classification problem.
4144
X = X[y != 2]
4245
y = y[y != 2]
4346

44-
X /= X.max() # Normalize X to speed-up convergence
45-
4647
# %%
4748
# Compute regularization path
4849
# ---------------------------
4950

5051
import numpy as np
5152

52-
from sklearn import linear_model
53+
from sklearn.linear_model import LogisticRegression
54+
from sklearn.pipeline import make_pipeline
55+
from sklearn.preprocessing import StandardScaler
5356
from sklearn.svm import l1_min_c
5457

55-
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 10, 16)
58+
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 1, 16)
5659

57-
clf = linear_model.LogisticRegression(
58-
penalty="l1",
59-
solver="liblinear",
60-
tol=1e-6,
61-
max_iter=int(1e6),
62-
warm_start=True,
63-
intercept_scaling=10000.0,
60+
# %%
61+
# Create a pipeline with `StandardScaler` and `LogisticRegression`, to normalize
62+
# the data before fitting a linear model, in order to speed up convergence and
63+
# make the coefficients comparable. Also, as a side effect, since the data is now
64+
# centered around 0, we don't need to fit an intercept.
65+
clf = make_pipeline(
66+
StandardScaler(),
67+
LogisticRegression(
68+
penalty="l1",
69+
solver="liblinear",
70+
tol=1e-6,
71+
max_iter=int(1e6),
72+
warm_start=True,
73+
fit_intercept=False,
74+
),
6475
)
6576
coefs_ = []
6677
for c in cs:
67-
clf.set_params(C=c)
78+
clf.set_params(logisticregression__C=c)
6879
clf.fit(X, y)
69-
coefs_.append(clf.coef_.ravel().copy())
80+
coefs_.append(clf["logisticregression"].coef_.ravel().copy())
7081

7182
coefs_ = np.array(coefs_)
7283

@@ -76,10 +87,17 @@
7687

7788
import matplotlib.pyplot as plt
7889

79-
plt.plot(np.log10(cs), coefs_, marker="o")
90+
# Colorblind-friendly palette (IBM Color Blind Safe palette)
91+
colors = ["#648FFF", "#785EF0", "#DC267F", "#FE6100"]
92+
93+
plt.figure(figsize=(10, 6))
94+
for i in range(coefs_.shape[1]):
95+
plt.semilogx(cs, coefs_[:, i], marker="o", color=colors[i], label=feature_names[i])
96+
8097
ymin, ymax = plt.ylim()
81-
plt.xlabel("log(C)")
98+
plt.xlabel("C")
8299
plt.ylabel("Coefficients")
83100
plt.title("Logistic Regression Path")
101+
plt.legend()
84102
plt.axis("tight")
85103
plt.show()

0 commit comments

Comments
 (0)