Skip to content

Commit 4a5375a

Browse files
committed
Check for index columns and pass through the kwarg to read_csv
When copying from Excel, use the number of leading tabs in the first row as the value for `index_col`
1 parent 7796a03 commit 4a5375a

File tree

2 files changed

+23
-3
lines changed

2 files changed

+23
-3
lines changed

pandas/io/clipboards.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,15 @@ def read_clipboard(sep=r"\s+", **kwargs): # pragma: no cover
5858
# 0 1 2
5959
# 1 3 4
6060

61-
counts = {
62-
x.count("\t") if i > 0 else x.lstrip().count("\t") for i, x in enumerate(lines)
63-
}
61+
counts = {x.lstrip(" ").count("\t") for x in lines}
6462
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
6563
sep = "\t"
64+
# check the number of leading tabs in the first line
65+
# to account for index columns
66+
index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
67+
if index_length != 0:
68+
print(index_length)
69+
kwargs.setdefault("index_col", [0, index_length - 1])
6670

6771
# Edge case where sep is specified to be None, return to default
6872
if sep is None and kwargs.get("delim_whitespace") is None:

pandas/tests/io/test_clipboard.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from pandas import (
77
DataFrame,
8+
MultiIndex,
89
get_option,
910
read_clipboard,
1011
)
@@ -256,6 +257,21 @@ def test_infer_excel_with_nulls(self, request, mock_clipboard):
256257
# excel data is parsed correctly
257258
tm.assert_frame_equal(df, df_expected)
258259

260+
def test_infer_excel_with_multiindex(self, request, mock_clipboard):
261+
# GH41108
262+
text = "\t\tcol1\tcol2\nA\t0\t1\tred\nA\t1\t\tblue\nB\t0\t2\tgreen"
263+
264+
mock_clipboard[request.node.name] = text
265+
df = read_clipboard()
266+
multiindex = MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0)])
267+
df_expected = DataFrame(
268+
data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
269+
index=multiindex,
270+
)
271+
272+
# excel data is parsed correctly
273+
tm.assert_frame_equal(df, df_expected)
274+
259275
def test_invalid_encoding(self, df):
260276
msg = "clipboard only supports utf-8 encoding"
261277
# test case for testing invalid encoding

0 commit comments

Comments
 (0)