Skip to content

Commit 65df683

Browse files
committed
proposed fix for issue #61026
1 parent abeba1b commit 65df683

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

pandas/core/construction.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,24 @@ def sanitize_array(
611611
data = data.A
612612

613613
if dtype is None:
614+
# GH#61026: special-case 2D+ object ndarrays when dtype is None.
615+
if data.dtype == object and data.ndim > 1:
616+
if data.ndim == 2 and data.shape[1] == 1:
617+
# Allow assigning an (n, 1) object array to a single column by flattening it to 1D.
618+
data = data[:, 0]
619+
elif data.ndim == 2:
620+
# More than one column but caller is behaving as if this is a single-column assignment.
621+
raise ValueError(
622+
"Setting a DataFrame column with a 2D object array "
623+
f"requires shape (n, 1); got shape {data.shape}."
624+
)
625+
else:
626+
# ndim >= 3
627+
raise ValueError(
628+
f"Setting a DataFrame column with ndim {data.ndim} "
629+
"object array is not supported."
630+
)
631+
614632
subarr = data
615633
if data.dtype == object and infer_object:
616634
subarr = lib.maybe_convert_objects(
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pandas._testing as tm
4+
import pytest
5+
6+
7+
class TestSetItem2DObjectArray:
8+
# GH#61026
9+
def test_setitem_2d_object_array_single_column_unravel(self):
10+
df = pd.DataFrame({"c1": [1, 2, 3]})
11+
t = np.array([["A"], ["B"], ["C"]], dtype=object)
12+
13+
df["c1"] = t
14+
15+
expected = pd.Series(["A", "B", "C"], name="c1")
16+
tm.assert_series_equal(df["c1"], expected)
17+
18+
# GH#61026
19+
def test_setitem_2d_object_array_wrong_shape_raises(self):
20+
df = pd.DataFrame({"c1": [1, 2, 3]})
21+
t = np.array([["A", "B"], ["C", "D"], ["E", "F"]], dtype=object)
22+
23+
with pytest.raises(ValueError, match="requires shape"):
24+
df["c1"] = t
25+
26+
# GH#61026
27+
def test_setitem_3d_object_array_raises(self):
28+
df = pd.DataFrame({"c1": [1, 2, 3]})
29+
t = np.array([[["A"]], [["B"]], [["C"]]], dtype=object)
30+
31+
with pytest.raises(ValueError, match="ndim 3"):
32+
df["c1"] = t
33+
34+
# GH#61026
35+
def test_setitem_2d_string_array_regression(self):
36+
df = pd.DataFrame({"c1": [1, 2, 3]})
37+
t = np.array([["A"], ["B"], ["C"]]) # dtype '<U1'
38+
39+
df["c1"] = t
40+
41+
# just checking values; internal mask shape isn't part of the contract.
42+
assert list(df["c1"]) == ["A", "B", "C"]
43+
44+
# GH#61026
45+
def test_setitem_multicolumn_object_array_still_ok(self):
46+
df = pd.DataFrame({"c1": [1, 2, 3], "c2": [4, 5, 6]})
47+
t = np.array([["A", "X"], ["B", "Y"], ["C", "Z"]], dtype=object)
48+
49+
df[["c1", "c2"]] = t
50+
51+
expected = pd.DataFrame({"c1": ["A", "B", "C"], "c2": ["X", "Y", "Z"]})
52+
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)