From 6333c3b906d86b5bf2072012fa910ea05c766c40 Mon Sep 17 00:00:00 2001
From: U-S-jun <u.s.junn@gmail.com>
Date: Thu, 28 Nov 2024 20:55:36 +0900
Subject: [PATCH 1/3] ENH: Add sort_columns parameter to combine_first

---
 pandas/core/frame.py                          | 27 ++++++++++++++++++-
 .../tests/frame/methods/test_combine_first.py |  8 ++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d1450537dd740..1132c6a355179 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8712,7 +8712,7 @@ def combine(
         frame_result = self._constructor(result, index=new_index, columns=new_columns)
         return frame_result.__finalize__(self, method="combine")
 
-    def combine_first(self, other: DataFrame) -> DataFrame:
+    def combine_first(self, other: DataFrame, sort_columns=True) -> DataFrame:
         """
         Update null elements with value in the same location in `other`.
 
@@ -8728,6 +8728,10 @@ def combine_first(self, other: DataFrame) -> DataFrame:
         ----------
         other : DataFrame
             Provided DataFrame to use to fill null values.
+        sort_columns : bool, default True
+            Whether to sort the columns in the result DataFrame. If False, the
+            order of the columns in `self` is preserved.
+
 
         Returns
         -------
@@ -8741,6 +8745,8 @@ def combine_first(self, other: DataFrame) -> DataFrame:
 
         Examples
         --------
+        Default behavior (`sort_columns=True`):
+
         >>> df1 = pd.DataFrame({"A": [None, 0], "B": [None, 4]})
         >>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})
         >>> df1.combine_first(df2)
@@ -8748,6 +8754,16 @@ def combine_first(self, other: DataFrame) -> DataFrame:
         0  1.0  3.0
         1  0.0  4.0
 
+
+        Preserving the column order of `self` with `sort_columns=False`:
+
+        >>> df1 = pd.DataFrame({"B": [None, 4], "A": [0, None]})
+        >>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})
+        >>> df1.combine_first(df2, sort_columns=False)
+             B    A
+        0  3.0  0.0
+        1  4.0  1.0
+
         Null values still persist if the location of that null value
         does not exist in `other`
 
@@ -8773,6 +8789,8 @@ def combiner(x: Series, y: Series):
                 return y_values
 
             return expressions.where(mask, y_values, x_values)
+        
+        all_columns = self.columns.union(other.columns)
 
         if len(other) == 0:
             combined = self.reindex(
@@ -8790,6 +8808,13 @@ def combiner(x: Series, y: Series):
 
         if dtypes:
             combined = combined.astype(dtypes)
+        
+        combined = combined.reindex(columns=all_columns, fill_value=None)
+
+        if not sort_columns:
+            combined = combined[self.columns]
+        
+
 
         return combined.__finalize__(self, method="combine_first")
 
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 87b7d5052a345..e60b2fbe524fc 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -560,3 +560,11 @@ def test_combine_first_empty_columns():
     result = left.combine_first(right)
     expected = DataFrame(columns=["a", "b", "c"])
     tm.assert_frame_equal(result, expected)
+
+def test_combine_first_column_order():
+    df1 = pd.DataFrame({"B": [1, 2], "A": [3, 4]})
+    df2 = pd.DataFrame({"A": [5]}, index=[1])
+
+    result = df1.combine_first(df2,sort_columns=False)
+    expected = pd.DataFrame({"B": [1, 2], "A": [3, 4]})
+    pd.testing.assert_frame_equal(result, expected)

From 0b4ebc7fffe4854f96851776a998f47f3e654c6b Mon Sep 17 00:00:00 2001
From: U-S-jun <u.s.junn@gmail.com>
Date: Thu, 28 Nov 2024 21:46:07 +0900
Subject: [PATCH 2/3] ENH: Add sort_columns parameter to combine_first

---
 doc/source/whatsnew/v3.0.0.rst                 |  4 ++++
 pandas/core/frame.py                           | 18 ++++++++++--------
 .../tests/frame/methods/test_combine_first.py  | 11 ++++++-----
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 4bd31de185bb4..63062fbd44a63 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -13,6 +13,10 @@ including other versions of pandas.
 
 Enhancements
 ~~~~~~~~~~~~
+- Added a ``sort_columns`` parameter to :meth:`DataFrame.combine_first` to allow
+  control over whether the result's column order should follow the original
+  DataFrame's order or be sorted lexicographically. (:issue:`60427`)
+
 
 .. _whatsnew_300.enhancements.enhancement1:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1132c6a355179..5f1df1dc92372 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8712,7 +8712,9 @@ def combine(
         frame_result = self._constructor(result, index=new_index, columns=new_columns)
         return frame_result.__finalize__(self, method="combine")
 
-    def combine_first(self, other: DataFrame, sort_columns=True) -> DataFrame:
+    def combine_first(
+        self, other: DataFrame, *, sort_columns: bool = True
+    ) -> DataFrame:
         """
         Update null elements with value in the same location in `other`.
 
@@ -8789,7 +8791,7 @@ def combiner(x: Series, y: Series):
                 return y_values
 
             return expressions.where(mask, y_values, x_values)
-        
+
         all_columns = self.columns.union(other.columns)
 
         if len(other) == 0:
@@ -8808,13 +8810,11 @@ def combiner(x: Series, y: Series):
 
         if dtypes:
             combined = combined.astype(dtypes)
-        
+
         combined = combined.reindex(columns=all_columns, fill_value=None)
 
         if not sort_columns:
             combined = combined[self.columns]
-        
-
 
         return combined.__finalize__(self, method="combine_first")
 
@@ -10543,9 +10543,11 @@ def _append(
 
             index = Index(
                 [other.name],
-                name=self.index.names
-                if isinstance(self.index, MultiIndex)
-                else self.index.name,
+                name=(
+                    self.index.names
+                    if isinstance(self.index, MultiIndex)
+                    else self.index.name
+                ),
             )
             row_df = other.to_frame().T
             # infer_objects is needed for
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index e60b2fbe524fc..00f4393abb569 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -561,10 +561,11 @@ def test_combine_first_empty_columns():
     expected = DataFrame(columns=["a", "b", "c"])
     tm.assert_frame_equal(result, expected)
 
+
 def test_combine_first_column_order():
-    df1 = pd.DataFrame({"B": [1, 2], "A": [3, 4]})
-    df2 = pd.DataFrame({"A": [5]}, index=[1])
+    df1 = DataFrame({"B": [1, 2], "A": [3, 4]})
+    df2 = DataFrame({"A": [5]}, index=[1])
 
-    result = df1.combine_first(df2,sort_columns=False)
-    expected = pd.DataFrame({"B": [1, 2], "A": [3, 4]})
-    pd.testing.assert_frame_equal(result, expected)
+    result = df1.combine_first(df2, sort_columns=False)
+    expected = DataFrame({"B": [1, 2], "A": [3, 4]})
+    tm.assert_frame_equal(result, expected)

From edc2e8d193145ebd2d2f20636bc26bb57dca1787 Mon Sep 17 00:00:00 2001
From: U-S-jun <u.s.junn@gmail.com>
Date: Thu, 28 Nov 2024 23:23:02 +0900
Subject: [PATCH 3/3] ENH: Add sort_columns parameter to combine_first

---
 pandas/core/frame.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5f1df1dc92372..c07567d5d4786 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8721,10 +8721,10 @@ def combine_first(
         Combine two DataFrame objects by filling null values in one DataFrame
         with non-null values from other DataFrame. The row and column indexes
         of the resulting DataFrame will be the union of the two. The resulting
-        dataframe contains the 'first' dataframe values and overrides the
-        second one values where both first.loc[index, col] and
-        second.loc[index, col] are not missing values, upon calling
-        first.combine_first(second).
+        DataFrame contains the 'first' DataFrame values and overrides the
+        second one values where both `first.loc[index, col]` and
+        `second.loc[index, col]` are not missing values, upon calling
+        `first.combine_first(second)`.
 
         Parameters
         ----------
@@ -8734,7 +8734,6 @@ def combine_first(
             Whether to sort the columns in the result DataFrame. If False, the
             order of the columns in `self` is preserved.
 
-
         Returns
         -------
         DataFrame
@@ -8752,27 +8751,26 @@ def combine_first(
         >>> df1 = pd.DataFrame({"A": [None, 0], "B": [None, 4]})
         >>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})
         >>> df1.combine_first(df2)
-             A    B
+             A    B
         0  1.0  3.0
         1  0.0  4.0
 
-
         Preserving the column order of `self` with `sort_columns=False`:
 
         >>> df1 = pd.DataFrame({"B": [None, 4], "A": [0, None]})
         >>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})
         >>> df1.combine_first(df2, sort_columns=False)
-             B    A
+             B    A
         0  3.0  0.0
         1  4.0  1.0
 
         Null values still persist if the location of that null value
-        does not exist in `other`
+        does not exist in `other`.
 
         >>> df1 = pd.DataFrame({"A": [None, 0], "B": [4, None]})
         >>> df2 = pd.DataFrame({"B": [3, 3], "C": [1, 1]}, index=[1, 2])
         >>> df1.combine_first(df2)
-             A    B    C
+             A    B    C
         0  NaN  4.0  NaN
         1  0.0  3.0  1.0
         2  NaN  3.0  1.0