diff --git a/docs/user_guide/index.rst b/docs/user_guide/index.rst
index c786e77e1..52c33a8f4 100644
--- a/docs/user_guide/index.rst
+++ b/docs/user_guide/index.rst
@@ -28,6 +28,7 @@ Creation
 
     creation/index
     datetime/index
+    text/index
 
 Selection
 
diff --git a/docs/user_guide/text/TextFeatures.rst b/docs/user_guide/text/TextFeatures.rst
new file mode 100644
index 000000000..009a220f9
--- /dev/null
+++ b/docs/user_guide/text/TextFeatures.rst
@@ -0,0 +1,152 @@
+.. _text_features:
+
+.. currentmodule:: feature_engine.text
+
+TextFeatures
+============
+
+:class:`TextFeatures()` extracts numerical features from text/string variables.
+This transformer is useful for extracting basic text statistics that can be used
+as features in machine learning models.
+
+Unlike scikit-learn's CountVectorizer or TfidfVectorizer, which create sparse
+matrices, :class:`TextFeatures()` extracts metadata features that remain in
+DataFrame format and can be easily combined with other Feature-engine
+transformers in a pipeline.
+
+Available Features
+~~~~~~~~~~~~~~~~~~
+
+The transformer can extract the following features:
+
+- **char_count**: Number of characters in the text
+- **word_count**: Number of words (whitespace-separated tokens)
+- **sentence_count**: Number of sentences (based on .!? punctuation)
+- **avg_word_length**: Average length of words
+- **digit_count**: Number of digit characters
+- **uppercase_count**: Number of uppercase letters
+- **lowercase_count**: Number of lowercase letters
+- **special_char_count**: Number of special characters (non-alphanumeric)
+- **whitespace_count**: Number of whitespace characters
+- **whitespace_ratio**: Ratio of whitespace to total characters
+- **digit_ratio**: Ratio of digits to total characters
+- **uppercase_ratio**: Ratio of uppercase to total characters
+- **has_digits**: Binary indicator if text contains digits
+- **has_uppercase**: Binary indicator if text contains uppercase
+- **is_empty**: Binary indicator if text is empty
+- **starts_with_uppercase**: Binary indicator if text starts with uppercase
+- **ends_with_punctuation**: Binary indicator if text ends with .!?
+- **unique_word_count**: Number of unique words (case-insensitive)
+- **lexical_diversity**: Ratio of total words to unique words
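+
+Each of these statistics is computed with vectorized pandas string methods. As a
+rough illustration (a simplified sketch, not the transformer's internal code), a
+few of them can be obtained on a plain pandas Series like this:
+
+.. code:: python
+
+    import pandas as pd
+
+    text = pd.Series(["Hello World!", "Python 123"])
+
+    char_count = text.str.len()
+    word_count = text.str.split().str.len()
+    uppercase_ratio = text.str.count(r"[A-Z]") / text.str.len().replace(0, 1)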
+
+Example
+~~~~~~~
+
+Let's create a dataframe with text data and extract features:
+
+.. code:: python
+
+    import pandas as pd
+    from feature_engine.text import TextFeatures
+
+    # Create sample data
+    X = pd.DataFrame({
+        'review': [
+            'This product is AMAZING! Best purchase ever.',
+            'Not great. Would not recommend.',
+            'OK for the price. 3 out of 5 stars.',
+            'TERRIBLE!!! DO NOT BUY!',
+        ],
+        'title': [
+            'Great Product',
+            'Disappointed',
+            'Average',
+            'Awful',
+        ]
+    })
+
+Now let's extract specific text features:
+
+.. code:: python
+
+    # Set up the transformer with specific features
+    tf = TextFeatures(
+        variables=['review'],
+        features=['word_count', 'char_count', 'has_digits', 'uppercase_ratio']
+    )
+
+    # Fit and transform
+    tf.fit(X)
+    X_transformed = tf.transform(X)
+
+    print(X_transformed.columns.tolist())
+
+Output:
+
+.. code:: python
+
+    ['review', 'title', 'review_word_count', 'review_char_count',
+     'review_has_digits', 'review_uppercase_ratio']
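+
+The same names, in the same order, are returned by ``get_feature_names_out()``,
+which is convenient when selecting columns further down a pipeline. With the
+transformer fitted above:
+
+.. code:: python
+
+    print(tf.get_feature_names_out())
+
+.. code:: python
+
+    ['review', 'title', 'review_word_count', 'review_char_count',
+     'review_has_digits', 'review_uppercase_ratio']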
+""" + +from .text_features import TextFeatures + +__all__ = [ + "TextFeatures", +] diff --git a/feature_engine/text/text_features.py b/feature_engine/text/text_features.py new file mode 100644 index 000000000..b4cac844f --- /dev/null +++ b/feature_engine/text/text_features.py @@ -0,0 +1,314 @@ +# Authors: Ankit Hemant Lade (contributor) +# License: BSD 3 clause + +from typing import List, Optional, Union + +import pandas as pd +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils.validation import check_is_fitted + +from feature_engine._base_transformers.mixins import GetFeatureNamesOutMixin +from feature_engine._check_init_parameters.check_init_input_params import ( + _check_param_drop_original, +) +from feature_engine._check_init_parameters.check_variables import ( + _check_variables_input_value, +) +from feature_engine.dataframe_checks import _check_X_matches_training_df, check_X +from feature_engine.tags import _return_tags +from feature_engine.variable_handling import find_categorical_variables + +# Available text features and their computation functions +TEXT_FEATURES = { + "char_count": lambda x: x.str.len(), + "word_count": lambda x: x.str.split().str.len(), + "sentence_count": lambda x: x.str.count(r"[.!?]+"), + "avg_word_length": lambda x: x.str.len() / x.str.split().str.len().replace(0, 1), + "digit_count": lambda x: x.str.count(r"\d"), + "uppercase_count": lambda x: x.str.count(r"[A-Z]"), + "lowercase_count": lambda x: x.str.count(r"[a-z]"), + "special_char_count": lambda x: x.str.count(r"[^a-zA-Z0-9\s]"), + "whitespace_count": lambda x: x.str.count(r"\s"), + "whitespace_ratio": lambda x: x.str.count(r"\s") / x.str.len().replace(0, 1), + "digit_ratio": lambda x: x.str.count(r"\d") / x.str.len().replace(0, 1), + "uppercase_ratio": lambda x: x.str.count(r"[A-Z]") / x.str.len().replace(0, 1), + "has_digits": lambda x: x.str.contains(r"\d", regex=True).astype(int), + "has_uppercase": lambda x: x.str.contains(r"[A-Z]", regex=True).astype(int), + "is_empty": lambda x: (x.str.len() == 0).astype(int), + "starts_with_uppercase": lambda x: x.str.match(r"^[A-Z]").astype(int), + "ends_with_punctuation": lambda x: x.str.match(r".*[.!?]$").astype(int), + "unique_word_count": lambda x: x.str.lower().str.split().apply(set).str.len(), + "lexical_diversity": lambda x: ( + x.str.split().str.len() + / x.str.lower().str.split().apply(set).str.len().replace(0, 1) + ), +} + + +class TextFeatures(TransformerMixin, BaseEstimator, GetFeatureNamesOutMixin): + """ + TextFeatures() extracts numerical features from text/string variables. This + transformer is useful for extracting basic text statistics that can be used + as features in machine learning models. + + The transformer can extract various text features including character counts, + word counts, sentence counts, and various ratios and indicators. + + A list of variables can be passed as an argument. Alternatively, the transformer + will automatically select and transform all variables of type object (string). + + More details in the :ref:`User Guide `. + + Parameters + ---------- + variables: list, default=None + The list of text/string variables to extract features from. If None, the + transformer will automatically select all object (string) columns. + + features: list, default=None + List of text features to extract. Available features are: + + - 'char_count': Number of characters in the text + - 'word_count': Number of words (whitespace-separated tokens) + - 'sentence_count': Number of sentences (based on .!? 
+
+Dropping original columns
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can drop the original text columns after extracting features:
+
+.. code:: python
+
+    tf = TextFeatures(
+        variables=['review'],
+        features=['word_count', 'char_count'],
+        drop_original=True
+    )
+
+    tf.fit(X)
+    X_transformed = tf.transform(X)
+
+    # 'review' column is now removed
+    print(X_transformed.columns.tolist())
"word_count"]) + X_tr = transformer.fit_transform(X) + + # Verify only specified features are present + assert "text_char_count" in X_tr.columns + assert "text_word_count" in X_tr.columns + assert "text_digit_count" not in X_tr.columns + assert "text_uppercase_count" not in X_tr.columns + + # Verify expected values + assert X_tr["text_char_count"].tolist() == [5, 5] + assert X_tr["text_word_count"].tolist() == [1, 1] + + +def test_specific_variables(df_multi_text): + """Test extracting features from specific variables only.""" + transformer = TextFeatures(variables=["text1"], features=["char_count"]) + X_tr = transformer.fit_transform(df_multi_text.copy()) + + # Verify only specified variable has features extracted + assert "text1_char_count" in X_tr.columns + assert "text2_char_count" not in X_tr.columns + + # Verify expected values + assert X_tr["text1_char_count"].tolist() == [5, 5] + + +def test_drop_original(): + """Test drop_original parameter removes text columns.""" + X = pd.DataFrame({"text": ["Hello", "World"], "other": [1, 2]}) + transformer = TextFeatures(features=["char_count"], drop_original=True) + X_tr = transformer.fit_transform(X) + + assert "text" not in X_tr.columns + assert "text_char_count" in X_tr.columns + assert "other" in X_tr.columns + + +def test_empty_string_handling(): + """Test handling of empty strings.""" + X = pd.DataFrame({"text": ["", "Hello", ""]}) + transformer = TextFeatures(features=["char_count", "word_count", "is_empty"]) + X_tr = transformer.fit_transform(X) + + assert X_tr["text_char_count"].tolist() == [0, 5, 0] + assert X_tr["text_is_empty"].tolist() == [1, 0, 1] + + +def test_nan_handling(): + """Test handling of NaN values.""" + X = pd.DataFrame({"text": ["Hello", None, "World"]}) + transformer = TextFeatures(features=["char_count"]) + X_tr = transformer.fit_transform(X) + + assert X_tr["text_char_count"].tolist() == [5, 0, 5] + + +def test_uppercase_features(): + """Test uppercase-related features.""" + X = pd.DataFrame({"text": ["HELLO", "hello", "HeLLo"]}) + transformer = TextFeatures( + features=["uppercase_count", "has_uppercase", "starts_with_uppercase"] + ) + X_tr = transformer.fit_transform(X) + + assert X_tr["text_uppercase_count"].tolist() == [5, 0, 3] + assert X_tr["text_has_uppercase"].tolist() == [1, 0, 1] + assert X_tr["text_starts_with_uppercase"].tolist() == [1, 0, 1] + + +def test_sentence_count(): + """Test sentence counting.""" + X = pd.DataFrame({"text": ["Hello. World!", "One sentence", "A? B! 
C."]}) + transformer = TextFeatures(features=["sentence_count"]) + X_tr = transformer.fit_transform(X) + + assert X_tr["text_sentence_count"].tolist() == [2, 0, 3] + + +def test_unique_word_features(): + """Test unique word features.""" + X = pd.DataFrame({"text": ["the the the", "a b c", "x"]}) + transformer = TextFeatures(features=["unique_word_count", "lexical_diversity"]) + X_tr = transformer.fit_transform(X) + + assert X_tr["text_unique_word_count"].tolist() == [1, 3, 1] + # lexical_diversity = word_count / unique_word_count + assert X_tr["text_lexical_diversity"].tolist() == [3.0, 1.0, 1.0] + + +@pytest.mark.parametrize("invalid_feature", ["invalid_feature", "not_a_feature"]) +def test_invalid_feature_raises_error(invalid_feature): + """Test that invalid feature names raise ValueError.""" + with pytest.raises(ValueError, match="Invalid features"): + TextFeatures(features=[invalid_feature]) + + +def test_non_string_feature_raises_error(): + """Test that non-string feature raises ValueError.""" + with pytest.raises(ValueError, match="features must be None or a list of strings"): + TextFeatures(features=[123]) + + +@pytest.mark.parametrize("invalid_variables", [0.5, {"a": 1}]) +def test_invalid_variables_raises_error(invalid_variables): + """Test that invalid variables parameter raises ValueError.""" + with pytest.raises(ValueError, match="variables"): + TextFeatures(variables=invalid_variables) + + +def test_missing_variable_raises_error(): + """Test that missing variable raises ValueError on fit.""" + X = pd.DataFrame({"text": ["Hello"]}) + transformer = TextFeatures(variables=["nonexistent"]) + with pytest.raises(ValueError, match="not present in the dataframe"): + transformer.fit(X) + + +def test_no_text_columns_raises_error(): + """Test that no text columns raises error when variables=None.""" + X = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + transformer = TextFeatures() + with pytest.raises(TypeError, match="No categorical variables found"): + transformer.fit(X) + + +def test_fit_stores_attributes(): + """Test that fit stores expected attributes with correct values.""" + X = pd.DataFrame({"text": ["Hello"]}) + transformer = TextFeatures() + transformer.fit(X) + + assert hasattr(transformer, "variables_") + assert hasattr(transformer, "features_") + assert hasattr(transformer, "feature_names_in_") + assert hasattr(transformer, "n_features_in_") + assert transformer.variables_ == ["text"] + assert transformer.n_features_in_ == 1 + + +def test_get_feature_names_out(): + """Test get_feature_names_out returns correct feature names.""" + X = pd.DataFrame({"text": ["Hello"], "other": [1]}) + transformer = TextFeatures(features=["char_count", "word_count"]) + transformer.fit(X) + + feature_names = transformer.get_feature_names_out() + assert "text" in feature_names + assert "other" in feature_names + assert "text_char_count" in feature_names + assert "text_word_count" in feature_names + + +def test_get_feature_names_out_with_drop(): + """Test get_feature_names_out with drop_original=True.""" + X = pd.DataFrame({"text": ["Hello"], "other": [1]}) + transformer = TextFeatures(features=["char_count"], drop_original=True) + transformer.fit(X) + + feature_names = transformer.get_feature_names_out() + assert "text" not in feature_names + assert "other" in feature_names + assert "text_char_count" in feature_names