diff --git a/bin/compare_calc.py b/bin/compare_calc.py index 81a83ef..4f611b7 100755 --- a/bin/compare_calc.py +++ b/bin/compare_calc.py @@ -52,8 +52,13 @@ def morisita_horn_index(counts1, counts2): # ------------------------- # Load samplesheet # ------------------------- - sample_df = pd.read_csv(args.sample_utf8) + sample_df = pd.read_csv(args.sample_utf8, index_col=None) + # Basic hygiene: drop rows missing sample or file + sample_df = sample_df.dropna(subset=["sample", "file"]) + print(sample_df.head()) + print(sample_df.columns.tolist()) + samples = sample_df["sample"].tolist() files = sample_df["file"].tolist() n = len(samples) @@ -64,44 +69,46 @@ def morisita_horn_index(counts1, counts2): # Preload data structures # ------------------------- junction_sets = {} - count_vectors = {} + count_series = {} for sample, file in zip(samples, files): df = pd.read_csv(file, sep="\t", usecols=["junction_aa", "duplicate_count"]) df = df.dropna(subset=["junction_aa"]) + # Ensure counts are numeric + df["duplicate_count"] = pd.to_numeric(df["duplicate_count"], errors="coerce").fillna(0) + # Set for presence/absence metrics junction_sets[sample] = set(df["junction_aa"]) - # Counts for Morisita–Horn - count_vectors[sample] = ( + # Counts for Morisita–Horn as a pandas Series (index = junction_aa) + counts = ( df.groupby("junction_aa")["duplicate_count"] .sum() ) - + # Ensure we have a Series with a named index + if not isinstance(counts, pd.Series): + counts = pd.Series(counts) + count_series[sample] = counts # ------------------------- - # Align count vectors across union space + # Build union index and align counts # ------------------------- - all_junctions = sorted( - set().union(*junction_sets.values()) - ) + union_set = set().union(*junction_sets.values()) if junction_sets else set() + all_junctions = pd.Index(sorted(union_set)) + aligned_vectors = {} for sample in samples: - count_vectors[sample] = ( - count_vectors[sample] - .reindex(all_junctions, fill_value=0) - .to_numpy() - ) - + aligned = count_series[sample].reindex(all_junctions, fill_value=0) + # Store as numpy for MH computation + aligned_vectors[sample] = aligned.to_numpy(dtype=float) # ------------------------- # Initialize matrices # ------------------------- - jaccard_mat = np.zeros((n, n)) - sorensen_mat = np.zeros((n, n)) - morisita_mat = np.zeros((n, n)) - + jaccard_mat = np.zeros((n, n), dtype=float) + sorensen_mat = np.zeros((n, n), dtype=float) + morisita_mat = np.zeros((n, n), dtype=float) # ------------------------- # Compute upper triangle only @@ -111,7 +118,7 @@ def morisita_horn_index(counts1, counts2): for i in range(n): s1 = samples[i] set1 = junction_sets[s1] - counts1 = count_vectors[s1] + counts1 = aligned_vectors[s1] # Diagonal jaccard_mat[i, i] = 1.0 @@ -120,16 +127,14 @@ def morisita_horn_index(counts1, counts2): for j in range(i + 1, n): s2 = samples[j] - j_val = jaccard_index(set1, junction_sets[s2]) s_val = sorensen_index(set1, junction_sets[s2]) - m_val = morisita_horn_index(counts1, count_vectors[s2]) + m_val = morisita_horn_index(counts1, aligned_vectors[s2]) jaccard_mat[i, j] = jaccard_mat[j, i] = j_val sorensen_mat[i, j] = sorensen_mat[j, i] = s_val morisita_mat[i, j] = morisita_mat[j, i] = m_val - # ------------------------- # Write outputs # ------------------------- diff --git a/bin/create_pheno_samplesheet.py b/bin/create_pheno_samplesheet.py new file mode 100755 index 0000000..64832ac --- /dev/null +++ b/bin/create_pheno_samplesheet.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +import sys 
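+# Expected invocation (mirrors the GENERATE_PHENO_SAMPLESHEET process script;
+# the file names below are the ones that workflow passes in):
+#   create_pheno_samplesheet.py phenotype_meta.json phenotype_samplesheet.csv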
+import json
+from pathlib import Path
+
+# This script takes two positional arguments:
+#   1. the input JSON file (a list of per-phenotype meta maps)
+#   2. the output CSV filename
+meta_json_file = sys.argv[1]
+output_csv = sys.argv[2]
+
+# Load the list from the JSON file
+with open(meta_json_file, 'r') as f_in:
+    # This will be a list of meta maps, each carrying its own file path, e.g.,
+    # [ {'sample': 'Patient01_Base_CD4', ..., 'file': 'file1.tsv'},
+    #   {'sample': 'Patient01_Base_CD8', ..., 'file': 'file2.tsv'} ]
+    all_new_meta = json.load(f_in)
+
+with open(output_csv, 'w') as f_out:
+
+    # Write the new header
+    f_out.write('sample,subject_id,timepoint,origin,phenotype,file\n')
+
+    # Iterate over each item in the list
+    for item in all_new_meta:
+        meta = item  # each item is a meta map
+        file_path = str(meta['file'])  # the file path travels inside the meta map
+
+        f_out.write(
+            f"{meta['sample']},"
+            f"{meta['subject_id']},"
+            f"{meta['timepoint']},"
+            f"{meta['origin']},"
+            f"{meta['phenotype']},"
+            f"{file_path}\n"
+        )
diff --git a/bin/pseudobulk.py b/bin/pseudobulk.py
index e0dc7aa..15cb30e 100755
--- a/bin/pseudobulk.py
+++ b/bin/pseudobulk.py
@@ -141,6 +141,120 @@ def pseudobulk(input_df, basename, airr_schema):
     bulk_df.to_csv(output_path, sep='\t', index=False)
     logger.info(f"Pseudobulked AIRR file written to: {output_path}")
 
+
+def add_phenotype_info(input_df, basename, sobject_gex):
+    import numpy as np
+    import pandas as pd
+    from rpy2.robjects import r
+    from rpy2.robjects import pandas2ri
+    import rpy2.robjects as ro
+    from rpy2.robjects.conversion import localconverter
+
+    # Read the Seurat object
+    readRDS = r['readRDS']
+    seurat_obj = readRDS(sobject_gex)
+    # Extract and convert the cell metadata into a pandas DataFrame
+    with localconverter(ro.default_converter + pandas2ri.converter):
+        cell_metadata_pd = ro.conversion.rpy2py(seurat_obj.slots['meta.data'])
+
+    cell_metadata_pd["cell_id"] = cell_metadata_pd["library"] + "_" + cell_metadata_pd["cell_id"]
+    cell_metadata_pd = cell_metadata_pd.loc[:, ["cell_id", "phenotype"]]
+    cell_metadata_pd['phenotype'] = cell_metadata_pd['phenotype'].str.replace(' ', '_')
+
+    # ------ Prefix TCR barcodes with the sample basename so they match the GEX metadata ------ #
+    input_df["cell_id"] = basename + "_" + input_df["cell_id"]
+
+    # ------ Merge phenotype labels onto the TCR table ------ #
+    merged_df = pd.merge(input_df, cell_metadata_pd, on="cell_id", how="left")
+    return merged_df
+
+def pseudobulk_phenotype(input_df, basename, airr_schema):
+    # Keep only TRB rearrangements (both V and J calls must be TRB)
+    df = input_df
+    df_trb = df[df["v_call"].str.startswith("TRB") & df["j_call"].str.startswith("TRB")]
+
+    # Define required aggregations
+    agg_columns = {
+        "cell_id": pd.Series.nunique,
+        "junction": lambda x: assert_single_unique(x, "junction"),
+        "junction_aa": lambda x: assert_single_unique(x, "junction_aa"),
+        "v_call": lambda x: assert_single_unique(x, "v_call"),
+        "j_call": lambda x: assert_single_unique(x, "j_call"),
+        "consensus_count": "sum",
+        "duplicate_count": "sum"
+    }
+
+    # Identify columns not already aggregated
+    excluded_cols = set(agg_columns) | {"sequence", "phenotype"}
+    remaining_cols = [col for col in df_trb.columns if col not in excluded_cols]
+
+    # Group by sequence and phenotype
+    grouped = df_trb.groupby(["sequence", "phenotype"])
+
+    # Determine which of the remaining columns have consistent values
+    consensus_cols = []
+    for col in remaining_cols:
+        if (grouped[col].nunique(dropna=True) <= 1).all():
+            consensus_cols.append(col)
+
+    # Add those columns to the aggregation with the unique value
+    for col in consensus_cols:
+        agg_columns[col] = lambda x, col=col: assert_single_unique(x, col)
+
+    # Aggregate
+    bulk_df = grouped.agg(agg_columns).reset_index()
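+    # bulk_df now holds one row per (sequence, phenotype) pair: cell_id carries
+    # the number of unique cells per clone (renamed to cell_count below) and the
+    # count columns carry per-clone sums. Illustrative row (hypothetical values):
+    #   sequence=GACTC..., phenotype=CD8_Tem, cell_id=12, duplicate_count=340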
+
+    # Rename and reorder columns
+    bulk_df.rename(columns={"cell_id": "cell_count"}, inplace=True)
+    bulk_df["sequence_id"] = [f"{basename}|sequence{i+1}|{bulk_df['phenotype'].iloc[i]}" for i in range(len(bulk_df))]
+    total_duplicate = bulk_df['duplicate_count'].sum()
+    bulk_df['duplicate_frequency_percent'] = bulk_df['duplicate_count'] / total_duplicate * 100
+
+    # Load schema
+    schema = airr_schema
+
+    required = schema["Rearrangement"]["required"]
+    properties = list(schema["Rearrangement"]["properties"].keys())
+
+    # Add missing required fields as blank
+    for col in required:
+        if col not in bulk_df.columns:
+            bulk_df[col] = pd.NA
+
+    # Normalize gene calls to AIRR-compliant names where those columns exist
+    for col in ['v_call', 'd_call', 'j_call', 'c_call']:
+        if col in bulk_df.columns:
+            bulk_df[col] = bulk_df[col].apply(airr_compliant_gene)
+
+    # Identify all boolean columns from schema
+    bool_cols = [
+        name for name, spec in schema["Rearrangement"]["properties"].items()
+        if spec.get("type") == "boolean"
+    ]
+
+    bool_cols = bool_cols + ['is_cell']
+    for col in bool_cols:
+        if col in bulk_df.columns:
+            bulk_df[col] = bulk_df[col].apply(normalize_bool)
+
+    # Order columns: schema-defined columns first, then any extras
+    ordered_cols = [col for col in properties if col in bulk_df.columns]
+    extra_cols = [col for col in bulk_df.columns if col not in ordered_cols]
+    bulk_df = bulk_df[ordered_cols + extra_cols]
+
+    # Iterate through each unique phenotype and save it to a separate file
+    for phenotype in bulk_df['phenotype'].unique():
+        # Filter the dataframe for the current phenotype
+        phenotype_df = bulk_df[bulk_df['phenotype'] == phenotype]
+
+        # Define the output path based on basename and phenotype
+        output_path = f"{basename}_{phenotype}_pseudobulk_phenotype.tsv"
+
+        # Save the filtered dataframe to a TSV file
+        phenotype_df.to_csv(output_path, sep='\t', index=False)
+        logger.info(f"Pseudobulked AIRR file for phenotype '{phenotype}' written to: {output_path}")
+
+
 if __name__ == "__main__":
     # Parse input arguments
     parser = argparse.ArgumentParser(description="Take positional args")
@@ -163,6 +277,18 @@ def pseudobulk(input_df, basename, airr_schema):
         help="Path to airr json schema for validating adaptive conversion.",
     )
 
+    parser.add_argument(
+        "--phenotype",
+        action="store_true",
+        help="Whether pseudobulking should be split by single-cell phenotype; requires --sobject_gex.",
+    )
+
+    parser.add_argument(
+        "--sobject_gex",
+        type=str,
+        help="Path to a Seurat object (.rds); 'phenotype' must be a column in its meta.data slot.",
+    )
+
     add_logging_args(parser)
 
     args = parser.parse_args()
@@ -181,5 +307,10 @@ def pseudobulk(input_df, basename, airr_schema):
     with open(args.airr_schema) as f:
         airr_schema = json.load(f)
     basename = args.basename
-
-    pseudobulk(input_df, basename, airr_schema)
\ No newline at end of file
+
+    # Pseudobulk by phenotype (if requested)
+    if args.phenotype:
+        input_df = add_phenotype_info(input_df, basename, args.sobject_gex)  # Attach single-cell phenotype labels from the Seurat object.
+        pseudobulk_phenotype(input_df, basename, airr_schema)
+    else:
+        pseudobulk(input_df, basename, airr_schema)
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 3e891f7..03d0b3d 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -35,6 +35,22 @@ process {
         ]
     }
 
+    withName: /SAMPLE_.*_PHENO/ {
+        publishDir = [
+            path: { "${params.outdir}/sample_phenotype" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ?
null : filename } + ] + } + + withName: /COMPARE_.*_PHENO/ { + publishDir = [ + path: { "${params.outdir}/compare_phenotype" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: TCRDIST3_MATRIX { label = params.matrix_sparsity == 'sparse' ? 'process_medium' : ['process_high', 'process_high_memory'] } diff --git a/modules/local/airr_convert/pseudobulk_cellranger.nf b/modules/local/airr_convert/pseudobulk_cellranger.nf index 653b2a9..ec1916c 100644 --- a/modules/local/airr_convert/pseudobulk_cellranger.nf +++ b/modules/local/airr_convert/pseudobulk_cellranger.nf @@ -1,6 +1,7 @@ process PSEUDOBULK_CELLRANGER { tag "${sample_meta.sample}" label 'process_low' + container "ghcr.io/karchinlab/tcrtoolkit:main" input: tuple val(sample_meta), path(count_table) diff --git a/modules/local/airr_convert/pseudobulk_phenotype_cellranger.nf b/modules/local/airr_convert/pseudobulk_phenotype_cellranger.nf new file mode 100644 index 0000000..3866e9f --- /dev/null +++ b/modules/local/airr_convert/pseudobulk_phenotype_cellranger.nf @@ -0,0 +1,25 @@ +process PSEUDOBULK_CELLRANGER_PHENOTYPE { + tag "${sample_meta.sample}" + label 'process_low' + container "ghcr.io/karchinlab/tcrtoolkit:main" + + input: + tuple val(sample_meta), path(count_table) + path airr_schema + path sobject_gex + + output: + tuple val(sample_meta), path("${sample_meta.sample}_*_pseudobulk_phenotype.tsv"), emit: "cellranger_pseudobulk_phenotype" + + script: + // Dynamically add phenotype arguments only if sobject_gex is provided (i.e., not null) + def phenotype_args = sobject_gex ? "--phenotype --sobject_gex ${sobject_gex}" : "" + + """ + pseudobulk.py \\ + ${count_table} \\ + ${sample_meta.sample} \\ + ${airr_schema} \\ + ${phenotype_args} + """ +} \ No newline at end of file diff --git a/modules/local/samplesheet/generate_pheno_samplesheet.nf b/modules/local/samplesheet/generate_pheno_samplesheet.nf new file mode 100644 index 0000000..f8b4587 --- /dev/null +++ b/modules/local/samplesheet/generate_pheno_samplesheet.nf @@ -0,0 +1,20 @@ +process GENERATE_PHENO_SAMPLESHEET { + tag "Generating phenotype samplesheet" + label 'process_single' + // container "ghcr.io/karchinlab/tcrtoolkit:main" + + input: + path original_samplesheet + path meta_json + path py_script // NOTE: Remove when in container + + output: + path "phenotype_samplesheet.csv", emit: samplesheet + + script: + def out_file = "phenotype_samplesheet.csv" // Define the output file variable + + """ + ${py_script} ${meta_json} ${out_file} + """ +} \ No newline at end of file diff --git a/subworkflows/local/airr_convert.nf b/subworkflows/local/airr_convert.nf index c625557..eb5395f 100644 --- a/subworkflows/local/airr_convert.nf +++ b/subworkflows/local/airr_convert.nf @@ -7,6 +7,7 @@ include { CONVERT_ADAPTIVE } from '../../modules/local/airr_convert/convert_adaptive' include { PSEUDOBULK_CELLRANGER } from '../../modules/local/airr_convert/pseudobulk_cellranger' +include { PSEUDOBULK_CELLRANGER_PHENOTYPE } from '../../modules/local/airr_convert/pseudobulk_phenotype_cellranger' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -18,8 +19,11 @@ workflow AIRR_CONVERT { take: sample_map input_format + sobject_gex main: + ch_pseudobulk_phenotype = Channel.empty() // Initialize empty channel for phenotype files + if (input_format == 'adaptive') { CONVERT_ADAPTIVE( sample_map, @@ -35,8 +39,20 @@ workflow AIRR_CONVERT { ) .cellranger_pseudobulk .set { sample_map_converted 
}
+
+        // Pseudobulk by phenotype
+        if (sobject_gex) {
+            PSEUDOBULK_CELLRANGER_PHENOTYPE(
+                sample_map,
+                params.airr_schema,
+                file(sobject_gex) // file() is redundant when sobject_gex already arrives as a Path, but harmless
+            )
+            // Capture phenotype outputs
+            ch_pseudobulk_phenotype = PSEUDOBULK_CELLRANGER_PHENOTYPE.out.cellranger_pseudobulk_phenotype
+        }
     }
 
     emit:
     sample_map_converted
+    pseudobulk_phenotype_files = ch_pseudobulk_phenotype
 }
\ No newline at end of file
diff --git a/subworkflows/local/compare.nf b/subworkflows/local/compare.nf
index 2924a72..c7831a6 100644
--- a/subworkflows/local/compare.nf
+++ b/subworkflows/local/compare.nf
@@ -27,9 +27,25 @@ workflow COMPARE {
     all_sample_files
 
     main:
+
     COMPARE_CALC( samplesheet_resolved,
                   all_sample_files )
 
+    COMPARE_CALC.out.jaccard_mat
+        .collectFile(name: 'jaccard_mat.csv', sort: true,
+                     storeDir: "${params.outdir}/compare")
+        .set { jaccard_mat }
+
+    COMPARE_CALC.out.sorensen_mat
+        .collectFile(name: 'sorensen_mat.csv', sort: true,
+                     storeDir: "${params.outdir}/compare")
+        .set { sorensen_mat }
+
+    COMPARE_CALC.out.morisita_mat
+        .collectFile(name: 'morisita_mat.csv', sort: true,
+                     storeDir: "${params.outdir}/compare")
+        .set { morisita_mat }
+
     COMPARE_PLOT( samplesheet_resolved,
                   COMPARE_CALC.out.jaccard_mat,
                   COMPARE_CALC.out.sorensen_mat,
@@ -65,7 +81,4 @@
                   TCRSHARING_CALC.out.shared_cdr3
     )
 
-    // emit:
-    //    compare_stats_html
-    //    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
 }
\ No newline at end of file
diff --git a/subworkflows/local/compare_pheno.nf b/subworkflows/local/compare_pheno.nf
new file mode 100644
index 0000000..6294ac8
--- /dev/null
+++ b/subworkflows/local/compare_pheno.nf
@@ -0,0 +1,46 @@
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT LOCAL MODULES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { COMPARE_CALC as COMPARE_CALC_PHENO } from '../../modules/local/compare/compare_calc'
+// include { COMPARE_PLOT } from '../../modules/local/compare/compare_plot'
+include { COMPARE_CONCATENATE as COMPARE_CONCATENATE_PHENO } from '../../modules/local/compare/compare_concatenate'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN MAIN SUBWORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow COMPARE_PHENO {
+
+    // println("Welcome to the BULK TCRSEQ pipeline!
-- COMPARE_PHENO ")
+
+    take:
+    samplesheet_resolved
+    all_sample_files
+
+    main:
+
+    COMPARE_CALC_PHENO( samplesheet_resolved,
+                        all_sample_files )
+
+    COMPARE_CONCATENATE_PHENO( samplesheet_resolved,
+                               all_sample_files )
+
+
+    /////// =================== COMPARE NOTEBOOK =================== ///////
+
+    // COMPARE_PLOT( samplesheet_resolved,
+    //               COMPARE_CALC_PHENO.out.jaccard_mat,
+    //               COMPARE_CALC_PHENO.out.sorensen_mat,
+    //               COMPARE_CALC_PHENO.out.morisita_mat,
+    //               file(params.compare_stats_template),
+    //               params.project_name,
+    //               all_sample_files
+    //             )
+
+}
\ No newline at end of file
diff --git a/subworkflows/local/map_phenotypes.nf b/subworkflows/local/map_phenotypes.nf
new file mode 100644
index 0000000..e0d2966
--- /dev/null
+++ b/subworkflows/local/map_phenotypes.nf
@@ -0,0 +1,68 @@
+//
+// Generate phenotype-specific samplesheet and transform phenotype files
+//
+
+include { GENERATE_PHENO_SAMPLESHEET } from '../../modules/local/samplesheet/generate_pheno_samplesheet.nf'
+import groovy.json.JsonOutput
+
+workflow MAP_PHENOTYPES {
+
+    take:
+    ch_phenotype_files_raw
+    original_samplesheet
+
+    main:
+
+    // --- 1. Define transformation logic ---
+    ch_phenotype_files_transformed = ch_phenotype_files_raw
+        .transpose()
+        .map { meta, file ->
+            def original_sample_id = meta.sample
+            // Derive the phenotype by stripping the "<sample>_" prefix and the
+            // "_pseudobulk_phenotype.tsv" suffix from the file name
+            def phenotype = file.name
+                .replaceFirst("^${original_sample_id}_", "")
+                .replaceFirst("_pseudobulk_phenotype.tsv\$", "")
+
+            def new_sample_id = original_sample_id + '_' + phenotype
+
+            def new_meta = meta.clone()
+            new_meta.sample = new_sample_id
+            new_meta.phenotype = phenotype
+            new_meta.original_sample = original_sample_id
+
+            new_meta.file = file.toString()
+            return [ new_meta, file ]
+
+        }
+
+    // --- 2. Prepare inputs for samplesheet process ---
+    ch_meta_json = ch_phenotype_files_transformed
+        .map { meta, file -> meta }
+        .collect() // Collect channel into a list
+        .map { collected_list ->
+            def json_string = JsonOutput.toJson(collected_list)
+            def json_file = file("phenotype_meta.json") // Write to local temp file
+            json_file.write(json_string)
+            return json_file
+        }
+
+    // original_samplesheet_ = file(original_samplesheet)
+    ch_script = file("$baseDir/bin/create_pheno_samplesheet.py")
+
+    // --- 3.
Call GENERATE_PHENO_SAMPLESHEET ---
+    GENERATE_PHENO_SAMPLESHEET(
+        original_samplesheet,
+        ch_meta_json,
+        ch_script
+    )
+
+    // View the output (for debugging)
+    GENERATE_PHENO_SAMPLESHEET.out.samplesheet.view { file ->
+        "--- START: Output of phenotype_samplesheet.csv ---\n" +
+        file.text +
+        "--- END: Output of phenotype_samplesheet.csv ---"
+    }
+
+    emit:
+    files_transformed = ch_phenotype_files_transformed
+    samplesheet_pheno = GENERATE_PHENO_SAMPLESHEET.out.samplesheet
+}
\ No newline at end of file
diff --git a/subworkflows/local/resolve_samplesheet_pheno.nf b/subworkflows/local/resolve_samplesheet_pheno.nf
new file mode 100644
index 0000000..c8be87a
--- /dev/null
+++ b/subworkflows/local/resolve_samplesheet_pheno.nf
@@ -0,0 +1,18 @@
+//
+// Collect the per-phenotype sample files into a single list
+//
+
+workflow RESOLVE_SAMPLESHEET_PHENO {
+    take:
+    sample_map_final
+
+    main:
+    // Gather every phenotype-level file into one list
+    sample_map_final
+        .map { _meta, f -> f }
+        .collect()
+        .set { all_sample_files }
+
+    emit:
+    all_sample_files // file list consumed by the phenotype comparison tasks
+}
diff --git a/subworkflows/local/sample.nf b/subworkflows/local/sample.nf
index c63bbe6..a919617 100644
--- a/subworkflows/local/sample.nf
+++ b/subworkflows/local/sample.nf
@@ -127,9 +127,4 @@ workflow SAMPLE {
     VDJDB_VDJMATCH (sample_map,
                     VDJDB_GET.out.ref_db)
 
-    // emit:
-    //    sample_stats_csv
-    //    v_family_csv
-    //    sample_meta_csv
-    //    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
 }
\ No newline at end of file
diff --git a/subworkflows/local/sample_pheno.nf b/subworkflows/local/sample_pheno.nf
new file mode 100644
index 0000000..a7c737b
--- /dev/null
+++ b/subworkflows/local/sample_pheno.nf
@@ -0,0 +1,53 @@
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT LOCAL MODULES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { SAMPLE_CALC as SAMPLE_CALC_PHENO } from '../../modules/local/sample/sample_calc'
+include { SAMPLE_AGGREGATE as SAMPLE_AGG_STAT_PHENO } from '../../modules/local/sample/sample_aggregate'
+include { SAMPLE_AGGREGATE as SAMPLE_AGG_V_PHENO } from '../../modules/local/sample/sample_aggregate'
+include { SAMPLE_AGGREGATE as SAMPLE_AGG_D_PHENO } from '../../modules/local/sample/sample_aggregate'
+include { SAMPLE_AGGREGATE as SAMPLE_AGG_J_PHENO } from '../../modules/local/sample/sample_aggregate'
+// include { SAMPLE_PLOT } from '../../modules/local/sample/sample_plot'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN MAIN SUBWORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow SAMPLE_PHENO {
+
+    take:
+    sample_map
+    pheno_samplesheet
+
+    main:
+
+    /////// =================== CALC SAMPLE =================== ///////
+
+    SAMPLE_CALC_PHENO( sample_map )
+
+    SAMPLE_CALC_PHENO.out.sample_csv.collect().set { sample_csv_files }
+    SAMPLE_CALC_PHENO.out.v_family_csv.collect().set { v_family_csv_files }
+    SAMPLE_CALC_PHENO.out.d_family_csv.collect().set { d_family_csv_files }
+    SAMPLE_CALC_PHENO.out.j_family_csv.collect().set { j_family_csv_files }
+
+    SAMPLE_AGG_STAT_PHENO(sample_csv_files, "sample_stats.csv")
+    SAMPLE_AGG_V_PHENO(v_family_csv_files, "v_family.csv")
+    SAMPLE_AGG_D_PHENO(d_family_csv_files, "d_family.csv")
+    SAMPLE_AGG_J_PHENO(j_family_csv_files, "j_family.csv")
+
+
+    /////// =================== SAMPLE
NOTEBOOK =================== ///////
+
+    // SAMPLE_PLOT (
+    //     pheno_samplesheet,
+    //     file(params.sample_stats_template),
+    //     sample_stats_csv,
+    //     v_family_csv
+    // )
+
+}
\ No newline at end of file
diff --git a/workflows/tcrtoolkit.nf b/workflows/tcrtoolkit.nf
index 5329576..257e389 100644
--- a/workflows/tcrtoolkit.nf
+++ b/workflows/tcrtoolkit.nf
@@ -16,6 +16,11 @@
 include { SAMPLE } from '../subworkflows/local/sample'
 include { COMPARE } from '../subworkflows/local/compare'
 include { VALIDATE_PARAMS } from '../subworkflows/local/validate_params'
+include { MAP_PHENOTYPES } from '../subworkflows/local/map_phenotypes'
+include { RESOLVE_SAMPLESHEET_PHENO } from '../subworkflows/local/resolve_samplesheet_pheno'
+include { SAMPLE_PHENO } from '../subworkflows/local/sample_pheno'
+include { COMPARE_PHENO } from '../subworkflows/local/compare_pheno'
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     RUN MAIN WORKFLOW
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
@@ -42,19 +47,38 @@ workflow TCRTOOLKIT {
 
     // Checking input tables
     INPUT_CHECK( file(params.samplesheet) )
+    ch_samplesheet_utf8 = INPUT_CHECK.out.samplesheet_utf8
+
+    // Initialize empty channels
+    ch_phenotype_files_transformed = Channel.empty()
+    ch_phenotype_samplesheet = Channel.empty()
 
     if (input_format in ['adaptive', 'cellranger']) {
+        def sobject_gex_file = params.sobject_gex ? file(params.sobject_gex) : []
+
         AIRR_CONVERT(
             INPUT_CHECK.out.sample_map,
-            input_format
+            input_format, sobject_gex_file
         )
         .sample_map_converted
         .set { sample_map_final }
+
+        if (params.sobject_gex) {
+            MAP_PHENOTYPES(
+                AIRR_CONVERT.out.pseudobulk_phenotype_files,
+                ch_samplesheet_utf8
+            )
+            ch_phenotype_files_transformed = MAP_PHENOTYPES.out.files_transformed
+            ch_phenotype_samplesheet = MAP_PHENOTYPES.out.samplesheet_pheno
+        }
+
     } else {
         INPUT_CHECK.out.sample_map
             .set { sample_map_final }
     }
 
-    RESOLVE_SAMPLESHEET( INPUT_CHECK.out.samplesheet_utf8,
+    // --- Main Analysis ---
+
+    RESOLVE_SAMPLESHEET( ch_samplesheet_utf8,
                          sample_map_final )
 
     // Running sample level analysis
@@ -67,6 +91,27 @@
         COMPARE( RESOLVE_SAMPLESHEET.out.samplesheet_resolved,
                  RESOLVE_SAMPLESHEET.out.all_sample_files)
     }
+
+    // --- Phenotype Analysis ---
+
+    if (params.sobject_gex) {
+
+        RESOLVE_SAMPLESHEET_PHENO(
+            ch_phenotype_files_transformed
+        )
+
+        if (levels.contains('sample')) {
+            SAMPLE_PHENO( ch_phenotype_files_transformed, ch_phenotype_samplesheet )
+        }
+
+        if (levels.contains('compare')) {
+            COMPARE_PHENO(
+                ch_phenotype_samplesheet,
+                RESOLVE_SAMPLESHEET_PHENO.out.all_sample_files
+            )
+        }
+    }
+
 }
 
 /*