diff --git a/.cirro/process-form.json b/.cirro/process-form.json index ee8c4df..da2d687 100644 --- a/.cirro/process-form.json +++ b/.cirro/process-form.json @@ -50,8 +50,31 @@ "description": "p_depth (GLIPH2)", "title": "p_depth", "type": "string" + }, + "samplechart_x_col": { + "default": "timepoint", + "description": "Metadata column for x axis of sample-level plots for Sample workflow notebook", + "title": "Sample plot X axis column", + "type": "string" + }, + "samplechart_color_col": { + "default": "origin", + "description": "Metadata column for legend color of sample-level plots for Sample workflow notebook", + "title": "Sample plot color column", + "type": "string" + }, + "vgene_subject_col": { + "default": "subject_id", + "description": "Metadata column for grouping of samples for V gene plots for Sample workflow notebook", + "title": "V gene plot subject column", + "type": "string" + }, + "vgene_x_cols": { + "default": "timepoint,origin", + "description": "Comma-separated list of metadata columns for x axis of V gene plot (eg. 
timepoint,origin or timepoint)", + "title": "V gene plot X axis columns", + "type": "string" } - } }, "ui": {} diff --git a/.cirro/process-input.json b/.cirro/process-input.json index 98cf403..798fc2f 100644 --- a/.cirro/process-input.json +++ b/.cirro/process-input.json @@ -9,5 +9,9 @@ "local_min_OVE": "$.params.dataset.paramJson.local_min_OVE", "local_min_pvalue": "$.params.dataset.paramJson.local_min_pvalue", "outdir": "$.params.dataset.s3|/data/", - "p_depth": "$.params.dataset.paramJson.p_depth" + "p_depth": "$.params.dataset.paramJson.p_depth", + "samplechart_x_col": "$.params.dataset.paramJson.samplechart_x_col", + "samplechart_color_col": "$.params.dataset.paramJson.samplechart_color_col", + "vgene_subject_col": "$.params.dataset.paramJson.vgene_subject_col", + "vgene_x_cols": "$.params.dataset.paramJson.vgene_x_cols" } \ No newline at end of file diff --git a/bin/compare_concatenate.py b/bin/compare_concatenate.py index 7b92891..2138a2f 100755 --- a/bin/compare_concatenate.py +++ b/bin/compare_concatenate.py @@ -29,18 +29,12 @@ def main(): # Read the TSV file into a dataframe file_path = str(row['file']) df = pd.read_csv(file_path, sep="\t", header=0) - - # Get metadata - subject_id = row['subject_id'] - timepoint = row['timepoint'] - origin = row['origin'] - + # Add patient column - df['patient'] = f"{subject_id}:{timepoint}_{origin}" df['sample'] = row['sample'] - + # Select relevant columns - df = df[['junction_aa', 'v_call', 'j_call', 'duplicate_count', 'patient', 'sample']] + df = df[['junction_aa', 'v_call', 'j_call', 'duplicate_count', 'sample']] dfs.append(df) diff --git a/bin/sample_calc.py b/bin/sample_calc.py index f5d219a..9614775 100755 --- a/bin/sample_calc.py +++ b/bin/sample_calc.py @@ -22,18 +22,7 @@ def extract_trb_family(allele): match = re.match(r'(TRB[V|D|J])(\d+)', allele) return f"{match.group(1)}{match.group(2)}" if match else None -def compute_gene_family_table(counts, col_name, all_families, sample_meta): - fam_col = 
f"{col_name}FamilyName" - counts[fam_col] = counts[col_name].apply(extract_trb_family) - fam_df = counts[fam_col].value_counts(dropna=False).to_frame().T.sort_index(axis=1) - fam_df = fam_df.reindex(columns=all_families, fill_value=0) - - for col in ['origin', 'timepoint', 'subject_id']: - fam_df.insert(0, col, sample_meta[col]) - - return fam_df - -def calc_gene_family(counts, gene_column, family_prefix, max_index, output_file, meta_df): +def calc_gene_family(sample_name, counts, gene_column, family_prefix, max_index, output_file): # Build list of all possible family names all_fams = [f'{family_prefix}{i}' for i in range(1, max_index + 1)] @@ -43,12 +32,12 @@ def calc_gene_family(counts, gene_column, family_prefix, max_index, output_file, # Reindex to include all families fam_df = pd.DataFrame([fam_df.reindex(columns=all_fams, fill_value=0).iloc[0]]).reset_index(drop=True) - # Add metadata columns - fam_df = pd.concat([meta_df, fam_df], axis=1) + # Add sample column + fam_df.insert(0, 'sample', sample_name) fam_df.to_csv(output_file, header=True, index=False) -def calc_sample_stats(meta_df, counts, output_file): +def calc_sample_stats(sample_name, counts, output_file): """Calculate sample level statistics of TCR repertoire.""" ## first pass stats @@ -105,8 +94,8 @@ def calc_sample_stats(meta_df, counts, output_file): # Convert to single-row dataframe df_stats = pd.DataFrame([row_data]) - # Add metadata columns - df_stats = pd.concat([meta_df, df_stats], axis=1) + # Add sample column + df_stats.insert(0, 'sample', sample_name) # Save to CSV df_stats.to_csv(output_file, header=True, index=False) @@ -117,10 +106,10 @@ def main(): parser = argparse.ArgumentParser(description='Calculate clonality of a TCR repertoire') # add arguments - parser.add_argument('-s', '--sample_meta', - metavar='sample_meta', + parser.add_argument('-s', '--sample_name', + metavar='sample_name', type=str, - help='sample metadata passed in as json format') + help='sample name') 
parser.add_argument('-c', '--count_table', metavar='count_table', type=argparse.FileType('r'), @@ -128,29 +117,16 @@ def main(): args = parser.parse_args() - ## convert metadata to list - sample_meta = json.loads(args.sample_meta) + sample = args.sample_name # Read in the counts file - counts = pd.read_csv(args.count_table, sep='\t', header=0) - - # Build metadata row from selected keys - meta_keys = ['subject_id', 'timepoint', 'origin'] - meta_row = {k: sample_meta[k] for k in meta_keys} - meta_df = pd.DataFrame([meta_row]) - - sample = sample_meta['sample'] - - calc_gene_family(counts, 'v_call', 'TRBV', 30, f'vdj/v_family_{sample}.csv', meta_df) - calc_gene_family(counts, 'd_call', 'TRBD', 2, f'vdj/d_family_{sample}.csv', meta_df) - calc_gene_family(counts, 'j_call', 'TRBJ', 2, f'vdj/j_family_{sample}.csv', meta_df) + counts = pd.read_csv(args.count_table, sep='\t') - # Build metadata row from selected keys - meta_keys = ['sample', 'subject_id', 'timepoint', 'origin'] - meta_row = {k: sample_meta[k] for k in meta_keys} - meta_df = pd.DataFrame([meta_row]) + calc_gene_family(sample, counts, 'v_call', 'TRBV', 30, f'vdj/v_family_{sample}.csv') + calc_gene_family(sample, counts, 'd_call', 'TRBD', 2, f'vdj/d_family_{sample}.csv') + calc_gene_family(sample, counts, 'j_call', 'TRBJ', 2, f'vdj/j_family_{sample}.csv') - calc_sample_stats(meta_df, counts, f'stats/sample_stats_{sample}.csv') + calc_sample_stats(sample, counts, f'stats/sample_stats_{sample}.csv') if __name__ == "__main__": main() diff --git a/modules/local/compare/giana.nf b/modules/local/compare/giana.nf index b062f5f..561c1b3 100644 --- a/modules/local/compare/giana.nf +++ b/modules/local/compare/giana.nf @@ -28,30 +28,20 @@ process GIANA_CALC { > giana.log 2>&1 # Insert header after GIANA comments - python3 - < tmp && mv tmp giana_RotationEncodingBL62.txt mv giana_RotationEncodingBL62.txt_EncodingMatrix.txt giana_EncodingMatrix.txt """ diff --git a/modules/local/compare/gliph2.nf 
b/modules/local/compare/gliph2.nf index 36ca9f4..486442d 100644 --- a/modules/local/compare/gliph2.nf +++ b/modules/local/compare/gliph2.nf @@ -18,15 +18,14 @@ process GLIPH2_TURBOGLIPH { script: """ - # R script starts here - cat > run_gliph2.R < local_similarities.txt diff --git a/modules/local/compare/tcrsharing.nf b/modules/local/compare/tcrsharing.nf index b4b9fab..ff68619 100644 --- a/modules/local/compare/tcrsharing.nf +++ b/modules/local/compare/tcrsharing.nf @@ -19,30 +19,36 @@ process TCRSHARING_CALC { # Load data df = pd.read_csv("${concat_cdr3}", sep="\t") - # Step 1: Map samples to integers - sample_mapping = {sample: i + 1 for i, sample in enumerate(df['sample'].unique())} - sample_map_df = pd.DataFrame.from_dict(sample_mapping, orient='index', columns=['sample_id']).reset_index() - sample_map_df.columns = ['patient', 'sample_id'] + # Map sample to integer codes + df['sample'] = df['sample'].astype('category') + df['sample_id'] = df['sample'].cat.codes + 1 + + # Export mapping (uses category lookup directly) + sample_map_df = pd.DataFrame({ + 'patient': df['sample'].cat.categories, + 'sample_id': np.arange(1, len(df['sample'].cat.categories) + 1) + }) sample_map_df.to_csv("sample_mapping.tsv", sep="\t", index=False) - # Step 2: Group by CDR3b and aggregate sample_ids - df['sample_id'] = df['sample'].map(sample_mapping) - + # Get unique sample_ids per CDR3b — vectorized grouped = ( df.groupby('CDR3b')['sample_id'] - .apply(lambda x: sorted(set(x))) # remove duplicates if any + .unique() # UNIQUE — fast & vectorized + .apply(np.sort) # SORT — vectorized .reset_index() ) - # Step 3: Add comma-separated list and total count - grouped['samples_present'] = grouped['sample_id'].apply(lambda x: ",".join(map(str, x))) + # Calculate counts grouped['total_samples'] = grouped['sample_id'].apply(len) + grouped['samples_present'] = grouped['sample_id'].apply( + lambda arr: ",".join(arr.astype(str)) + ) - # Step 4: Final output — drop raw list + # Drop raw list 
final_df = grouped[['CDR3b', 'total_samples', 'samples_present']] - final_df = final_df.sort_values(by='total_samples', axis=0, ascending=False) + final_df = final_df.sort_values(by="total_samples", ascending=False) - # Step 5: Export both outputs + # Export final list final_df.to_csv("cdr3_sharing.tsv", sep="\t", index=False) EOF diff --git a/modules/local/sample/sample_calc.nf b/modules/local/sample/sample_calc.nf index 1a72867..dff4c0a 100644 --- a/modules/local/sample/sample_calc.nf +++ b/modules/local/sample/sample_calc.nf @@ -14,13 +14,11 @@ process SAMPLE_CALC { val sample_meta , emit: sample_meta script: - def meta_json = groovy.json.JsonOutput.toJson(sample_meta) - """ mkdir -p stats mkdir -p vdj - sample_calc.py -s '${meta_json}' -c ${count_table} + sample_calc.py -s '${sample_meta.sample}' -c ${count_table} """ stub: diff --git a/modules/local/sample/sample_plot.nf b/modules/local/sample/sample_plot.nf index 272533d..5ca0494 100644 --- a/modules/local/sample/sample_plot.nf +++ b/modules/local/sample/sample_plot.nf @@ -13,7 +13,7 @@ process SAMPLE_PLOT { output: path 'sample_stats.html' - script: + script: """ ## copy quarto notebook to output directory cp $sample_stats_template sample_stats.qmd @@ -26,6 +26,10 @@ process SAMPLE_PLOT { -P sample_table:$sample_table \ -P sample_stats_csv:$sample_stats_csv \ -P v_family_csv:$v_family_csv \ + -P samplechart_x_col:${params.samplechart_x_col} \ + -P samplechart_color_col:${params.samplechart_color_col} \ + -P vgene_subject_col:${params.vgene_subject_col} \ + -P vgene_x_cols:${params.vgene_x_cols} \ --to html """ diff --git a/nextflow.config b/nextflow.config index 2e531d0..f685ead 100644 --- a/nextflow.config +++ b/nextflow.config @@ -26,6 +26,12 @@ params { sample_stats_template = "${projectDir}/notebooks/sample_stats_template.qmd" compare_stats_template = "${projectDir}/notebooks/compare_stats_template.qmd" + // Sample stats metadata parameters + samplechart_x_col = 'timepoint' + samplechart_color_col = 
'origin' + vgene_subject_col = 'subject_id' + vgene_x_cols = 'origin,timepoint' + // GIANA parameters threshold = 7.0 threshold_score = 3.6 diff --git a/notebooks/sample_stats_template.qmd b/notebooks/sample_stats_template.qmd index 648732f..5a97598 100644 --- a/notebooks/sample_stats_template.qmd +++ b/notebooks/sample_stats_template.qmd @@ -31,14 +31,21 @@ Thank you for using TCRtoolkit! This report is generated from sample data and me #| echo: false workflow_cmd='' -project_name='path/to/project_name' -project_dir='path/to/project_dir' -sample_table='path/to/sample_table.csv' -sample_stats_csv='path/to/sample_stats.csv' -v_family_csv='path/to/v_family.csv' +project_name='' +project_dir='' +sample_table='' +sample_stats_csv='' +v_family_csv='' + +samplechart_x_col='' +samplechart_color_col='' +vgene_subject_col='' +vgene_x_cols='' ``` ```{python} +#| code-fold: true + # 1. Load Packages from IPython.display import Image import os @@ -52,6 +59,13 @@ from matplotlib.colors import LinearSegmentedColormap import plotly.express as px import plotly.graph_objects as go +import warnings +warnings.filterwarnings( + "ignore", + category=FutureWarning, + module="plotly" +) + # 2. Print pipeline parameters print('Project Name: ' + project_name) @@ -65,19 +79,20 @@ print('Date and time: ' + str(datetime.datetime.now())) # TicToc = TicTocGenerator() # 4. 
Loading data +## reading sample metadata +meta = pd.read_csv(sample_table, sep=',') +meta_cols = meta.columns.tolist() ## reading combined repertoire statistics df = pd.read_csv(sample_stats_csv, sep=',') -# print('-- Imported sample_stats_csv as `df`...') - -## reading sample metadata -meta = pd.read_csv(sample_table, sep=',', header=None, index_col=None, - names=['sample', 'file', 'subject_id', 'timepoint', 'origin']) -# print('-- Imported sample_table as `meta`...') +df = pd.merge(df, meta, on='sample', how='left') +df = df[meta_cols + [c for c in df.columns if c not in meta_cols]] ## reading V gene family usage v_family = pd.read_csv(v_family_csv, sep=',') -v_family = v_family.sort_values(by=['subject_id', 'timepoint']) +v_family = pd.merge(v_family, meta, on='sample', how='left') +v_family = v_family[meta_cols + [c for c in v_family.columns if c not in meta_cols]] +v_family = v_family.sort_values(by=[vgene_subject_col]) ``` # Sample level statistics {#sec-sample-level-stats} @@ -86,46 +101,57 @@ Below are plots showing basic T cell repertoire statistics. Each plot has a desc Version 3 of these plots features plotly express interactive plots. This version is exploratory and may be updated in the future. +```{python} +#| code-fold: true + + +x_category = df[samplechart_x_col].unique().tolist() +x_category.sort() + +def samplechart(df, y_col): + fig = px.box( + df, + x=samplechart_x_col, + y=y_col, + color=samplechart_color_col, + points='all', + hover_data='sample', + category_orders={f'{samplechart_x_col}': x_category} + ) + + fig.update_layout( + margin=dict(b=80) + ) + + fig.show() +``` + ## Number of clones ```{python} -timepts = df.timepoint.unique().tolist() -timepts.sort() -fig = px.box(df, - x = 'timepoint', - y='num_clones', - color='origin', - points='all', hover_data=['sample'], - category_orders={'timepoint': timepts}) -fig.show() +#| code-fold: true + +samplechart(df, y_col='num_clones') ``` -**Figure 1. 
Number of clones across timepoints.** A clone is defined as a T cell with a unique CDR3 amino acid sequence. The number of clones is shown on the y-axis and 'origin_timepoint' is shown on the x-axis. +**Figure 1. Number of clones across sample groupings.** A clone is defined as a T cell with a unique CDR3 amino acid sequence. The number of clones is shown on the y-axis. The x-axis represents sample groupings defined by user-specified metadata fields (eg. origin, timepoint). ## Clonality ```{python} -fig = px.box(df, - x = 'timepoint', - y='clonality', - color='origin', - points='all', hover_data=['sample'], - category_orders={'timepoint': timepts}) -fig.show() +#| code-fold: true + +samplechart(df, y_col='clonality') ``` -**Figure 2. The clonality of samples across timepoints.** Clonality is a measure of T cell clonal expansion and reflects the degree to which the sample is dominated by 1 or more T cell clones. Clonality is calculated via: $$Clonality = \frac {1-H} {\log_{2} N} \quad\text{,}\quad H = -\sum\limits_{i=1}^N p_i \log_{2}{p_i}$$ where $H$ is the Shannon entropy of a given sample, $N$ is the number of unique TCRs in the sample, and $p_i$ is the frequency of the $i$ th unique TCR in the sample. +**Figure 2. The clonality of samples across sample groupings.** Clonality is a measure of T cell clonal expansion and reflects the degree to which the sample is dominated by 1 or more T cell clones. Clonality is calculated via: $$Clonality = \frac {1-H} {\log_{2} N} \quad\text{,}\quad H = -\sum\limits_{i=1}^N p_i \log_{2}{p_i}$$ where $H$ is the Shannon entropy of a given sample, $N$ is the number of unique TCRs in the sample, and $p_i$ is the frequency of the $i$ th unique TCR in the sample. 
## Simpson Index ```{python} -fig = px.box(df, - x = 'timepoint', - y='simpson_index_corrected', - color='origin', - points='all', hover_data=['sample'], - category_orders={'timepoint': timepts}) -fig.show() +#| code-fold: true + +samplechart(df, y_col='simpson_index_corrected') ``` **Figure 3. Corrected Simpson Index.** The Simpson Index is a measure of diversity that takes into account the number of clones and the relative abundance of each clone in a sample. The corrected Simpson Index, $D$, is calculated as: @@ -137,13 +163,9 @@ Where $N$ is the number of unique TCRs in the sample, $p_i$ is the frequency of ## Percent of productive TCRs ```{python} -fig = px.box(df, - x = 'timepoint', - y='pct_prod', - color='origin', - points='all', hover_data=['sample'], - category_orders={'timepoint': timepts}) -fig.show() +#| code-fold: true + +samplechart(df, y_col='pct_prod') ``` **Figure 4. Percent of productive TCRs.** A productive TCR is a DNA/RNA sequence that can be translated into a protein sequence, i.e. it does not contain a premature stop codon or an out of frame rearrangement. The percent of productive TCRs is calculated as: @@ -155,13 +177,9 @@ where $P$ is the number of productive TCRs and $N$ is the total number of TCRs i ## Average productive CDR3 Length ```{python} -fig = px.box(df, - x = 'timepoint', - y='productive_cdr3_avg_len', - color='origin', - points='all', hover_data=['sample'], - category_orders={'timepoint': timepts}) -fig.show() +#| code-fold: true + +samplechart(df, y_col='productive_cdr3_avg_len') ``` **Figure 5. Average Productive CDR3 Length** The average length of the CDR3 region of the TCR for productive clones. The CDR3 region is the most variable region of the TCR and is the region that determines antigen specificity. 
@@ -169,13 +187,9 @@ fig.show() ## TCR Convergence ```{python} -fig = px.box(df, - x = 'timepoint', - y='ratio_convergent', - color='origin', - points='all', hover_data=['sample'], - category_orders={'timepoint': timepts}) -fig.show() +#| code-fold: true + +samplechart(df, y_col='ratio_convergent') ``` **Figure 6. TCR Convergence** The ratio of convergent TCRs to total TCRs. A convergent TCR is a TCR that is generated via 2 or more unique nucleotide sequences via codon degeneracy. @@ -195,44 +209,115 @@ $$ where $N_{k}$ is the number of TCRs that use the $k$ th V gene, and T is the total number of TCRs in the sample. ```{python} +#| code-fold: true + ## code adapted from https://www.moritzkoerber.com/posts/plotly-grouped-stacked-bar-chart/ colors = ["#fafa70","#fdef6b","#ffe566","#ffda63","#ffd061","#ffc660","#ffbb5f","#fdb15f","#fba860","#f79e61","#f39562","#ef8c63","#e98365","#e37b66","#dd7367","#d66b68","#ce6469","#c65e6a","#bd576b","#b4526b","#ab4c6b","#a1476a","#974369","#8c3e68","#823a66","#773764","#6d3361","#62305e","#572c5a","#4d2956"] +vgene_x_cols = vgene_x_cols.split(',') -## calculate calulate proportions and add to v_family_long -v_family_long = pd.melt(v_family, id_vars=['subject_id', 'timepoint', 'origin'], value_vars=v_family.columns[3:], var_name='v_gene', value_name='count') -v_family_long['proportion'] = v_family_long.groupby(['subject_id', 'timepoint', 'origin'])['count'].transform(lambda x: x / x.sum()) +## calculate proportions and add to v_family_long +v_family_long = pd.melt(v_family, + id_vars=meta_cols, + value_vars=[c for c in v_family.columns if c.startswith('TRBV')], + var_name='v_gene', + value_name='count') +v_family_long['proportion'] = v_family_long.groupby(meta_cols)['count'].transform(lambda x: x / x.sum()) ## add in the total number of v genes for each sample -total_v_genes = v_family_long.groupby(['subject_id', 'timepoint', 'origin'])['count'].sum().reset_index() -total_v_genes.columns = ['subject_id', 'timepoint', 'origin', 
'total_v_genes'] -v_family_long = pd.merge(v_family_long, total_v_genes, on=['subject_id', 'timepoint', 'origin']) +total_v_genes = v_family_long.groupby(meta_cols)['count'].sum().reset_index().rename(columns={'count': 'total_v_genes'}) +v_family_long = v_family_long.merge(total_v_genes, on=meta_cols) -for patient in v_family_long.subject_id.unique().tolist(): - current = v_family_long[v_family_long.subject_id == patient] +subjects = ( + v_family_long[vgene_subject_col] + .dropna() + .unique() +) + +for subject in subjects: + current = v_family_long[v_family_long[vgene_subject_col] == subject] fig = go.Figure() fig.update_layout( template="simple_white", - title_text=f"Patient: {patient}", - xaxis=dict(title_text="timepoint"), + title_text=f"{vgene_subject_col}: {subject}", + xaxis=dict(title_text=f"{', '.join(vgene_x_cols)}"), yaxis=dict(title_text="proportion"), barmode="stack", + margin=dict(b=80, r=115), + legend=dict( + orientation="v", + x=1.02, + y=1, + yanchor="top", + itemsizing="constant", + traceorder="reversed" + ) ) + fig.update_xaxes( + automargin=True, + title_standoff=30 + ) + + if len(vgene_x_cols) == 1: + x_vals = current[vgene_x_cols[0]] + else: + x_vals = current[vgene_x_cols].astype(str).agg('_'.join, axis=1) + summary = ( + current + .assign(x=x_vals) + .drop_duplicates(subset=meta_cols) + .groupby('x', as_index=False) + .agg( + total_v_genes=('total_v_genes', 'sum'), + ) + ) + y_df = ( + current + .assign(x=x_vals) + .groupby('x', as_index=False) + .agg( + y_top=('proportion', 'sum') + ) + ) + summary = y_df.merge(summary, on='x') + for g, c in zip(current.v_gene.unique(), colors): plot_df = current[current.v_gene == g] - if g == 'TRBV30': - fig.add_trace( - go.Bar(x=[plot_df.timepoint, plot_df.origin], - y=plot_df.proportion, name=g, marker_color=c, - text=plot_df['total_v_genes'], textposition='outside' - ) - ) + if len(vgene_x_cols) == 1: + x = plot_df[vgene_x_cols[0]] else: - fig.add_trace( - go.Bar(x=[plot_df.timepoint, plot_df.origin], 
- y=plot_df.proportion, name=g, marker_color=c - ) + x = [plot_df[col] for col in vgene_x_cols] + fig.add_trace( + go.Bar( + x=x, + y=plot_df['proportion'], + name=g, + marker_color=c, + legendgroup=g, + + customdata=plot_df[['sample']], + hovertemplate=( + "V gene: %{fullData.name}
<br>" + "Sample: %{customdata[0]}
<br>" + "Proportion: %{y}
<br>" + "<extra></extra>" + ), + + text=plot_df['total_v_genes'] if ((g == 'TRBV30') and (len(vgene_x_cols) != 1)) else None, + textposition='outside', + ) ) + if len(vgene_x_cols) == 1: + for _, row in summary.iterrows(): + fig.add_annotation( + x=row['x'], + y=row['y_top'] + 0.02, + text=str(row['total_v_genes']), + showarrow=False, + yanchor='bottom', + font=dict(size=12) + ) + fig.show() ```