diff --git a/.gitignore b/.gitignore index 0b77ac7f..68ffdaf6 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ *.pyc out/ td-bulk-import.log +/tool-box/cjo-profile-viewer/debug diff --git a/tool-box/cjo-profile-viewer/README.md b/tool-box/cjo-profile-viewer/README.md new file mode 100644 index 00000000..8f1b5971 --- /dev/null +++ b/tool-box/cjo-profile-viewer/README.md @@ -0,0 +1,199 @@ +# CJO Profile Viewer + +A Streamlit application for visualizing Treasure Data Customer Journey Orchestration (CJO) journeys with live profile data integration. + +## 🎯 Overview + +The CJO Profile Viewer provides comprehensive visualization of customer journeys from Treasure Data's CDP. It features real-time profile tracking, interactive canvas flowcharts, and detailed step information with live data integration. + +## ✨ Key Features + +- **🔄 Live Data Integration**: Real-time journey configuration and profile data from TD APIs +- **🎨 Interactive Canvas**: Horizontal flowchart visualization with clickable steps +- **📋 Step Selection**: Hierarchical dropdown with profile counts for precise navigation +- **🔍 Profile Viewing**: Customer ID filtering, search, and CSV export functionality +- **📊 Data Mapping**: Complete technical-to-display name mapping with full API response view +- **🎪 7 Step Types Supported**: Wait, Activation, Decision, AB Test, Jump, Merge, and End steps +- **📱 Responsive Design**: Clean interface that adapts to different screen sizes + +## 🛠️ Installation + +1. **Clone or download** the application files +2. **Install dependencies**: + ```bash + pip install -r requirements.txt + ``` + +## 🚀 Quick Start + +### 1. Configure TD API Access + +Choose one authentication method: + +**Environment Variable (Recommended)** +```bash +export TD_API_KEY="your_api_key_here" +``` + +**Config File** +```bash +echo "TD_API_KEY=your_api_key_here" > ~/.td/config +``` + +**Local Config File** +```bash +echo "TD_API_KEY=your_api_key_here" > td_config.txt +``` + +**Get API Key**: TD Console → Profile → API Keys + +### 2. Launch Application + +```bash +streamlit run app.py +``` + +### 3. Load Journey Data + +1. Open browser at `http://localhost:8501` +2. Enter a **Journey ID** in the configuration section +3. Click **"Load Journey Data"** - fetches configuration and live profile data +4. Explore using the three main tabs + +## 📱 Interface Guide + +### **📋 Step Selection Tab** +- **Hierarchical dropdown** with all journey steps (includes profile counts and UUIDs) +- **Detailed step info** shows step name, type, ID, and SQL query used +- **Customer ID list** with real-time search and filtering +- **CSV export** functionality for profile lists +- **Always shows step info** even for steps with 0 profiles + +### **🎨 Canvas Tab** +- **Interactive flowchart** with horizontal stage layout (responsive) +- **Color-coded step types** for visual identification: + - 🟨 Decision/AB Test/Merge (Yellow) - Branching logic + - 🟪 Wait Steps (Pink/Red) - Time-based operations + - 🟢 Activation (Green) - External actions + - 🟦 Jump/End (Blue/Purple) - Navigation/completion +- **Clean display names** without UUIDs or duplicate profile counts +- **Hover tooltips** show "Step UUID: [shortened-id]" +- **Clickable steps** open profile detail modals +- **Single profile count** display per step (no duplication) + +### **📊 Data & Mappings Tab** +- **Column mappings** (all technical → display name conversions) +- **Full API request/response** with redacted API key for transparency +- **No profile preview** or summary stats (focused on technical details) + +## 🔧 Technical Architecture + +### **Modular Design** +``` +├── app.py # Main Streamlit application +├── src/ +│ ├── services/ +│ │ └── td_api.py # TD API service layer +│ ├── components/ +│ │ └── flowchart_renderer.py # Canvas HTML generation +│ ├── styles/ # CSS styling (flowchart, modals, etc.) +│ ├── utils/ # Session state, profile filtering +│ ├── column_mapper.py # Technical-to-display name mapping +│ ├── flowchart_generator.py # Journey structure processing +│ └── hierarchical_step_formatter.py # Dropdown formatting +├── docs/ # Comprehensive guides +└── requirements.txt # Dependencies +``` + +### **Data Sources** + +**Journey Configuration** +- **API**: `https://api-cdp.treasuredata.com/entities/journeys/{journey_id}` +- **Authentication**: TD API key required +- **Response**: Complete journey structure with stages and steps + +**Profile Data** +- **Source**: Live queries via pytd client to TD +- **Tables**: `cdp_audience_{audienceId}.journey_{journeyId}` +- **Columns**: CJO naming conventions (`cdp_customer_id`, `intime_stage_*`, etc.) +- **Engine**: Presto (default configuration) + +## 🎪 Supported Step Types + +| Type | Description | Visual Color | +|------|-------------|--------------| +| **Wait Steps** | Duration waits, condition waits | 🟪 Pink/Red | +| **Activation Steps** | Data exports, syndication | 🟢 Green | +| **Decision Points** | Segment-based branching | 🟨 Yellow/Beige | +| **AB Test Steps** | Split testing with variants | 🟨 Yellow/Beige | +| **Jump Steps** | Stage/journey transitions | 🟦 Blue/Purple | +| **Merge Steps** | Path consolidation | 🟨 Yellow/Beige | +| **End Steps** | Journey termination | 🟦 Blue/Purple | + +## 🔍 Key Capabilities + +### **Profile Tracking** +- **Real-time counts** for each step showing active profiles +- **SQL query display** showing exact logic used for profile filtering +- **Customer ID search** with instant filtering +- **CSV export** of customer lists per step + +### **Hierarchy Display** +- **Clean step names** (no UUIDs in canvas, full detail in dropdown) +- **Proper indentation** for branching paths (Decision, AB Test, Wait Conditions) +- **Merge step handling** with consolidated post-merge paths +- **Breadcrumb context** for complex journey navigation + +### **Canvas Features** +- **Horizontal stages** with responsive design (mobile-friendly fallback to vertical) +- **Clean tooltips** with shortened UUIDs for identification +- **No duplicate information** (single profile count, clean step names) +- **Interactive modals** with detailed profile information + +## 📚 Documentation + +For detailed technical information, see the `/docs` directory: + +- **`PROJECT_SUMMARY.md`** - Complete technical overview and architecture +- **`STEP_TYPES_GUIDE.md`** - Implementation details for all 7 step types +- **`UI_IMPLEMENTATION_GUIDE.md`** - Interface patterns and formatting rules +- **`journey-tables-guide.md`** - Data structure and table schema reference + +## 🚨 Troubleshooting + +### **Common Issues** + +**API Authentication** +- Verify TD API key is set correctly +- Check key has CDP access permissions + +**Journey Loading** +- Ensure Journey ID exists and is accessible +- Verify journey has associated audience data + +**Profile Data** +- Check that journey tables exist in TD +- Verify audience has profile data in the specified journey + +**Performance** +- Use Step Selection tab for large journeys (better performance) +- Canvas generation is on-demand to avoid timeouts + +### **Debug Information** + +The application provides comprehensive debugging: +- **API request/response details** in Data & Mappings tab +- **SQL queries shown** for each step's profile filtering logic +- **Column mapping transparency** with full technical-to-display conversion +- **Error messages** with specific details for troubleshooting + +## 🎯 Production Ready + +This application is optimized for production use: +- **Modular architecture** for maintainability +- **Live data integration** with Treasure Data +- **Responsive design** for various screen sizes +- **Comprehensive documentation** for developers and users +- **Clean, minimal codebase** with zero development artifacts + +Perfect for visualizing customer journey performance, debugging CJO configurations, and understanding customer flow patterns with real-time data from Treasure Data's Customer Data Platform. \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/app.py b/tool-box/cjo-profile-viewer/app.py new file mode 100644 index 00000000..136629fc --- /dev/null +++ b/tool-box/cjo-profile-viewer/app.py @@ -0,0 +1,679 @@ +""" +CJO Profile Viewer - Streamlit Application (Refactored) + +A tool for visualizing Customer Journey Orchestration (CJO) journeys with profile data. +This refactored version uses modular components for better maintainability. +""" + +import streamlit as st +import pandas as pd +from typing import Dict, List, Optional + +# Import refactored modules +from src.services.td_api import TDAPIService +from src.column_mapper import CJOColumnMapper +from src.flowchart_generator import CJOFlowchartGenerator +from src.components.flowchart_renderer import create_flowchart_html +from src.styles import load_all_styles +from src.utils.session_state import SessionStateManager +from src.utils.step_display import get_step_display_name +from src.utils.profile_filtering import ( + get_step_column_name, + get_step_profiles, + get_step_profile_count, + get_filtered_profile_data, + create_step_profile_condition +) +from src.hierarchical_step_formatter import format_hierarchical_steps + + +def render_configuration_panel(): + """Render the journey configuration input panel.""" + st.header("🔧 Journey Configuration") + + with st.container(): + col1, col2 = st.columns([2, 1]) + + with col1: + journey_id = st.text_input( + "Journey ID", + placeholder="e.g., 12345", + key="main_journey_id", + on_change=lambda: st.session_state.update({"auto_load_triggered": True}), + label_visibility="collapsed" + ) + with col2: + load_config_button = st.button( + "📋 Load Journey Config", + type="primary", + key="load_config_button" + ) + + return journey_id, load_config_button + + +def render_attribute_selector(): + """Render the customer attribute selection interface.""" + load_profile_button = False + + if SessionStateManager.is_config_loaded(): + st.markdown("**Step 2: Select Additional Customer Attributes**") + st.caption("Select additional customer attributes to include when viewing step profiles. cdp_customer_id is included by default.") + + try: + audience_id = SessionStateManager.get_audience_id() + if audience_id: + available_attributes = SessionStateManager.get_available_attributes(audience_id) + + if available_attributes: + selected_attributes = st.multiselect( + "Select customer attributes:", + options=available_attributes, + default=SessionStateManager.get("selected_attributes", []), + key="attribute_selector", + help="These attributes will be joined from the customers table", + label_visibility="collapsed" + ) + + # Store selected attributes in session state + SessionStateManager.set("selected_attributes", selected_attributes) + + # Show Load Profile Data button + load_profile_button = st.button( + "📊 Load Profile Data", + type="primary", + key="load_profile_button", + help="Load customer profile data with selected attributes" + ) + else: + st.info("No additional customer attributes available.") + # Show Load Profile Data button even without attributes + load_profile_button = st.button( + "📊 Load Profile Data", + type="primary", + key="load_profile_button_no_attr", + help="Load customer profile data" + ) + else: + st.warning("Could not find audience ID or attributes not loaded.") + except Exception as e: + st.warning(f"Could not load customer attributes: {str(e)}") + else: + st.caption("Load journey configuration first to see available customer attributes.") + + return load_profile_button + + +def handle_config_loading(journey_id: str, load_config_button: bool, api_service: TDAPIService): + """Handle the journey configuration loading process.""" + # Check for auto-load trigger (when user presses Enter) + auto_load_triggered = SessionStateManager.get("auto_load_triggered", False) + if auto_load_triggered and journey_id: + SessionStateManager.set("auto_load_triggered", False) + load_config_button = True # Trigger the loading logic + + # Handle Step 1: Load Journey Configuration + if load_config_button: + if not journey_id or journey_id.strip() == "": + st.toast("Please enter a Journey ID", icon="⚠️") + st.stop() + + if not api_service.api_key: + st.error("❌ **API Key Required**: Please set up your TD API key (TD_API_KEY environment variable, ~/.td/config, or td_config.txt file)") + st.stop() + + # Fetch journey data + api_response, error = api_service.fetch_journey_data(journey_id) + + if error: + st.toast(f"API Error: {error}", icon="❌", duration=30) + st.stop() + + if api_response: + # Extract audience ID from API response + try: + audience_id = api_response.get('data', {}).get('attributes', {}).get('audienceId') + if not audience_id: + st.error("❌ **API Response Error**: Audience ID not found in API response") + st.stop() + except Exception as e: + st.error(f"❌ **API Response Error**: Failed to extract audience ID: {str(e)}") + st.stop() + + # Load available customer attributes + available_attributes = api_service.get_available_attributes(audience_id) + + # Store configuration in session state + SessionStateManager.set_config_loaded(api_response, audience_id, available_attributes) + + st.toast(f"Journey configuration for '{journey_id}' loaded successfully! Now select attributes and load profile data.", icon="✅") + st.rerun() + + +def handle_profile_loading(load_profile_button: bool, api_service: TDAPIService): + """Handle the profile data loading process.""" + if load_profile_button: + if not SessionStateManager.is_config_loaded(): + st.toast("Please load journey configuration first", icon="⚠️") + st.stop() + + if not api_service.api_key: + st.error("❌ **API Key Required**: Please set up your TD API key") + st.stop() + + # Get journey and audience info from session state + journey_id = SessionStateManager.get_journey_id() + audience_id = SessionStateManager.get_audience_id() + + if not journey_id or not audience_id: + st.error("❌ Missing journey or audience ID from configuration") + st.stop() + + # Get selected attributes + selected_attributes = SessionStateManager.get("selected_attributes", []) + + # Load profile data + profile_data = api_service.load_profile_data(journey_id, audience_id, selected_attributes) + if profile_data is not None: + SessionStateManager.set_profile_loaded(profile_data) + st.toast(f"Profile data loaded successfully! {len(profile_data)} profiles found.", icon="✅") + else: + st.toast("Could not load profile data. Some features may be limited.", icon="⚠️") + + st.rerun() + + +def render_journey_tabs(): + """Render the main journey visualization tabs.""" + if not SessionStateManager.is_journey_loaded(): + if not SessionStateManager.is_config_loaded(): + st.info("👆 **Step 1**: Enter a Journey ID and click 'Load Journey Config' to begin.") + else: + st.info("👆 **Step 2**: Select customer attributes (if desired) and click 'Load Profile Data' to begin visualization.") + return + + # Initialize components + try: + api_response = SessionStateManager.get('api_response') + profile_data = SessionStateManager.get('profile_data') + + column_mapper = CJOColumnMapper(api_response) + + # Handle profile data safely + if profile_data is not None and not profile_data.empty: + flowchart_generator = CJOFlowchartGenerator(api_response, profile_data) + else: + # Create generator with empty DataFrame + flowchart_generator = CJOFlowchartGenerator(api_response, pd.DataFrame()) + st.warning("⚠️ Profile data is empty or unavailable. Some features may be limited.") + + except Exception as e: + st.error(f"Error initializing components: {str(e)}") + return + + # Create tabs + step_tab, canvas_tab, data_tab = st.tabs(["📋 Step Selection", "🎨 Canvas", "📊 Data & Mappings"]) + + with step_tab: + render_step_selection_tab(flowchart_generator, column_mapper) + + with canvas_tab: + render_canvas_tab(flowchart_generator, column_mapper) + + with data_tab: + render_data_tab(flowchart_generator, column_mapper) + + +def render_step_selection_tab(generator: CJOFlowchartGenerator, column_mapper: CJOColumnMapper): + """Render the step selection tab.""" + st.subheader("Step Selection & Profile View") + + if generator.profile_data.empty: + st.warning("No profile data available. Please load profile data to use this feature.") + return + + # Get all steps for dropdown using the stages_data property + stages_data = generator.stages_data + if not stages_data: + st.warning("No steps found in the journey configuration.") + return + + # Add helpful description + st.markdown("**How to use:** First select a stage from the journey, then choose a specific step within that stage to view profile details.") + + # Stage selector + stage_options = {} + for stage_idx, stage_data in enumerate(stages_data): + stage_name = stage_data.get('name', f'Stage {stage_idx + 1}') + stage_options[stage_name] = { + 'idx': stage_idx, + 'name': stage_name, + 'data': stage_data + } + + if not stage_options: + st.warning("No stages available for selection.") + return + + selected_stage_name = st.selectbox( + "1. Select a stage:", + options=list(stage_options.keys()), + key="stage_selector", + index=0, # Default to first stage + help="Choose a stage from the customer journey" + ) + + # Show stage info + if selected_stage_name: + selected_stage = stage_options[selected_stage_name] + stage_data = selected_stage['data'] + steps_count = len(stage_data.get('steps', {})) + + # Step selector (updates based on selected stage) + if selected_stage_name: + selected_stage = stage_options[selected_stage_name] + stage_idx = selected_stage['idx'] + stage_data = selected_stage['data'] + steps = stage_data.get('steps', {}) + + if not steps: + st.warning("No steps found in the selected stage.") + return + + # Use hierarchical formatter to get properly formatted step display + try: + # Get hierarchical formatted steps for dropdown (with profile counts and UUIDs in names) + formatted_steps = format_hierarchical_steps(generator, include_profile_counts=True, include_uuid=True) + + # Build step options from hierarchical formatter output, filtering for selected stage + step_options = {} + display_name_counts = {} + step_items = [] + + # First pass: collect all steps and count duplicate display names + for display_name, step_info in formatted_steps: + # Skip empty lines used for visual separation + if step_info.get('is_empty_line', False): + continue + + # Only include steps from the selected stage + if step_info.get('stage_index', 0) == stage_idx: + # Update step_info with required fields for compatibility + step_info.update({ + 'stage_idx': stage_idx, + 'stage_name': selected_stage_name + }) + + # Use step_id as the id field for compatibility + if 'step_id' in step_info: + step_info['id'] = step_info['step_id'] + + # Add type field for compatibility (hierarchical formatter uses step_type) + if 'step_type' in step_info and 'type' not in step_info: + step_info['type'] = step_info['step_type'] + + step_items.append((display_name, step_info)) + display_name_counts[display_name] = display_name_counts.get(display_name, 0) + 1 + + # Second pass: disambiguate duplicates and build final step_options + def get_short_uuid(uuid_string: str) -> str: + """Extract the first part of a UUID (before first hyphen).""" + return uuid_string.split('-')[0] if uuid_string else uuid_string + + name_sequence = {} + for display_name, step_info in step_items: + if display_name_counts[display_name] > 1: + # Try UUID first, but if that would create duplicates, use sequence numbers + step_id = step_info.get('id', '') + short_uuid = get_short_uuid(step_id) + + # Check if UUID disambiguation would create a unique name + uuid_disambiguated = f"{display_name} ({short_uuid})" + + # Count how many times we've seen this UUID-disambiguated name + if uuid_disambiguated in step_options: + # UUID collision - use sequence numbers instead + sequence = name_sequence.get(display_name, 0) + 1 + name_sequence[display_name] = sequence + disambiguated_name = f"{display_name} (#{sequence})" + else: + # UUID is unique - use it + disambiguated_name = uuid_disambiguated + else: + disambiguated_name = display_name + + step_options[disambiguated_name] = step_info + + except Exception as e: + st.warning(f"Could not load hierarchical display, falling back to simple format: {str(e)}") + + # Fallback to simple display with disambiguation + step_options = {} + step_name_counts = {} + step_items = [] + + for step_id, step_data in steps.items(): + step_name = get_step_display_name(step_data) + step_type = step_data.get('type', 'Unknown') + + step_info = { + 'id': step_id, + 'name': step_name, + 'type': step_type, + 'stage_idx': stage_idx, + 'stage_name': selected_stage_name + } + step_items.append((step_name, step_info)) + step_name_counts[step_name] = step_name_counts.get(step_name, 0) + 1 + + # Disambiguate duplicates + name_sequence = {} + for step_name, step_info in step_items: + if step_name_counts[step_name] > 1: + sequence = name_sequence.get(step_name, 0) + 1 + name_sequence[step_name] = sequence + disambiguated_name = f"{step_name} (#{sequence})" + else: + disambiguated_name = step_name + + step_options[disambiguated_name] = step_info + + selected_step_name = st.selectbox( + "2. Select a step:", + options=list(step_options.keys()), + key=f"step_selector_{stage_idx}", # Unique key per stage + help="Choose a specific step to view customer profiles" + ) + + # Show step type info and render details + if selected_step_name: + selected_step = step_options[selected_step_name] + step_type = selected_step.get('step_type', selected_step.get('type', 'Unknown')) + + st.markdown("---") + render_step_details(selected_step, generator, column_mapper) + + +def generate_step_query_sql(step_column: str, profile_data_columns: List[str], selected_attributes: List[str] = None) -> str: + """ + Generate the equivalent SQL query that would be used to retrieve step profile data. + + Args: + step_column: The step column name (e.g., 'intime_stage_0_step_uuid') + profile_data_columns: List of all available columns in the profile data + selected_attributes: List of selected customer attributes to include + + Returns: + Formatted SQL query string + """ + # Get actual table name using audience ID and journey ID from session state + audience_id = SessionStateManager.get_audience_id() + journey_id = SessionStateManager.get_journey_id() + + if audience_id and journey_id: + journey_table = f"cdp_audience_{audience_id}.journey_{journey_id}" + customers_table = f"cdp_audience_{audience_id}.customers" + else: + journey_table = "journey_table" # Fallback for when IDs aren't available + customers_table = "customers_table" + + # Build WHERE conditions + where_conditions = [] + + # Step entry condition + where_conditions.append(f"j.{step_column} IS NOT NULL") + + # Step exit condition (profile still in this specific step) + step_outtime_column = step_column.replace('intime_', 'outtime_') + if step_outtime_column in profile_data_columns: + where_conditions.append(f"j.{step_outtime_column} IS NULL") + + # Journey exit condition + if 'outtime_journey' in profile_data_columns: + where_conditions.append("j.outtime_journey IS NULL") + + where_clause = "WHERE " + " AND ".join(where_conditions) + + # Determine columns to select and whether to join + if selected_attributes: + # Join with customers table for additional attributes + available_attributes = [attr for attr in selected_attributes if attr in profile_data_columns] + customer_columns = [f"c.{attr}" for attr in available_attributes] + select_columns = ["j.cdp_customer_id"] + customer_columns + + select_clause = "SELECT " + ", ".join(select_columns) + + # Query with JOIN + query = f"""{select_clause} +FROM {journey_table} j +JOIN {customers_table} c ON c.cdp_customer_id = j.cdp_customer_id +{where_clause} +ORDER BY j.cdp_customer_id""" + else: + # Simple query without JOIN + query = f"""SELECT cdp_customer_id +FROM {journey_table} +{where_clause.replace('j.', '')} +ORDER BY cdp_customer_id""" + + return query + + +def render_step_details(step_info: Dict, generator: CJOFlowchartGenerator, column_mapper: CJOColumnMapper): + """Render details for a selected step.""" + step_name = step_info.get('name', 'Unknown Step') + step_type = step_info.get('type', 'Unknown') + step_id = step_info.get('id', '') + stage_idx = step_info.get('stage_idx', 0) + + # Get profiles for this step using shared utility + try: + step_profiles = get_step_profiles(generator.profile_data, step_id, stage_idx) + + # Handle profile display (only if there are profiles) + if step_profiles: + # Show profiles with search functionality + search_term = st.text_input("Filter profiles by customer ID:", key=f"search_{step_id}") + + if search_term: + filtered_profiles = [p for p in step_profiles if search_term.lower() in str(p).lower()] + else: + filtered_profiles = step_profiles + + st.write(f"Showing {len(filtered_profiles)} of {len(step_profiles)} profiles") + + # Display profiles using shared utility + if filtered_profiles: + selected_attributes = SessionStateManager.get("selected_attributes", []) + + # Get filtered profile data with selected attributes + profile_df = get_filtered_profile_data( + generator.profile_data[generator.profile_data['cdp_customer_id'].isin(filtered_profiles)], + step_id, stage_idx, selected_attributes + ) + + if not profile_df.empty: + st.dataframe(profile_df, use_container_width=True) + + # Download button + csv = profile_df.to_csv(index=False) + st.download_button( + label="📥 Download as CSV", + data=csv, + file_name=f"step_{step_id}_profiles.csv", + mime="text/csv" + ) + else: + # Fallback to simple list + profile_df = pd.DataFrame({'cdp_customer_id': filtered_profiles}) + st.dataframe(profile_df, use_container_width=True) + + # Download button + csv = profile_df.to_csv(index=False) + st.download_button( + label="📥 Download as CSV", + data=csv, + file_name=f"step_{step_id}_profiles.csv", + mime="text/csv" + ) + else: + # Show appropriate message when no profiles + step_column = get_step_column_name(step_id, stage_idx) + if step_column not in generator.profile_data.columns: + st.warning("No profile data available for this step.") + else: + st.info("No profiles are currently in this step.") + + # Always display step information regardless of profile count + st.markdown("---") + st.markdown(f"**Step:** {step_name}") + st.markdown(f"**Type:** {step_type}") + if step_id: + st.markdown(f"**ID:** {step_id}") + + # Show SQL query used for this step + st.markdown("---") + st.markdown("**📊 SQL Query Used:**") + st.caption("This shows the equivalent SQL query that would be used to retrieve the profile data displayed above.") + + selected_attributes = SessionStateManager.get("selected_attributes", []) + step_column = get_step_column_name(step_id, stage_idx) + sql_query = generate_step_query_sql( + step_column, + generator.profile_data.columns.tolist(), + selected_attributes + ) + + # Show query in expandable section for better UI + with st.expander("🔍 View SQL Query", expanded=False): + st.code(sql_query, language="sql") + + # Add helpful explanation + st.markdown("**Query Explanation:**") + st.markdown(f"- **Step Entry**: `{step_column} IS NOT NULL` (profiles who entered this step)") + + step_outtime_column = step_column.replace('intime_', 'outtime_') + if step_outtime_column in generator.profile_data.columns: + st.markdown(f"- **Step Exit**: `{step_outtime_column} IS NULL` (exclude profiles that exited this step)") + + if 'outtime_journey' in generator.profile_data.columns: + st.markdown("- **Journey Filter**: `outtime_journey IS NULL` (exclude profiles that left the journey)") + + if selected_attributes: + st.markdown(f"- **Selected Attributes**: {', '.join(selected_attributes)}") + else: + st.markdown("- **Columns**: Only `cdp_customer_id` (no additional attributes selected)") + + except Exception as e: + st.error(f"Error loading step details: {str(e)}") + + +def render_canvas_tab(generator: CJOFlowchartGenerator, column_mapper: CJOColumnMapper): + """Render the canvas (flowchart) tab.""" + st.subheader("Interactive Journey Flowchart") + + if generator.profile_data.empty: + st.warning("Profile data is not available. The flowchart will show journey structure without profile counts.") + + # Performance note + st.info("💡 **Performance Note**: For better performance with large journeys, consider using the Step Selection tab for detailed analysis.") + + # Generate button + if st.button("🎨 Generate Canvas Visualization", type="primary"): + with st.spinner("Generating interactive flowchart..."): + try: + flowchart_html = create_flowchart_html(generator) + st.components.v1.html(flowchart_html, height=800, scrolling=True) + except Exception as e: + st.error(f"Error generating flowchart: {str(e)}") + else: + st.info("Click the button above to generate the interactive flowchart visualization.") + + +def render_data_tab(generator: CJOFlowchartGenerator, column_mapper: CJOColumnMapper): + """Render the data and mappings tab.""" + st.subheader("Data & Mappings") + + # Column Mappings (moved to top) + st.markdown("### 🗂️ Column Mappings") + st.caption("Technical column names → Display names") + + profile_data = SessionStateManager.get('profile_data') + if profile_data is not None and not profile_data.empty: + # Show ALL column mappings + all_columns = profile_data.columns.tolist() + mapping_data = [] + for col in all_columns: + display_name = column_mapper.map_column_to_display_name(col) + mapping_data.append({ + "Technical Name": col, + "Display Name": display_name + }) + + st.dataframe(pd.DataFrame(mapping_data), use_container_width=True) + else: + st.info("Load profile data to see column mappings.") + + # Journey API Response and Request Details + st.markdown("### 📋 Journey Configuration") + + api_response = SessionStateManager.get('api_response') + journey_id = SessionStateManager.get_journey_id() + + if api_response and journey_id: + # Show the API request details with redacted key + st.markdown("#### API Request Made:") + api_request_info = { + "method": "GET", + "url": f"https://api-cdp.treasuredata.com/entities/journeys/{journey_id}", + "headers": { + "Authorization": "TD1 [REDACTED_API_KEY]", + "Content-Type": "application/json" + } + } + st.code(f"curl -X GET '{api_request_info['url']}' \\\n -H 'Authorization: TD1 [REDACTED_API_KEY]' \\\n -H 'Content-Type: application/json'", language="bash") + + st.markdown("#### Full API Response:") + st.json(api_response) + else: + st.info("Load journey configuration to see API request and response details.") + + +def main(): + """Main application function.""" + st.set_page_config( + page_title="CJO Profile Viewer", + page_icon="🎯", + layout="wide" + ) + + # Load styles + load_all_styles() + + # Initialize session state + SessionStateManager.initialize() + + # Initialize API service + api_service = TDAPIService() + + st.title("🎯 CJO Profile Viewer") + st.markdown("Visualize Customer Journey Orchestration journeys with profile data") + + # Render configuration panel + journey_id, load_config_button = render_configuration_panel() + + # Render attribute selector + load_profile_button = render_attribute_selector() + + # Handle button clicks + handle_config_loading(journey_id, load_config_button, api_service) + handle_profile_loading(load_profile_button, api_service) + + st.markdown("---") + + # Render main content + render_journey_tabs() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/docs/PROJECT_SUMMARY.md b/tool-box/cjo-profile-viewer/docs/PROJECT_SUMMARY.md new file mode 100644 index 00000000..f7c40f3b --- /dev/null +++ b/tool-box/cjo-profile-viewer/docs/PROJECT_SUMMARY.md @@ -0,0 +1,229 @@ +# CJO Profile Viewer - Project Summary + +## 🎯 Project Overview + +The CJO Profile Viewer is a comprehensive Streamlit application for visualizing Customer Journey Orchestration (CJO) journeys from Treasure Data's CDP API. It provides real-time profile tracking, interactive flowcharts, and detailed journey analysis with live data integration. + +## 🏗️ Architecture + +### Modular Design (Post-Refactoring) + +The application follows a clean, modular architecture: + +``` +src/ +├── services/ +│ └── td_api.py # TD API service layer +├── components/ +│ └── flowchart_renderer.py # HTML flowchart generation +├── styles/ +│ ├── __init__.py # Style loading utilities +│ ├── flowchart.css # Flowchart visualization styles +│ ├── modal.css # Modal dialog styles +│ ├── buttons.css # Button styling +│ └── layout.css # General layout styles +├── utils/ +│ └── session_state.py # Session state management +├── column_mapper.py # Column name mapping +├── flowchart_generator.py # Journey structure processing +└── merge_display_formatter.py # Merge step formatting + +app.py # Main application (452 lines) +``` + +### Core Components + +#### 1. **TD API Service Layer** (`src/services/td_api.py`) +- **TDAPIService Class**: Centralized API interactions +- **Journey Configuration**: Fetches journey structure from CDP API +- **Profile Data Loading**: Real-time queries via pytd client +- **Customer Attributes**: Dynamic attribute discovery and selection + +#### 2. **Column Mapper** (`src/column_mapper.py`) +- **Technical to Display Name Conversion**: Maps database columns to readable names +- **CJO Step Type Support**: Handles all 7 step types with proper formatting +- **Journey Table Integration**: Works with dynamically generated table schemas + +#### 3. **Flowchart Generator** (`src/flowchart_generator.py`) +- **Journey Structure Processing**: Parses API responses into flowchart data +- **Profile Count Calculation**: Real-time profile counting per step +- **Complex Path Handling**: Decision points, AB tests, merge hierarchies + +#### 4. **Interactive Components** (`src/components/`) +- **HTML/CSS Flowchart Rendering**: Custom visualization engine +- **Step Click Handling**: Interactive profile exploration +- **Modal Profile Viewer**: Detailed customer data display + +## ✅ **Features Implemented** + +### **1. Two-Step Data Loading** +``` +Step 1: Load Journey Config → Extract audience ID → Get available attributes +Step 2: Select attributes → Load Profile Data → Enable visualization +``` + +### **2. Complete Step Type Support** +- **Wait Steps**: Duration, condition, date, days-of-week waits +- **Activation Steps**: Data export and syndication actions +- **Decision Points**: Segment-based branching with profile distribution +- **AB Test Steps**: Variant allocation with percentage display +- **Jump Steps**: Stage and journey transitions +- **Merge Steps**: Path consolidation with hierarchical display +- **End Steps**: Journey termination points + +### **3. Advanced Merge Step Handling** +**Hierarchical Display Format:** +``` +// Branch paths to merge +Decision: country routing (45 profiles) +--- Wait 3 days (12 profiles) +--- Merge (5eca44ab) (15 profiles) + +// Post-merge consolidated path +Merge: (5eca44ab) - grouping header (15 profiles) +--- Wait 1 day (8 profiles) +--- End Step (5 profiles) +``` + +### **4. Interactive Journey Visualization** +- **Clickable Flowchart**: HTML/CSS based rendering +- **Profile Modal**: Customer ID exploration with search/filter +- **Step Selection Dropdown**: Hierarchical step navigation +- **Real-time Profile Counts**: Live data from journey tables + +### **5. Customer Attribute Integration** +- **Dynamic Attribute Discovery**: Auto-detect available customer fields +- **Selective Loading**: Choose which attributes to include +- **Enhanced Profile Display**: Show customer data alongside journey progression + +## 🔧 **Technical Implementation** + +### **Data Flow** +``` +1. Journey ID Input → CDP API call (journey configuration) +2. Audience ID Extraction → Available attributes discovery +3. Attribute Selection → Profile data query (pytd) +4. Data Processing → Session state storage +5. Visualization → Interactive flowchart + step explorer +``` + +### **Profile Tracking Logic** +```sql +-- Active profiles in step +SELECT COUNT(*) FROM cdp_audience_{audience_id}.journey_{journey_id} +WHERE intime_journey IS NOT NULL + AND outtime_journey IS NULL + AND intime_goal IS NULL + AND intime_stage_{N}_{step_uuid} IS NOT NULL + AND outtime_stage_{N}_{step_uuid} IS NULL +``` + +### **Session State Management** +- **Modular State**: Centralized via `SessionStateManager` class +- **Two-Phase Loading**: Config loaded → Profile loaded states +- **Attribute Caching**: Available attributes stored per audience +- **Error Tracking**: Comprehensive error state management + +## 📊 **UI Implementation** + +### **Step Display Hierarchy** +- **Level 0**: Main steps and stage headers +- **Level 1**: Decision branches, AB variants (prefix: `---`) +- **Level 2**: Nested elements (prefix: `------`) + +### **Profile Count Display** +- **Active Profiles Only**: Currently in journey (not completed/exited) +- **Real-time Updates**: Live queries on button click +- **Aggregation Logic**: Proper counting across merged paths + +### **Interactive Elements** +- **Step Selection Tab**: Dropdown with profile exploration +- **Canvas Tab**: Interactive HTML flowchart +- **Data & Mappings Tab**: Technical column information + +## 🎨 **Visual Design** + +### **Color Coding** +- **Decision Points**: Yellow/beige (`#f8eac5`) +- **Wait Steps**: Light pink/red (`#f8dcda`) +- **Activations**: Light green (`#d8f3ed`) +- **Jumps/End Steps**: Light blue/purple (`#e8eaff`) +- **Merge Steps**: Yellow/beige (`#f8eac5`) + +### **Responsive Layout** +- **Streamlit Components**: Native responsive design +- **Modal Dialogs**: Custom CSS with proper overflow handling +- **Mobile Friendly**: Works across device sizes + +## 🚀 **Usage** + +### **Getting Started** +1. **Launch Application**: + ```bash + streamlit run app.py + ``` + +2. **Load Journey Configuration**: + - Enter Journey ID + - Click "📋 Load Journey Config" + - Wait for configuration and attributes to load + +3. **Load Profile Data**: + - Select desired customer attributes (optional) + - Click "📊 Load Profile Data" + - Explore data via tabs + +### **Navigation** +- **Step Selection Tab**: Choose steps from dropdown, view profile details +- **Canvas Tab**: Generate interactive flowchart visualization +- **Data & Mappings Tab**: View technical details and column mappings + +## 📈 **Performance** + +### **Optimizations** +- **Lazy Loading**: Profile data only loaded when requested +- **Session Caching**: API responses and processed data cached +- **Modular CSS**: Styles loaded separately for browser caching +- **On-Demand Rendering**: Flowchart generated only when needed + +### **Scalability** +- **Large Journeys**: Handles complex multi-stage journeys +- **High Profile Counts**: Efficient querying and display of 1000+ profiles +- **Memory Management**: Proper cleanup and state management + +## 🔍 **Error Handling** + +### **API Errors** +- **Authentication**: Clear TD API key error messages +- **Network Issues**: Timeout and connection error handling +- **Data Validation**: Missing table/column detection + +### **User Experience** +- **Progress Indicators**: Spinners during loading operations +- **Toast Notifications**: Success/error feedback +- **Graceful Degradation**: Partial functionality when data unavailable + +## 📚 **Documentation** + +### **Comprehensive Guides** +- **Journey Tables Guide**: Complete CJO architecture documentation +- **Step Types Guide**: All 7 step type implementations +- **UI Implementation Guide**: Display patterns and formatting rules + +### **Technical References** +- **Column Naming Conventions**: Database schema patterns +- **SQL Query Examples**: Profile tracking and analysis patterns +- **API Integration**: TD API usage and authentication + +--- + +## 🎉 **Success Metrics** + +1. **✅ Complete Feature Set**: All CJO step types supported +2. **✅ Real-time Integration**: Live TD API and profile data +3. **✅ Modular Architecture**: Clean, maintainable codebase (80% size reduction) +4. **✅ User Experience**: Intuitive two-step loading process +5. **✅ Performance**: Sub-second response times for typical usage +6. **✅ Documentation**: Comprehensive guides for architecture and implementation + +The CJO Profile Viewer successfully provides enterprise-grade journey visualization with real-time profile tracking, supporting the complete spectrum of Treasure Data's Customer Journey Orchestration capabilities. \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/docs/STEP_TYPES_GUIDE.md b/tool-box/cjo-profile-viewer/docs/STEP_TYPES_GUIDE.md new file mode 100644 index 00000000..435bcda4 --- /dev/null +++ b/tool-box/cjo-profile-viewer/docs/STEP_TYPES_GUIDE.md @@ -0,0 +1,410 @@ +# CJO Step Types Implementation Guide + +This guide documents the implementation of all CJO (Customer Journey Orchestration) step types in the Profile Viewer, including their display formatting, profile tracking, and special handling requirements. + +## Table of Contents +- [Overview](#overview) +- [Step Type Implementations](#step-type-implementations) +- [Display Formatting Patterns](#display-formatting-patterns) +- [Profile Tracking](#profile-tracking) +- [Technical Implementation](#technical-implementation) + +## Overview + +The CJO Profile Viewer supports all 7 core step types defined in the Treasure Data CDP system: + +1. **Wait Steps** - Time-based delays and condition waits +2. **Activation Steps** - Data export and syndication actions +3. **Decision Points** - Segment-based branching logic +4. **AB Test Steps** - Split testing with variant allocation +5. **Jump Steps** - Stage and journey transitions +6. **Merge Steps** - Path consolidation and convergence +7. **End Steps** - Journey termination points + +## Step Type Implementations + +### 1. Wait Steps + +**Types Supported:** +- **Duration Waits**: Fixed time delays (e.g., "Wait 7 days") +- **Condition Waits**: Wait for customer behavior with timeout +- **Date Waits**: Wait until specific date/time +- **Days of Week Waits**: Wait for specific days + +**Step Type Variants:** +- **`WaitStep`**: Standard wait steps (duration, date, days of week) +- **`WaitCondition_Path`**: Conditional wait paths with timeout handling + +**Display Format:** +``` +Wait 7 days (45 profiles) +Wait for purchase (timeout: 14 days) (23 profiles) +Wait until 2024-01-15 (12 profiles) +Wait for Monday, Wednesday (8 profiles) +Wait Condition: event_name - path_name (15 profiles) # WaitCondition_Path +``` + +**Profile Tracking:** +- **Entry Column**: `intime_stage_{N}_{step_uuid}` +- **Exit Column**: `outtime_stage_{N}_{step_uuid}` +- **Active Profiles**: `intime IS NOT NULL AND outtime IS NULL` + +### 2. Activation Steps + +**Purpose:** Data syndication and export to external systems + +**Display Format:** +``` +Activation: Email Campaign Send (67 profiles) +Activation: CRM Data Export (34 profiles) +``` + +**Profile Tracking:** +- **Entry Column**: `intime_stage_{N}_{step_uuid}` +- **Execution Logic**: Typically immediate (no wait state) +- **Success Tracking**: Via outtime columns + +### 3. Decision Points + +**Purpose:** Segment-based routing with multiple branches + +**Display Format:** +``` +Decision: country routing (145 profiles) +--- Branch: country is japan (67 profiles) +--- Branch: country is canada (23 profiles) +--- Branch: Default/Excluded path (55 profiles) +``` + +**Profile Tracking:** +- **Main Step**: `intime_stage_{N}_{step_uuid}` +- **Branch Columns**: `intime_stage_{N}_{step_uuid}_{segment_id}` +- **Branch Logic**: Each profile enters exactly one branch + +**Technical Implementation:** +- Branch detection via `branches[]` array in step definition +- Segment ID extraction from API response +- Hierarchical display with `---` indentation + +### 4. AB Test Steps + +**Purpose:** Split testing with percentage-based variant allocation + +**Display Format:** +``` +AB Test: email variants (89 profiles) +--- Variant A (5%): 4 profiles +--- Variant B (5%): 5 profiles +--- Control (90%): 80 profiles +``` + +**Profile Tracking:** +- **Main Step**: `intime_stage_{N}_{step_uuid}` +- **Variant Columns**: `intime_stage_{N}_{step_uuid}_variant_{variant_id}` +- **Assignment Logic**: Hash-based consistent allocation + +**Technical Implementation:** +- Variant detection via `variants[]` array +- Percentage display from variant configuration +- Profile distribution across variants + +### 5. Jump Steps + +**Purpose:** Transitions between stages or journeys + +**Display Format:** +``` +Jump to Stage 2 (12 profiles) +Jump to Journey 'Onboarding Flow' (8 profiles) +``` + +**Profile Tracking:** +- **Exit Tracking**: Via `journey_{id}_standby` table +- **Transition Logic**: Profiles move to target destination +- **History Preservation**: Via `journey_{id}_jump_history` table + +### 6. Merge Steps + +**Purpose:** Path consolidation where multiple branches converge + +**Special Implementation:** Merge steps require hierarchical display to avoid step duplication. + +#### 6.1 Merge Step Hierarchy Format + +**Before Merge (Branch Paths):** +``` +Decision: country is japan (2 profiles) +--- Wait 3 days (0 profiles) +--- Merge (5eca44ab-201f-40a7-98aa-b312449df0fe) (3 profiles) + +Decision: Excluded Profiles (1 profiles) +--- Merge (5eca44ab-201f-40a7-98aa-b312449df0fe) (3 profiles) +``` + +**After Merge (Consolidated Path):** +``` +Merge: (5eca44ab-201f-40a7-98aa-b312449df0fe) - grouping header (3 profiles) +--- Wait 1 day (0 profiles) +--- End Step (0 profiles) +``` + +#### 6.2 Merge Technical Implementation + +**Enhanced FlowchartStep Class:** +```python +class FlowchartStep: + is_merge_endpoint: bool = False # Merge at end of branch + is_merge_header: bool = False # Merge as grouping header +``` + +**Path Building Logic:** +- `_build_paths_with_merges()`: Handles stages with merge points +- `_trace_paths_to_merge()`: Traces branch paths to convergence +- `_build_pre_merge_paths()`: Builds paths leading to merges +- `_build_post_merge_paths()`: Handles paths after merge points + +**Display Integration:** +- Automatic merge point detection +- Conditional hierarchical formatting +- Breadcrumb preservation for post-merge steps +- Profile count aggregation at merge points + +**Specialized Formatter Module:** +- **`merge_display_formatter.py`**: Dedicated module for merge hierarchy formatting +- **`format_merge_hierarchy()`**: Creates the exact hierarchical display format +- **Branch Path Separation**: Distinguishes pre-merge and post-merge paths +- **Smart Detection**: Only activates when merge points are present in journey + +#### 6.3 Merge Step Profile Tracking + +**Branch Entry Tracking:** +```sql +-- Profiles entering merge from different branches +SELECT COUNT(*) FROM journey_{id} +WHERE intime_stage_{N}_{merge_uuid} IS NOT NULL +``` + +**Post-Merge Tracking:** +```sql +-- Profiles continuing after merge +SELECT COUNT(*) FROM journey_{id} +WHERE intime_stage_{N}_{merge_uuid} IS NOT NULL + AND outtime_stage_{N}_{merge_uuid} IS NOT NULL +``` + +### 7. End Steps + +**Purpose:** Journey termination points + +**Display Format:** +``` +End Step (23 profiles) +Goal Achievement (45 profiles) +``` + +**Profile Tracking:** +- **Entry Column**: `intime_stage_{N}_{step_uuid}` +- **Journey Completion**: Via `intime_goal` or `outtime_journey` +- **Final State**: No exit from end steps + +## Display Formatting Patterns + +### Indentation Rules + +**Standard Steps:** +``` +Step Name (profile count) +``` + +**Grouped Steps (Decision/AB Test branches):** +``` +Decision: name (total count) +--- Branch: name (branch count) +--- Branch: name (branch count) +``` + +**Merge Hierarchies:** +``` +Branch Path → Merge Endpoint: +--- Merge (uuid) (count) + +Merge Grouping Header: +Merge: (uuid) - grouping header (count) +--- Post-merge step (count) +``` + +### Profile Count Display + +**Active Profiles Only:** +- Profiles currently in the step (not completed/exited) +- Query pattern: `intime IS NOT NULL AND outtime IS NULL` + +**Aggregation Rules:** +- **Decision Points**: Sum of all branch profiles +- **AB Tests**: Sum of all variant profiles +- **Merge Points**: Aggregated count from all converging paths + +### UUID Handling + +**Display Format:** +- Short UUID format: First 8 characters (e.g., `5eca44ab`) +- Full UUID in tooltips and details +- Consistent shortening across all step types + +## Profile Tracking + +### Column Naming Patterns + +**Standard Steps:** +``` +intime_stage_{stage_index}_{step_uuid} +outtime_stage_{stage_index}_{step_uuid} +``` + +**Decision Point Branches:** +``` +intime_stage_{stage_index}_{step_uuid}_{segment_id} +outtime_stage_{stage_index}_{step_uuid}_{segment_id} +``` + +**AB Test Variants:** +``` +intime_stage_{stage_index}_{step_uuid}_variant_{variant_id} +outtime_stage_{stage_index}_{step_uuid}_variant_{variant_id} +``` + +### Profile State Logic + +**Active in Step:** +```sql +WHERE intime_stage_{N}_{step_uuid} IS NOT NULL + AND outtime_stage_{N}_{step_uuid} IS NULL + AND intime_journey IS NOT NULL + AND outtime_journey IS NULL + AND intime_goal IS NULL +``` + +**Actual Implementation Logic:** +The `CJOFlowchartGenerator` class implements detailed profile counting: + +```python +def _get_step_profile_count(self, step_id: str, stage_idx: int, step_type: str) -> int: + """Get profile count for a specific step with type-specific logic.""" + if self.profile_data.empty: + return 0 + + try: + # Convert step ID to column name + step_uuid = step_id.replace('-', '_') + step_column = f"intime_stage_{stage_idx}_{step_uuid}" + outtime_column = f"outtime_stage_{stage_idx}_{step_uuid}" + + if step_column not in self.profile_data.columns: + return 0 + + # Base condition: profiles that entered this step + condition = self.profile_data[step_column].notna() + + # For non-endpoint steps, only count active profiles + if outtime_column in self.profile_data.columns: + # Still in step (not exited) + condition = condition & self.profile_data[outtime_column].isna() + + # Only count profiles still active in journey + condition = condition & self.profile_data['intime_journey'].notna() + condition = condition & self.profile_data['outtime_journey'].isna() + condition = condition & self.profile_data['intime_goal'].isna() + + return len(self.profile_data[condition]) + except Exception: + return 0 +``` + +**Completed Step:** +```sql +WHERE intime_stage_{N}_{step_uuid} IS NOT NULL + AND outtime_stage_{N}_{step_uuid} IS NOT NULL +``` + +## Technical Implementation + +### Core Classes + +**FlowchartStep:** +```python +@dataclass +class FlowchartStep: + step_id: str + step_type: str + name: str + stage_index: int + profile_count: int = 0 + is_merge_endpoint: bool = False + is_merge_header: bool = False +``` + +**Step Type Detection:** +```python +def get_step_type(step_data: dict) -> str: + step_type = step_data.get('type', 'Unknown') + + # Handle complex step variants + if step_type == 'DecisionPoint': + return 'DecisionPoint_Branch' if has_branches else 'DecisionPoint' + elif step_type == 'ABTest': + return 'ABTest_Variant' if has_variants else 'ABTest' + elif step_type == 'WaitStep': + return 'WaitCondition_Path' if has_conditions else 'WaitStep' + + return step_type +``` + +**Column Mapper Integration:** +The `CJOColumnMapper` class handles complex step type detection and formatting: + +```python +# In column_mapper.py - Decision Point branch detection +if step_data.get('type') == 'DecisionPoint': + branches = step_data.get('branches', []) + for branch in branches: + segment_id = branch.get('segmentId') + # Creates DecisionPoint_Branch entries + +# AB Test variant detection +if step_data.get('type') == 'ABTest': + variants = step_data.get('variants', []) + for variant in variants: + variant_id = variant.get('id') + # Creates ABTest_Variant entries +``` + +### Display Integration + +**Step Formatting Pipeline:** +1. **Step Detection**: Identify step type from API response +2. **Profile Counting**: Query live journey table data +3. **Display Formatting**: Apply type-specific formatting rules +4. **Hierarchy Building**: Handle indentation and grouping +5. **UI Rendering**: Generate final display strings + +**Special Handling:** +- **Merge Detection**: Automatic identification of merge points in stages +- **Conditional Formatting**: Hierarchical display only when merges present +- **Breadcrumb Preservation**: Maintain path context through merges +- **Profile Aggregation**: Correct counting across merged paths + +### Error Handling + +**Missing Columns:** +- Graceful handling of non-existent step columns +- Default to 0 profiles for missing data +- Error logging for debugging + +**Invalid Step Types:** +- Fallback to generic step formatting +- Warning messages for unknown types +- Defensive programming throughout + +--- + +This implementation provides comprehensive support for all CJO step types while maintaining clean, hierarchical display formatting and accurate profile tracking across complex journey structures. \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/docs/UI_IMPLEMENTATION_GUIDE.md b/tool-box/cjo-profile-viewer/docs/UI_IMPLEMENTATION_GUIDE.md new file mode 100644 index 00000000..365139c6 --- /dev/null +++ b/tool-box/cjo-profile-viewer/docs/UI_IMPLEMENTATION_GUIDE.md @@ -0,0 +1,461 @@ +# UI Implementation Guide + +This guide documents the user interface patterns, display formatting rules, and implementation details for the CJO Profile Viewer's visual components. + +## Table of Contents +- [Overview](#overview) +- [Step Dropdown Formatting](#step-dropdown-formatting) +- [Flowchart Visualization](#flowchart-visualization) +- [Profile Display Components](#profile-display-components) +- [Indentation and Hierarchy](#indentation-and-hierarchy) +- [Interactive Elements](#interactive-elements) +- [Implementation Details](#implementation-details) + +## Overview + +The CJO Profile Viewer uses several key UI patterns to present complex journey data in an intuitive, hierarchical format. The interface consists of: + +1. **Step Selection Dropdown** - Hierarchical list of all journey steps +2. **Interactive Flowchart** - Visual journey representation with clickable steps +3. **Profile Detail Panels** - Customer data display and analysis +4. **Breadcrumb Navigation** - Path context and journey progression + +## Step Dropdown Formatting + +### Display Hierarchy Rules + +The step dropdown uses a consistent indentation pattern to show journey structure: + +#### Standard Format +``` +Stage Name → Step Name (profile count) +``` + +#### Grouped Elements (Decision Points, AB Tests) +``` +Decision: segment name (total profiles) +--- Branch: condition name (branch profiles) +--- Branch: condition name (branch profiles) +``` + +#### Merge Hierarchies +``` +// Branch paths leading to merge +Decision: country routing (45 profiles) +--- Wait 3 days (12 profiles) +--- Merge (5eca44ab) (15 profiles) + +// Post-merge consolidated path +Merge: (5eca44ab) - grouping header (15 profiles) +--- Wait 1 day (8 profiles) +--- End Step (5 profiles) +``` + +### Indentation Implementation + +**Indentation Levels:** +- **Level 0**: Main steps and grouping headers +- **Level 1**: Branch steps, variants, and post-merge steps (prefix: `---`) +- **Level 2**: Nested elements (prefix: `------`) + +**Code Implementation:** +```python +def format_step_display(step_name: str, profile_count: int, indent_level: int = 0) -> str: + """Format step display with proper indentation.""" + prefix = "--- " if indent_level > 0 else "" + return f"{prefix}{step_name} ({profile_count} profiles)" +``` + +### Special Formatting Cases + +#### Decision Point Branches +```python +# Main decision point (no profile count) +"Decision: country routing" + +# Individual branches (with counts) +"--- Branch: country is japan (23 profiles)" +"--- Branch: country is canada (15 profiles)" +"--- Branch: Default/Excluded path (7 profiles)" +``` + +#### AB Test Variants +```python +# Main AB test (no profile count) +"AB Test: email variants" + +# Individual variants (with percentages and counts) +"--- Variant A (5%): 2 profiles" +"--- Variant B (5%): 3 profiles" +"--- Control (90%): 40 profiles" +``` + +#### Merge Step Handling +```python +# Merge endpoint (end of branch path) +"--- Merge (5eca44ab) (15 profiles)" + +# Merge grouping header (start of consolidated path) +"Merge: (5eca44ab) - grouping header (15 profiles)" +``` + +## Flowchart Visualization + +### HTML/CSS Implementation + +The flowchart uses custom HTML/CSS rendering instead of external libraries for better performance and control. + +#### Stage Containers +```css +.stage-container { + margin: 30px 0; + padding: 20px; + border: 1px solid #444444; + border-radius: 8px; + background-color: #2D2D2D; +} + +.stage-header { + color: #FFFFFF; + font-size: 18px; + font-weight: 600; + margin-bottom: 15px; + text-align: center; +} +``` + +#### Step Boxes +```css +.step-box { + background-color: #f8eac5; + color: #000000; + padding: 15px 20px; + margin: 5px 0; + border-radius: 8px; + min-width: 180px; + max-width: 220px; + text-align: center; + cursor: pointer; + font-weight: 600; + font-size: 13px; + transition: all 0.3s ease; +} + +.step-box:hover { + transform: scale(1.03); + box-shadow: 0 2px 8px rgba(0,0,0,0.1); + border-color: #85C1E9; +} +``` + +#### Step Type Colors +```python +step_type_colors = { + 'DecisionPoint': '#f8eac5', # Decision Point - yellow/beige + 'DecisionPoint_Branch': '#f8eac5', # Decision Point Branch + 'ABTest': '#f8eac5', # AB Test + 'ABTest_Variant': '#f8eac5', # AB Test Variant + 'WaitStep': '#f8dcda', # Wait Step - light pink/red + 'WaitCondition_Path': '#f8dcda', # Wait Condition Path + 'Activation': '#d8f3ed', # Activation - light green + 'Jump': '#e8eaff', # Jump - light blue/purple + 'End': '#e8eaff', # End Step - light blue/purple + 'Merge': '#f8eac5', # Merge Step - yellow/beige + 'Unknown': '#f8eac5' # Unknown - default +} +``` + +### Interactive Features + +#### Click Handling +```javascript +function showProfileModal(stepDataKey) { + const stepData = stepDataStore[stepDataKey]; + if (!stepData) { + console.error('Step data not found for key:', stepDataKey); + return; + } + + // Display modal with profile details + document.getElementById('modalTitle').textContent = stepData.name; + displayProfiles(stepData.profiles, stepData.profile_data); + document.getElementById('profileModal').style.display = 'block'; +} +``` + +#### Tooltip Implementation +```css +.step-tooltip { + position: absolute; + top: -65px; + left: 50%; + transform: translateX(-50%); + background-color: rgba(0,0,0,0.9); + color: white; + padding: 8px 12px; + border-radius: 4px; + font-size: 14px; + opacity: 0; + transition: opacity 0.3s; + z-index: 999999; + max-width: 400px; + text-align: center; +} + +.step-box:hover .step-tooltip { + opacity: 1; +} +``` + +## Profile Display Components + +### Modal Profile Viewer + +#### Structure +```html + +``` + +#### Profile Data Table +```css +.profiles-table { + width: 100%; + border-collapse: collapse; + font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace; + font-size: 12px; + color: #E0E0E0; + background-color: #3A3A3A; +} + +.profiles-table th { + background-color: #2D2D2D; + color: #FFFFFF; + padding: 10px 12px; + text-align: left; + border-bottom: 2px solid #444444; + font-weight: 600; + position: sticky; + top: 0; + z-index: 10; +} +``` + +### Keyboard Shortcuts + +#### Auto-Load on Enter +The application supports pressing Enter in the Journey ID field to automatically trigger configuration loading: + +```python +# In app.py +journey_id = st.text_input( + "Journey ID", + placeholder="e.g., 12345", + key="main_journey_id", + on_change=lambda: st.session_state.update({"auto_load_triggered": True}) +) + +# Auto-load trigger handling +auto_load_triggered = st.session_state.get("auto_load_triggered", False) +if auto_load_triggered and journey_id: + st.session_state["auto_load_triggered"] = False + load_config_button = True # Trigger the loading logic +``` + +### Search and Filtering + +#### Search Implementation +```javascript +function filterProfiles() { + const searchTerm = document.getElementById('searchBox').value.toLowerCase(); + + if (searchTerm === '') { + currentProfiles = allProfiles; + } else { + if (allProfileData.length > 0) { + // Search across all columns in the profile data + const matchingCustomerIds = allProfileData + .filter(profile => { + return Object.values(profile).some(value => + String(value).toLowerCase().includes(searchTerm) + ); + }) + .map(profile => profile.cdp_customer_id); + + currentProfiles = matchingCustomerIds; + } else { + // Fallback to simple customer ID search + currentProfiles = allProfiles.filter(profile => + profile.toLowerCase().includes(searchTerm) + ); + } + } + + displayProfiles(currentProfiles, allProfileData); +} +``` + +## Indentation and Hierarchy + +### Merge Step Indentation Logic + +The most complex UI challenge is properly displaying merge step hierarchies without duplication. + +#### Problem Solved +**Before Fix:** +``` +Merge (5eca44ab) (0 profiles) +Wait 1 day (0 profiles) ← Same level as merge (incorrect) +End Step (0 profiles) ← Same level as merge (incorrect) +``` + +**After Fix:** +``` +Merge: (5eca44ab) - grouping header (3 profiles) +--- Wait 1 day (0 profiles) ← Properly indented +--- End Step (0 profiles) ← Properly indented +``` + +#### Implementation Solution +```python +# Bypass reorganization logic for merge hierarchies +if all_steps and not has_merge_points: + # Original reorganization logic here + pass +else: + # Preserve merge hierarchy formatting + formatted_steps = hierarchical_step_formatter.format_hierarchical_steps( + generator, journey_api_response + ) +``` + +### Breadcrumb Preservation + +#### Breadcrumb Logic +```python +def build_breadcrumb_trail(steps_in_path: List[str]) -> str: + """Build complete breadcrumb trail showing path progression.""" + breadcrumb_parts = [] + + for step_id in steps_in_path: + step_display = format_step_name(step_id) + breadcrumb_parts.append(step_display) + + return " → ".join(breadcrumb_parts) +``` + +#### Post-Merge Breadcrumbs +For steps after merge points, breadcrumbs show the complete path: +``` +Entry → Decision: country routing → Wait 3 days → Merge → Wait 1 day +``` + +## Interactive Elements + +### Button Styling + +#### Primary Buttons +```css +.stButton > button[data-testid="baseButton-primary"], +.stButton > button[kind="primary"] { + background-color: #0066CC !important; + border-color: #0066CC !important; + color: white !important; +} + +.stButton > button[data-testid="baseButton-primary"]:hover, +.stButton > button[kind="primary"]:hover { + background-color: #0052A3 !important; + border-color: #0052A3 !important; + color: white !important; +} +``` + +#### Download Buttons +```python +st.download_button( + label="📥 Download as CSV", + data=csv_data, + file_name=f"step_{step_id}_profiles.csv", + mime="text/csv", + key=f"download_{step_id}" +) +``` + +### Progress Indicators + +#### Loading States +```python +with st.spinner("Loading journey configuration..."): + api_response, error = td_api_service.fetch_journey_data(journey_id) + +with st.spinner("Loading profile data..."): + profile_data = td_api_service.load_profile_data(journey_id, audience_id) +``` + +#### Status Messages +```python +st.toast(f"Journey configuration loaded successfully!", icon="✅") +st.toast(f"Profile data loaded: {len(profile_data)} profiles found.", icon="✅") +st.toast(f"API Error: {error}", icon="❌", duration=30) +``` + +## Implementation Details + +### Component Architecture + +#### Modular Structure +```python +# UI Component rendering functions +def render_configuration_panel() -> Tuple[str, bool]: +def render_attribute_selector() -> bool: +def render_journey_tabs() -> None: +def render_step_selection_tab(generator, column_mapper) -> None: +def render_canvas_tab(generator, column_mapper) -> None: +def render_data_tab(generator, column_mapper) -> None: +``` + +#### State Management +```python +# Session state for UI persistence +SessionStateManager.set("config_loaded", True) +SessionStateManager.set("journey_loaded", True) +SessionStateManager.set("selected_attributes", attributes) +``` + +### Performance Optimizations + +#### Lazy Loading +- Profile data only loaded when explicitly requested +- Flowchart generation on-demand via button click +- Modal content populated only when step is clicked + +#### Caching Strategy +- API responses cached in session state +- Column mapper initialized once per session +- Flowchart generator reused across tabs + +#### Memory Management +- Large profile datasets handled with pagination +- Search results filtered client-side for responsiveness +- Modal content cleared between uses + +--- + +This UI implementation provides a clean, hierarchical interface for complex journey data while maintaining good performance and user experience across all journey types and sizes. \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/docs/journey-tables-guide.md b/tool-box/cjo-profile-viewer/docs/journey-tables-guide.md new file mode 100644 index 00000000..f1fa3c53 --- /dev/null +++ b/tool-box/cjo-profile-viewer/docs/journey-tables-guide.md @@ -0,0 +1,450 @@ +# Journey System-Generated Tables Guide + +This guide provides comprehensive documentation for the system-generated journey tables within the CDP Audience framework (`cdp_audience_{audienceid}` databases) and how to use them to trace profile movement through customer journeys. + +## Table of Contents +- [Overview](#overview) +- [Journey Table Structure](#journey-table-structure) +- [Auxiliary Journey Tables](#auxiliary-journey-tables) +- [Column Naming Conventions](#column-naming-conventions) +- [Tracing Profile Movement](#tracing-profile-movement) +- [SQL Query Examples](#sql-query-examples) +- [Common Use Cases](#common-use-cases) + +## Overview + +The Journey system in TD-CDP-API creates a set of dynamically generated tables to track customer profiles as they move through defined journey stages. These tables are created within each audience's database (`cdp_audience_{audienceid}`) and provide detailed tracking of profile progression, timestamps, and state transitions. + +### Core Architecture +- **Main Journey Table**: Tracks profile progression through stages and steps +- **Auxiliary Tables**: Support reentry, jump history, and workflow management +- **Temporal Tracking**: Precise timestamping of all profile state changes +- **Multi-Version Support**: Handles journey versioning and sibling journeys + +## Journey Table Structure + +### Main Journey Table: `journey_{journeyid}` + +This is the primary table that tracks profiles as they move through a journey. The table structure is dynamically generated based on the journey definition. + +#### Core Columns +- `cdp_customer_id`: Unique customer identifier +- `intime_journey`: Timestamp when profile enters the journey +- `outtime_journey`: Timestamp when profile exits the journey (NULL while in journey) +- `intime_goal`: Timestamp when profile reaches the journey goal + +#### Dynamic Stage Columns +For each stage in the journey, the following columns are created: + +- `intime_stage_{order_index}`: Entry time into stage N +- `outtime_stage_{order_index}`: Exit time from stage N +- `intime_stage_{order_index}_milestone`: Milestone achievement time + +#### Exit Criteria Columns +For each exit criteria defined in a stage: + +- `intime_stage_{order_index}_exit_{exit_index}`: Time when exit criteria was met + +#### Step Columns +For each step within stages: + +- `intime_stage_{order_index}_{step_uuid}`: Entry time into specific step +- `outtime_stage_{order_index}_{step_uuid}`: Exit time from specific step + +#### Decision Point Columns +For decision point steps: + +- `intime_stage_{order_index}_{step_uuid}_{segment_id}`: Entry time into specific branch +- `outtime_stage_{order_index}_{step_uuid}_{segment_id}`: Exit time from specific branch + +#### A/B Test Columns +For A/B test steps: + +- `intime_stage_{order_index}_{step_uuid}_variant_{variant_id}`: Entry time into specific variant +- `outtime_stage_{order_index}_{step_uuid}_variant_{variant_id}`: Exit time from specific variant + +## Auxiliary Journey Tables + +### 1. Standby Table: `journey_{journeyid}_standby` + +Manages profiles waiting to enter other journeys via jump actions. + +#### Columns: +- `session_unixtime`: Processing session timestamp +- `cdp_customer_id`: Customer identifier +- `source_journey_id`: ID of the journey the profile is jumping from +- `target_journey_id`: ID of the destination journey +- `target_journey_stage_id`: Specific stage in target journey +- `reason`: Reason for jump ('goal', 'exit', 'jump_step') + +#### Usage: +```sql +-- Check profiles ready to jump to other journeys +SELECT + cdp_customer_id, + source_journey_id, + target_journey_id, + reason +FROM journey_{journey_id}_standby +WHERE target_journey_id = '{target_journey_id}' +``` + +### 2. Jump History Table: `journey_{journeyid}_jump_history` + +Archives the historical state of profiles when they jump out of the journey. + +#### Columns: +Contains all columns from the main journey table, preserving the state at jump time. + +#### Usage: +```sql +-- View historical journey state for jumped profiles +SELECT + cdp_customer_id, + intime_journey, + intime_stage_0, + intime_stage_1 +FROM journey_{journey_id}_jump_history +WHERE cdp_customer_id = '{customer_id}' +``` + +### 3. Reentry History Table: `journey_{journeyid}_reentry_history` + +Tracks profiles that have re-entered the journey. + +#### Stage-Specific Reentry Tables: `journey_{journeyid}_reentry_stage_{stage_order_index}` + +Manages reentry at specific stages based on journey reentry mode settings. + +#### Usage: +```sql +-- Check reentry history for a profile +SELECT + cdp_customer_id, + intime_journey, + outtime_journey +FROM journey_{journey_id}_reentry_history +WHERE cdp_customer_id = '{customer_id}' +ORDER BY intime_journey DESC +``` + +### 4. Last Import Table: `journey_{journeyid}_last_import` + +Tracks the last successful data import for workflow synchronization. + +#### Columns: +- `time`: Import timestamp +- `last_commit_id`: Last processed commit ID + +#### Usage: +```sql +-- Get latest import status +SELECT + MAX_BY(last_commit_id, time) AS last_commit_id +FROM journey_{journey_id}_last_import +``` + +## Column Naming Conventions + +Understanding the column naming pattern is crucial for querying journey data: + +### Pattern Structure: +- **Journey Level**: `intime_journey`, `outtime_journey`, `intime_goal` +- **Stage Level**: `intime_stage_{N}`, `outtime_stage_{N}`, `intime_stage_{N}_milestone` +- **Exit Level**: `intime_stage_{N}_exit_{M}` +- **Step Level**: `intime_stage_{N}_{step_uuid}`, `outtime_stage_{N}_{step_uuid}` +- **Decision Point**: `intime_stage_{N}_{step_uuid}_{segment_id}` +- **A/B Test**: `intime_stage_{N}_{step_uuid}_variant_{variant_id}` + +### Time Values: +- **Non-NULL**: Profile has reached this state +- **NULL**: Profile has not reached this state +- **Unix Timestamp**: Actual time when state was reached + +## Tracing Profile Movement + +### Profile States + +A profile can be in one of these states: +- **Not in Journey**: `intime_journey IS NULL` +- **Active in Journey**: `intime_journey IS NOT NULL AND outtime_journey IS NULL` +- **Completed Journey**: `intime_journey IS NOT NULL AND intime_goal IS NOT NULL` +- **Exited Journey**: `intime_journey IS NOT NULL AND outtime_journey IS NOT NULL` + +### Stage Progression + +Profiles move through stages sequentially. Current stage can be determined by: +1. Latest non-NULL `intime_stage_N` where `outtime_stage_N IS NULL` +2. Check outside journey conditions (goal/exit criteria met) + +## SQL Query Examples + +### 1. Find Current Journey Status for a Profile + +```sql +-- Get comprehensive journey status for a specific customer +SELECT + cdp_customer_id, + CASE + WHEN intime_journey IS NULL THEN 'Not in Journey' + WHEN outtime_journey IS NOT NULL THEN 'Exited Journey' + WHEN intime_goal IS NOT NULL THEN 'Reached Goal' + ELSE 'Active in Journey' + END AS journey_status, + intime_journey, + outtime_journey, + intime_goal +FROM journey_{journey_id} +WHERE cdp_customer_id = '{customer_id}' +``` + +### 2. Determine Current Stage for Active Profiles + +```sql +-- Find current stage for all active profiles +SELECT + cdp_customer_id, + CASE + -- Check each stage in reverse order (latest first) + WHEN intime_stage_2 IS NOT NULL AND outtime_stage_2 IS NULL THEN 'Stage 2' + WHEN intime_stage_1 IS NOT NULL AND outtime_stage_1 IS NULL THEN 'Stage 1' + WHEN intime_stage_0 IS NOT NULL AND outtime_stage_0 IS NULL THEN 'Stage 0' + ELSE 'Unknown' + END AS current_stage, + intime_journey +FROM journey_{journey_id} +WHERE intime_journey IS NOT NULL + AND outtime_journey IS NULL + AND intime_goal IS NULL +``` + +### 3. Profile Journey Timeline + +```sql +-- Create timeline of profile movement through journey +SELECT + cdp_customer_id, + 'Journey Entry' AS event_type, + intime_journey AS event_time +FROM journey_{journey_id} +WHERE cdp_customer_id = '{customer_id}' AND intime_journey IS NOT NULL + +UNION ALL + +SELECT + cdp_customer_id, + 'Stage 0 Entry' AS event_type, + intime_stage_0 AS event_time +FROM journey_{journey_id} +WHERE cdp_customer_id = '{customer_id}' AND intime_stage_0 IS NOT NULL + +UNION ALL + +SELECT + cdp_customer_id, + 'Stage 0 Milestone' AS event_type, + intime_stage_0_milestone AS event_time +FROM journey_{journey_id} +WHERE cdp_customer_id = '{customer_id}' AND intime_stage_0_milestone IS NOT NULL + +UNION ALL + +SELECT + cdp_customer_id, + 'Stage 1 Entry' AS event_type, + intime_stage_1 AS event_time +FROM journey_{journey_id} +WHERE cdp_customer_id = '{customer_id}' AND intime_stage_1 IS NOT NULL + +-- Continue for all stages... + +UNION ALL + +SELECT + cdp_customer_id, + 'Goal Reached' AS event_type, + intime_goal AS event_time +FROM journey_{journey_id} +WHERE cdp_customer_id = '{customer_id}' AND intime_goal IS NOT NULL + +ORDER BY event_time ASC +``` + +### 4. Stage Conversion Rates + +```sql +-- Calculate conversion rates between stages +WITH stage_counts AS ( + SELECT + COUNT(CASE WHEN intime_stage_0 IS NOT NULL THEN 1 END) AS stage_0_entries, + COUNT(CASE WHEN intime_stage_1 IS NOT NULL THEN 1 END) AS stage_1_entries, + COUNT(CASE WHEN intime_stage_2 IS NOT NULL THEN 1 END) AS stage_2_entries, + COUNT(CASE WHEN intime_goal IS NOT NULL THEN 1 END) AS goal_completions + FROM journey_{journey_id} + WHERE intime_journey IS NOT NULL +) +SELECT + stage_0_entries, + stage_1_entries, + stage_2_entries, + goal_completions, + ROUND(100.0 * stage_1_entries / NULLIF(stage_0_entries, 0), 2) AS stage_0_to_1_conversion, + ROUND(100.0 * stage_2_entries / NULLIF(stage_1_entries, 0), 2) AS stage_1_to_2_conversion, + ROUND(100.0 * goal_completions / NULLIF(stage_0_entries, 0), 2) AS overall_conversion +FROM stage_counts +``` + +### 5. Exit Analysis + +```sql +-- Analyze how profiles exit the journey +SELECT + cdp_customer_id, + CASE + WHEN intime_goal IS NOT NULL THEN 'Completed Goal' + WHEN intime_stage_0_exit_0 IS NOT NULL THEN 'Stage 0 Exit Criteria' + WHEN intime_stage_1_exit_0 IS NOT NULL THEN 'Stage 1 Exit Criteria' + WHEN outtime_journey IS NOT NULL THEN 'Other Exit' + ELSE 'Still Active' + END AS exit_reason, + COALESCE( + intime_goal, + intime_stage_0_exit_0, + intime_stage_1_exit_0, + outtime_journey + ) AS exit_time +FROM journey_{journey_id} +WHERE intime_journey IS NOT NULL +``` + +### 6. Time in Stage Analysis + +```sql +-- Calculate time spent in each stage +SELECT + cdp_customer_id, + -- Time in Stage 0 + CASE + WHEN intime_stage_0 IS NOT NULL AND outtime_stage_0 IS NOT NULL + THEN outtime_stage_0 - intime_stage_0 + WHEN intime_stage_0 IS NOT NULL AND outtime_stage_0 IS NULL + AND (intime_goal IS NOT NULL OR outtime_journey IS NOT NULL) + THEN COALESCE(intime_goal, outtime_journey) - intime_stage_0 + END AS stage_0_duration_seconds, + + -- Time in Stage 1 + CASE + WHEN intime_stage_1 IS NOT NULL AND outtime_stage_1 IS NOT NULL + THEN outtime_stage_1 - intime_stage_1 + WHEN intime_stage_1 IS NOT NULL AND outtime_stage_1 IS NULL + AND (intime_goal IS NOT NULL OR outtime_journey IS NOT NULL) + THEN COALESCE(intime_goal, outtime_journey) - intime_stage_1 + END AS stage_1_duration_seconds + +FROM journey_{journey_id} +WHERE intime_journey IS NOT NULL + AND cdp_customer_id = '{customer_id}' +``` + +### 7. Step-Level Tracking + +```sql +-- Track profile movement through specific steps in a stage +SELECT + cdp_customer_id, + intime_stage_0_{step_uuid_1} AS step_1_entry, + outtime_stage_0_{step_uuid_1} AS step_1_exit, + intime_stage_0_{step_uuid_2} AS step_2_entry, + outtime_stage_0_{step_uuid_2} AS step_2_exit, + CASE + WHEN outtime_stage_0_{step_uuid_1} IS NOT NULL AND intime_stage_0_{step_uuid_2} IS NOT NULL + THEN intime_stage_0_{step_uuid_2} - outtime_stage_0_{step_uuid_1} + END AS step_transition_time_seconds +FROM journey_{journey_id} +WHERE cdp_customer_id = '{customer_id}' + AND intime_stage_0 IS NOT NULL +``` + +### 8. Jump and Reentry Tracking + +```sql +-- Find profiles that have jumped or re-entered +SELECT + j.cdp_customer_id, + 'Jump' AS movement_type, + jh.intime_journey AS original_entry, + j.intime_journey AS new_entry, + s.target_journey_id, + s.reason +FROM journey_{journey_id} j +LEFT JOIN journey_{journey_id}_jump_history jh + ON j.cdp_customer_id = jh.cdp_customer_id +LEFT JOIN journey_{journey_id}_standby s + ON j.cdp_customer_id = s.cdp_customer_id +WHERE jh.cdp_customer_id IS NOT NULL OR s.cdp_customer_id IS NOT NULL + +UNION ALL + +SELECT + r.cdp_customer_id, + 'Reentry' AS movement_type, + r.intime_journey AS original_entry, + j.intime_journey AS new_entry, + NULL AS target_journey_id, + 'Reentry' AS reason +FROM journey_{journey_id}_reentry_history r +JOIN journey_{journey_id} j + ON r.cdp_customer_id = j.cdp_customer_id +WHERE r.intime_journey < j.intime_journey +``` + +## Common Use Cases + +### 1. Journey Performance Analysis +- Track conversion rates at each stage +- Identify bottlenecks and drop-off points +- Measure time to completion +- Compare performance across different journey versions + +### 2. Customer Behavior Analysis +- Understand profile progression patterns +- Identify common exit points +- Analyze reentry behavior +- Track engagement over time + +### 3. A/B Testing Analysis +- Compare variant performance in A/B test steps +- Measure impact of different journey paths +- Track decision point branch selection + +### 4. Operational Monitoring +- Monitor active profile counts +- Track system performance and data flow +- Identify processing issues +- Manage jump and reentry scenarios + +### 5. Personalization +- Use journey state for real-time personalization +- Trigger actions based on stage progression +- Customize experiences based on journey history + +## Best Practices + +1. **Column Existence**: Always check if columns exist before querying, as journey structure can vary +2. **NULL Handling**: Use proper NULL checks when determining profile states +3. **Time Calculations**: Remember timestamps are in Unix format (seconds since epoch) +4. **Performance**: Use appropriate indexes on `cdp_customer_id` and time columns +5. **Version Awareness**: Consider journey versioning when analyzing historical data +6. **Reentry Logic**: Account for reentry modes when analyzing profile behavior + +## Performance Considerations + +- **Indexing**: Ensure proper indexes on frequently queried columns +- **Query Optimization**: Use specific column selection rather than SELECT * +- **Time Ranges**: Add time range filters to improve query performance +- **Join Strategies**: Be mindful of join performance with large customer tables +- **Caching**: Consider caching frequently accessed journey metadata + +--- + +This documentation provides the foundation for effectively querying and analyzing journey data within the TD-CDP-API system. For specific implementation details or advanced use cases, refer to the source code in `app/models/journey/` and related journey modules. \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/requirements.txt b/tool-box/cjo-profile-viewer/requirements.txt new file mode 100644 index 00000000..f3c47ebb --- /dev/null +++ b/tool-box/cjo-profile-viewer/requirements.txt @@ -0,0 +1,5 @@ +streamlit==1.28.1 +pandas==2.1.1 +numpy==1.24.3 +requests==2.31.0 +pytd==2.2.0 \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/__init__.py b/tool-box/cjo-profile-viewer/src/__init__.py new file mode 100644 index 00000000..bf1750aa --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/__init__.py @@ -0,0 +1,10 @@ +""" +CJO Profile Viewer - Source Modules + +This package contains the core modules for the CJO Profile Viewer application: +- column_mapper: CJO column name mapping functionality +- flowchart_generator: Journey flowchart generation +- merge_display_formatter: Display formatting utilities +""" + +__version__ = "1.0.0" \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/column_mapper.py b/tool-box/cjo-profile-viewer/src/column_mapper.py new file mode 100644 index 00000000..c26d1aac --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/column_mapper.py @@ -0,0 +1,242 @@ +""" +Column Mapping Module for CJO Profile Viewer + +This module implements the column mapping logic from guides/journey_column_mapping.md +to convert technical column names from journey tables to human-readable display names. +""" + +import re +from typing import Dict, List, Optional, Tuple + + +class CJOColumnMapper: + """Maps CJO table column names to human-readable display names using API response data.""" + + def __init__(self, api_response: dict): + """ + Initialize the mapper with journey API response. + + Args: + api_response: Journey API response containing stage and step definitions + """ + self.api_response = api_response + self.journey_data = api_response.get('data', {}) + self.attributes = self.journey_data.get('attributes', {}) + self.stages = self.attributes.get('journeyStages', []) + + # Build lookup maps for efficient mapping + self._build_lookup_maps() + + def _build_lookup_maps(self): + """Build lookup maps for steps, variants, and branches.""" + self.step_map = {} + self.variant_map = {} + self.branch_map = {} + + for stage_idx, stage in enumerate(self.stages): + steps = stage.get('steps', {}) + + for step_uuid, step_data in steps.items(): + # Convert UUID format (API uses hyphens, columns use underscores) + converted_uuid = step_uuid.replace('-', '_') + self.step_map[converted_uuid] = { + 'stage_index': stage_idx, + 'uuid': step_uuid, + 'data': step_data + } + + # Map AB test variants + if step_data.get('type') == 'ABTest': + variants = step_data.get('variants', []) + for variant in variants: + variant_uuid = variant['id'].replace('-', '_') + self.variant_map[variant_uuid] = { + 'stage_index': stage_idx, + 'step_uuid': converted_uuid, + 'data': variant + } + + # Map decision point branches + if step_data.get('type') == 'DecisionPoint': + branches = step_data.get('branches', []) + for branch in branches: + segment_id = str(branch.get('segmentId', '')) + self.branch_map[segment_id] = { + 'stage_index': stage_idx, + 'step_uuid': converted_uuid, + 'data': branch + } + + def map_column_to_display_name(self, column_name: str) -> str: + """ + Map a technical column name to a human-readable display name. + + Args: + column_name: Technical column name from journey table + + Returns: + Human-readable display name following the guide's formatting rules + """ + # Core journey columns + if column_name == 'cdp_customer_id': + return 'Customer ID' + if column_name == 'intime_journey': + return 'Journey (Entry)' + if column_name == 'outtime_journey': + return 'Journey (Exit)' + if column_name == 'intime_goal': + return 'Goal Achievement (Entry)' + if column_name == 'time': + return 'Timestamp' + + # Stage columns + stage_match = re.match(r'^(intime|outtime)_stage_(\d+)$', column_name) + if stage_match: + time_type, stage_index = stage_match.groups() + time_label = 'Entry' if time_type == 'intime' else 'Exit' + return f'Stage {stage_index} ({time_label})' + + # Milestone columns + milestone_match = re.match(r'^intime_stage_(\d+)_milestone$', column_name) + if milestone_match: + stage_index = int(milestone_match.group(1)) + milestone = self._get_milestone_name(stage_index) + if milestone: + return f'Stage {stage_index} Milestone: {milestone} (Entry)' + return f'Stage {stage_index} Milestone (Entry)' + + # Step columns - extract components + step_match = re.match(r'^(intime|outtime)_stage_(\d+)_(.+)$', column_name) + if step_match: + time_type, stage_index, step_part = step_match.groups() + time_label = 'Entry' if time_type == 'intime' else 'Exit' + + # Handle AB test variants + variant_match = re.match(r'^(.+)_variant_(.+)$', step_part) + if variant_match: + step_uuid, variant_uuid = variant_match.groups() + variant_info = self.variant_map.get(variant_uuid) + if variant_info: + variant_name = variant_info['data'].get('name', f'Variant {variant_uuid}') + return f'ABTest: {variant_name} ({time_label})' + return f'ABTest: Unknown Variant ({time_label})' + + # Handle decision point branches (with segment ID) + if re.match(r'^[a-f0-9_]+_\d+$', step_part): + segment_id = step_part.split('_')[-1] + branch_info = self.branch_map.get(segment_id) + if branch_info: + branch_data = branch_info['data'] + if branch_data.get('excludedPath'): + branch_name = 'Excluded Path' + else: + branch_name = branch_data.get('name', f'Branch {segment_id}') + return f'Decision Branch: {branch_name} ({time_label})' + return f'Decision Branch: Branch {segment_id} ({time_label})' + + # Handle regular steps + step_info = self.step_map.get(step_part) + if step_info: + step_data = step_info['data'] + step_type = step_data.get('type', 'Unknown') + + if step_type == 'Activation': + step_name = step_data.get('name', 'Activation') + return f'Activation: {step_name} ({time_label})' + elif step_type == 'WaitStep': + wait_step = step_data.get('waitStep', 1) + wait_unit = step_data.get('waitStepUnit', 'day') + return f'Wait {wait_step} {wait_unit} ({time_label})' + elif step_type == 'Jump': + step_name = step_data.get('name', 'Jump') + return f'Jump: {step_name} ({time_label})' + elif step_type == 'End': + return f'End Step ({time_label})' + elif step_type == 'DecisionPoint': + return f'Decision Point ({time_label})' + elif step_type == 'ABTest': + step_name = step_data.get('name', 'AB Test') + return f'ABTest: {step_name} ({time_label})' + else: + step_name = step_data.get('name', step_type) + return f'{step_name} ({time_label})' + + return 'Unknown' + + def _get_milestone_name(self, stage_index: int) -> Optional[str]: + """Get milestone name for a stage.""" + if stage_index < len(self.stages): + milestone = self.stages[stage_index].get('milestone') + if milestone: + return milestone.get('name') + return None + + def get_step_info(self, column_name: str) -> Optional[Dict]: + """ + Get detailed step information for a column. + + Args: + column_name: Technical column name + + Returns: + Dictionary with step information or None if not a step column + """ + step_match = re.match(r'^(intime|outtime)_stage_(\d+)_(.+)$', column_name) + if not step_match: + return None + + time_type, stage_index, step_part = step_match.groups() + + # Handle AB test variants + variant_match = re.match(r'^(.+)_variant_(.+)$', step_part) + if variant_match: + step_uuid, variant_uuid = variant_match.groups() + variant_info = self.variant_map.get(variant_uuid) + if variant_info: + return { + 'type': 'ABTest_Variant', + 'stage_index': int(stage_index), + 'step_uuid': step_uuid, + 'variant_uuid': variant_uuid, + 'variant_data': variant_info['data'], + 'time_type': time_type + } + + # Handle decision point branches + if re.match(r'^[a-f0-9_]+_\d+$', step_part): + segment_id = step_part.split('_')[-1] + branch_info = self.branch_map.get(segment_id) + if branch_info: + return { + 'type': 'DecisionPoint_Branch', + 'stage_index': int(stage_index), + 'step_uuid': branch_info['step_uuid'], + 'segment_id': segment_id, + 'branch_data': branch_info['data'], + 'time_type': time_type + } + + # Handle regular steps + step_info = self.step_map.get(step_part) + if step_info: + return { + 'type': step_info['data'].get('type', 'Unknown'), + 'stage_index': int(stage_index), + 'step_uuid': step_part, + 'step_data': step_info['data'], + 'time_type': time_type + } + + return None + + def get_all_column_mappings(self, columns: List[str]) -> Dict[str, str]: + """ + Get mappings for all columns in a list. + + Args: + columns: List of technical column names + + Returns: + Dictionary mapping technical names to display names + """ + return {col: self.map_column_to_display_name(col) for col in columns} \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/components/__init__.py b/tool-box/cjo-profile-viewer/src/components/__init__.py new file mode 100644 index 00000000..78bb2a80 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/components/__init__.py @@ -0,0 +1,5 @@ +""" +UI Components for CJO Profile Viewer + +This module contains reusable UI components for the Streamlit application. +""" \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/components/flowchart_renderer.py b/tool-box/cjo-profile-viewer/src/components/flowchart_renderer.py new file mode 100644 index 00000000..04430e54 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/components/flowchart_renderer.py @@ -0,0 +1,368 @@ +""" +Flowchart Renderer Component + +This module handles the generation of interactive HTML flowchart visualizations +for CJO journey data. +""" + +import json +from typing import Dict, List +from ..flowchart_generator import CJOFlowchartGenerator +from ..styles import load_flowchart_styles +from ..utils.step_display import get_step_display_name +from ..utils.profile_filtering import get_step_profiles, get_filtered_profile_data +from ..hierarchical_step_formatter import format_hierarchical_steps + + +def _get_step_profiles_from_dict(generator: CJOFlowchartGenerator, step) -> List[str]: + """Get profiles for a specific step (wrapper for shared utility).""" + step_id = step.get('step_id', step.get('id', '')) + stage_idx = step.get('stage_index', step.get('stage_idx', 0)) + + if not step_id or step.get('is_empty_line', False): + return [] + + try: + return get_step_profiles(generator.profile_data, step_id, stage_idx) + except Exception: + return [] + + +def _get_step_profile_data(generator: CJOFlowchartGenerator, step) -> List[Dict]: + """Get profile data with additional attributes for a specific step.""" + import streamlit as st + + step_profiles = _get_step_profiles_from_dict(generator, step) + + if not step_profiles or generator.profile_data.empty or step.get('is_empty_line', False): + return [] + + # Get selected attributes from session state + selected_attributes = st.session_state.get("selected_attributes", []) + + # Filter profile data for customers in this step + profile_data_subset = generator.profile_data[ + generator.profile_data['cdp_customer_id'].isin(step_profiles) + ] + + # Select columns to include + columns_to_show = ['cdp_customer_id'] + selected_attributes + available_columns = [col for col in columns_to_show if col in profile_data_subset.columns] + + if available_columns: + # Convert to list of dictionaries for JavaScript + profile_records = profile_data_subset[available_columns].to_dict('records') + return profile_records + + return [] + + +def create_flowchart_html(generator: CJOFlowchartGenerator) -> str: + """ + Create an HTML/CSS flowchart visualization with horizontal stage layout. + + Args: + generator: CJOFlowchartGenerator instance + + Returns: + Complete HTML string with embedded CSS and JavaScript + """ + # Get styles + css = load_flowchart_styles() + + # Get journey summary + summary = generator.get_journey_summary() + + # Define specific colors for different step types + step_type_colors = { + 'DecisionPoint': '#f8eac5', # Decision Point + 'DecisionPoint_Branch': '#f8eac5', # Decision Point Branch - yellow/beige + 'ABTest': '#f8eac5', # AB Test + 'ABTest_Variant': '#f8eac5', # AB Test Variant - yellow/beige + 'WaitStep': '#f8dcda', # Wait Step - light pink/red + 'WaitCondition_Path': '#f8dcda', # Wait Condition Path - light pink/red + 'Activation': '#d8f3ed', # Activation - light green + 'Jump': '#e8eaff', # Jump - light blue/purple + 'End': '#e8eaff', # End Step - light blue/purple + 'Merge': '#f8eac5', # Merge Step - yellow/beige (same as Decision/AB Test) + 'Unknown': '#f8eac5' # Unknown - default to yellow/beige + } + + # Build HTML content with horizontal layout (always) + html = f''' + {css} + +
+
+ Journey: {summary.get('journey_name', 'N/A')} (ID: {summary.get('journey_id', 'N/A')}) +
+ +
+ ''' + + # Collect step data for JavaScript + step_data = {} + + # Get hierarchical steps for canvas (without profile counts and UUIDs in names) + hierarchical_steps = format_hierarchical_steps(generator, include_profile_counts=False, include_uuid=False) + + # Group hierarchical steps by stage + stages_steps = {} + for step_display, step_info in hierarchical_steps: + stage_idx = step_info.get('stage_index', 0) + if stage_idx not in stages_steps: + stages_steps[stage_idx] = [] + stages_steps[stage_idx].append((step_display, step_info)) + + # Process each stage using hierarchical steps + for stage_idx, stage in enumerate(generator.stages): + stage_name = stage.name + stage_steps = stages_steps.get(stage_idx, []) + + html += f''' +
+
{stage_name}
+ ''' + + html += '
' + + # Process hierarchical steps for this stage + for i, (step_display, step_info) in enumerate(stage_steps): + # Skip empty lines in visual rendering + if step_info.get('is_empty_line', False): + continue + + step_id = step_info.get('step_id', '') + step_name = step_info.get('name', '') + step_type = step_info.get('step_type', 'Unknown') + profile_count = step_info.get('profile_count', 0) + + # Determine if this is a branch header or indented step + is_branch_header = step_info.get('is_branch_header', False) + is_indented = step_info.get('is_indented', False) + + # Get profile data for modal + step_profile_data = _get_step_profile_data(generator, step_info) + step_profiles = _get_step_profiles_from_dict(generator, step_info) + + # Store step data for JavaScript + step_data_key = f"step_{stage_idx}_{i}_{step_id}" + step_data[step_data_key] = { + 'name': step_name, + 'type': step_type, + 'profiles': step_profiles, + 'profile_data': step_profile_data + } + + # Get color for step type + color = step_type_colors.get(step_type, step_type_colors['Unknown']) + + # Create tooltip content - show only shortened UUID + def get_short_uuid(uuid_string: str) -> str: + """Extract the first part of a UUID (before first hyphen).""" + return uuid_string.split('-')[0] if uuid_string else uuid_string + + short_uuid = get_short_uuid(step_id) if step_id else "" + tooltip_content = f"Step UUID: {short_uuid}" if short_uuid else "No UUID" + + # Apply CSS classes based on hierarchy + css_classes = "step-box" + if is_indented: + css_classes += " indented-step" + if is_branch_header: + css_classes += " branch-header" + + # Create the step box with appropriate styling + if is_branch_header: + # Branch header - no profile count display + html += f''' +
+
{step_display}
+
{tooltip_content}
+
+ ''' + else: + # Regular step - show profile count + html += f''' +
+
{step_display.replace('--- ', '')}
+
{profile_count} profiles
+
{tooltip_content}
+
+ ''' + + html += '
' # Close paths-container div + html += '
' # Close stage-container div + + # Close stages-wrapper div + html += '
' # Close stages-wrapper div + + # Convert step data to JSON + step_data_json = json.dumps(step_data) + + # Add JavaScript for interactivity + html += f''' + + + + + +
+ ''' + + return html \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/flowchart_generator.py b/tool-box/cjo-profile-viewer/src/flowchart_generator.py new file mode 100644 index 00000000..f3201ca3 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/flowchart_generator.py @@ -0,0 +1,815 @@ +""" +Flowchart Generator Module for CJO Profile Viewer + +This module implements flowchart generation logic from guides/cjo_flowchart_generation_guide.md +to create visual representations of customer journeys. +""" + +from typing import Dict, List, Optional, Tuple +import pandas as pd +from src.utils.step_display import get_step_display_name +from src.utils.profile_filtering import get_step_profile_count + + +class FlowchartStep: + """Represents a single step in the journey flowchart.""" + + def __init__(self, step_id: str, step_type: str, name: str, stage_index: int, profile_count: int = 0): + self.step_id = step_id + self.step_type = step_type + self.name = name + self.stage_index = stage_index + self.profile_count = profile_count + self.next_steps = [] + # New attributes for merge step hierarchy + self.is_merge_endpoint = False # True when this merge step is at the end of a branch + self.is_merge_header = False # True when this merge step is a grouping header + + def add_next_step(self, step: 'FlowchartStep'): + """Add a next step in the flow.""" + self.next_steps.append(step) + + +class JourneyStage: + """Represents a journey stage with its steps.""" + + def __init__(self, stage_id: str, name: str, index: int, entry_criteria: str = None, milestone: str = None): + self.stage_id = stage_id + self.name = name + self.index = index + self.entry_criteria = entry_criteria + self.milestone = milestone + self.root_step = None + self.paths = [] + + +class CJOFlowchartGenerator: + """Generates flowchart representations of CJO journeys.""" + + def __init__(self, api_response: dict, profile_data: pd.DataFrame): + """ + Initialize the flowchart generator. + + Args: + api_response: Journey API response + profile_data: DataFrame with profile journey data + """ + self.api_response = api_response + self.profile_data = profile_data + self.journey_data = api_response.get('data', {}) + self.attributes = self.journey_data.get('attributes', {}) + self.stages_data = self.attributes.get('journeyStages', []) + + # Parse journey structure + self.journey_id = self.journey_data.get('id', '') + self.journey_name = self.attributes.get('name', '') + self.audience_id = self.attributes.get('audienceId', '') + + # Build stages + self.stages = self._build_stages() + + def _build_stages(self) -> List[JourneyStage]: + """Build journey stages from API response.""" + stages = [] + + for stage_idx, stage_data in enumerate(self.stages_data): + stage_id = stage_data.get('id', '') + stage_name = stage_data.get('name', f'Stage {stage_idx}') + + entry_criteria = stage_data.get('entryCriteria', {}) + entry_criteria_name = entry_criteria.get('name') if entry_criteria else None + + milestone = stage_data.get('milestone', {}) + milestone_name = milestone.get('name') if milestone else None + + stage = JourneyStage( + stage_id=stage_id, + name=stage_name, + index=stage_idx, + entry_criteria=entry_criteria_name, + milestone=milestone_name + ) + + # Build paths for this stage + stage.paths = self._build_stage_paths(stage_data, stage_idx) + stages.append(stage) + + return stages + + def _build_stage_paths(self, stage_data: dict, stage_idx: int) -> List[List[FlowchartStep]]: + """Build all possible paths through a stage.""" + steps = stage_data.get('steps', {}) + root_step_id = stage_data.get('rootStep') + + if not root_step_id or root_step_id not in steps: + return [] + + root_step_data = steps[root_step_id] + paths = [] + + # Track merge points to avoid duplicating steps after merge + merge_points = self._find_merge_points(steps) + + # If this stage has merge points, we need to handle path convergence + if merge_points: + return self._build_paths_with_merges(steps, root_step_id, stage_idx, merge_points) + + # Universal path building that handles hierarchical steps anywhere in the journey + return self._build_all_paths_from_step(steps, root_step_id, [], stage_idx, merge_points) + + def _deprecated_build_stage_paths_old(self, steps, root_step_id, root_step_data, stage_idx, merge_points): + """Deprecated - old logic that only handled hierarchical steps at root.""" + paths = [] + + if root_step_data.get('type') == 'DecisionPoint': + # Create separate path for each branch + branches = root_step_data.get('branches', []) + for branch in branches: + # Check if this branch points to a wait condition step + next_step_id = branch.get('next') + if next_step_id and next_step_id in steps: + next_step_data = steps[next_step_id] + if next_step_data.get('type') == 'WaitStep' and next_step_data.get('waitStepType') == 'Condition': + # This branch points to a wait condition - create separate paths for each condition + conditions = next_step_data.get('conditions', []) + for condition in conditions: + path = [] + # Add decision point step + decision_step = self._create_step_from_branch(root_step_id, root_step_data, branch, stage_idx) + path.append(decision_step) + + # Add wait condition step + condition_step = self._create_step_from_condition(next_step_id, next_step_data, condition, stage_idx) + path.append(condition_step) + + # Follow the path from this condition + if condition.get('next'): + self._follow_path(steps, condition['next'], path, stage_idx, merge_points) + + paths.append(path) + continue # Skip the normal branch processing + + # Normal branch processing (no wait condition) + path = [] + # Add decision point step + decision_step = self._create_step_from_branch(root_step_id, root_step_data, branch, stage_idx) + path.append(decision_step) + + # Follow the path from this branch + if branch.get('next'): + self._follow_path(steps, branch['next'], path, stage_idx, merge_points) + + paths.append(path) + + elif root_step_data.get('type') == 'ABTest': + # Create separate path for each variant + variants = root_step_data.get('variants', []) + for variant in variants: + path = [] + # Add AB test variant step + variant_step = self._create_step_from_variant(root_step_id, root_step_data, variant, stage_idx) + path.append(variant_step) + + # Follow the path from this variant + if variant.get('next'): + self._follow_path(steps, variant['next'], path, stage_idx, merge_points) + + paths.append(path) + + elif root_step_data.get('type') == 'WaitStep' and root_step_data.get('waitStepType') == 'Condition': + # Create separate path for each condition + conditions = root_step_data.get('conditions', []) + for condition in conditions: + path = [] + # Add wait condition step + condition_step = self._create_step_from_condition(root_step_id, root_step_data, condition, stage_idx) + path.append(condition_step) + + # Follow the path from this condition + if condition.get('next'): + self._follow_path(steps, condition['next'], path, stage_idx, merge_points) + + paths.append(path) + + elif root_step_data.get('type') == 'Merge': + # Merge step - create a single path that consolidates multiple incoming paths + path = [] + # Add merge step + merge_step = self._create_step_from_data(root_step_id, root_step_data, stage_idx) + path.append(merge_step) + + # Follow the path from this merge step + if root_step_data.get('next'): + self._follow_path(steps, root_step_data['next'], path, stage_idx, merge_points) + + paths.append(path) + + else: + # Linear path starting from root + path = [] + self._follow_path(steps, root_step_id, path, stage_idx, merge_points) + paths.append(path) + + return paths + + def _build_all_paths_from_step(self, steps: dict, step_id: str, current_path: List[FlowchartStep], + stage_idx: int, merge_points: set = None, visited: set = None) -> List[List[FlowchartStep]]: + """ + Build all possible paths from a given step, handling hierarchical steps anywhere in the journey. + + This method properly expands DecisionPoints, ABTests, and WaitConditions wherever they appear, + not just at the root of a stage. + """ + if merge_points is None: + merge_points = set() + if visited is None: + visited = set() + + # Prevent infinite loops and handle missing steps + if step_id in visited or step_id not in steps: + return [current_path] if current_path else [] + + visited = visited.copy() + visited.add(step_id) + + step_data = steps[step_id] + step_type = step_data.get('type', '') + + # Handle merge points + if step_id in merge_points: + step = self._create_step_from_data(step_id, step_data, stage_idx) + step.is_merge_endpoint = True + return [current_path + [step]] + + # Handle hierarchical step types - these create multiple paths + if step_type == 'DecisionPoint': + branches = step_data.get('branches', []) + all_paths = [] + + for branch in branches: + # Create branch step + branch_step = self._create_step_from_branch(step_id, step_data, branch, stage_idx) + branch_path = current_path + [branch_step] + + # Continue from this branch's next step + next_step = branch.get('next') + if next_step: + branch_paths = self._build_all_paths_from_step( + steps, next_step, branch_path, stage_idx, merge_points, visited + ) + all_paths.extend(branch_paths) + else: + # End of path + all_paths.append(branch_path) + + return all_paths + + elif step_type == 'ABTest': + variants = step_data.get('variants', []) + all_paths = [] + + for variant in variants: + # Create variant step + variant_step = self._create_step_from_variant(step_id, step_data, variant, stage_idx) + variant_path = current_path + [variant_step] + + # Continue from this variant's next step + next_step = variant.get('next') + if next_step: + variant_paths = self._build_all_paths_from_step( + steps, next_step, variant_path, stage_idx, merge_points, visited + ) + all_paths.extend(variant_paths) + else: + # End of path + all_paths.append(variant_path) + + return all_paths + + elif step_type == 'WaitStep' and step_data.get('waitStepType') == 'Condition': + conditions = step_data.get('conditions', []) + all_paths = [] + + for condition in conditions: + # Create condition step + condition_step = self._create_step_from_condition(step_id, step_data, condition, stage_idx) + condition_path = current_path + [condition_step] + + # Continue from this condition's next step + next_step = condition.get('next') + if next_step: + condition_paths = self._build_all_paths_from_step( + steps, next_step, condition_path, stage_idx, merge_points, visited + ) + all_paths.extend(condition_paths) + else: + # End of path + all_paths.append(condition_path) + + return all_paths + + else: + # Regular step - create single step and continue + step = self._create_step_from_data(step_id, step_data, stage_idx) + new_path = current_path + [step] + + # Continue to next step + next_step = step_data.get('next') + if next_step: + return self._build_all_paths_from_step( + steps, next_step, new_path, stage_idx, merge_points, visited + ) + else: + # End of path + return [new_path] + + def _find_merge_points(self, steps: dict) -> set: + """Find all merge step IDs in the stage.""" + merge_points = set() + for step_id, step_data in steps.items(): + if step_data.get('type') == 'Merge': + merge_points.add(step_id) + return merge_points + + def _build_paths_with_merges(self, steps: dict, root_step_id: str, stage_idx: int, merge_points: set) -> List[List[FlowchartStep]]: + """Build paths for stages that contain merge steps with proper hierarchy.""" + paths = [] + + # First, build all branch paths that lead to merge points + branch_paths = self._build_branch_paths_to_merge(steps, root_step_id, stage_idx, merge_points) + paths.extend(branch_paths) + + # Then, create separate merge grouping paths with post-merge steps + for merge_step_id in merge_points: + merge_step_data = steps[merge_step_id] + merge_header = self._create_step_from_data(merge_step_id, merge_step_data, stage_idx) + merge_header.is_merge_header = True # Mark as grouping header + + # Create post-merge path starting with the header + merge_path = [merge_header] + + # Add post-merge steps + next_step_id = merge_step_data.get('next') + if next_step_id: + self._follow_path(steps, next_step_id, merge_path, stage_idx, merge_points) + + paths.append(merge_path) + + return paths + + def _build_branch_paths_to_merge(self, steps: dict, root_step_id: str, stage_idx: int, merge_points: set) -> List[List[FlowchartStep]]: + """Build all branch paths that lead to merge points, including the merge endpoint.""" + paths = [] + + # Start from root and trace all possible paths + self._trace_paths_to_merge(steps, root_step_id, [], paths, stage_idx, merge_points, set()) + + return paths + + def _trace_paths_to_merge(self, steps: dict, step_id: str, current_path: List, all_paths: List, stage_idx: int, merge_points: set, visited: set): + """Recursively trace paths until we reach a merge point.""" + if step_id in visited or step_id not in steps: + return + + visited = visited.copy() + visited.add(step_id) + + step_data = steps[step_id] + step = self._create_step_from_data(step_id, step_data, stage_idx) + new_path = current_path + [step] + + # If this is a merge point, add the merge endpoint and finish this path + if step_id in merge_points: + step.is_merge_endpoint = True + all_paths.append(new_path) + return + + step_type = step_data.get('type', '') + + if step_type == 'DecisionPoint': + # Create a path for each branch + branches = step_data.get('branches', []) + for branch in branches: + # Create branch step + branch_step = self._create_step_from_branch(step_id, step_data, branch, stage_idx) + branch_path = new_path + [branch_step] + + # Continue from this branch + next_step = branch.get('next') + if next_step: + self._trace_paths_to_merge(steps, next_step, branch_path, all_paths, stage_idx, merge_points, visited) + else: + # End of branch path - add this complete path + all_paths.append(branch_path) + + elif step_type == 'ABTest': + # Create a path for each variant + variants = step_data.get('variants', []) + for variant in variants: + variant_step = self._create_step_from_variant(step_id, step_data, variant, stage_idx) + variant_path = new_path + [variant_step] + + next_step = variant.get('next') + if next_step: + self._trace_paths_to_merge(steps, next_step, variant_path, all_paths, stage_idx, merge_points, visited) + else: + # End of variant path - add this complete path + all_paths.append(variant_path) + + elif step_type == 'WaitStep' and step_data.get('waitStepType') == 'Condition': + # Create a path for each condition + conditions = step_data.get('conditions', []) + for condition in conditions: + condition_step = self._create_step_from_condition(step_id, step_data, condition, stage_idx) + condition_path = new_path + [condition_step] + + next_step = condition.get('next') + if next_step: + self._trace_paths_to_merge(steps, next_step, condition_path, all_paths, stage_idx, merge_points, visited) + else: + # End of condition path - add this complete path + all_paths.append(condition_path) + + else: + # Regular step - continue to next + next_step = step_data.get('next') + if next_step: + self._trace_paths_to_merge(steps, next_step, new_path, all_paths, stage_idx, merge_points, visited) + else: + # End of path (no next step) - add this complete path + all_paths.append(new_path) + + def _path_leads_to_merge(self, steps: dict, path: List, merge_step_id: str) -> bool: + """Check if a path leads to the specified merge step.""" + if not path: + return False + + # Check if any step in this path eventually leads to the merge step + for step in path: + if self._step_eventually_leads_to_merge(steps, step.step_id, merge_step_id, set()): + return True + + return False + + def _step_eventually_leads_to_merge(self, steps: dict, step_id: str, merge_step_id: str, visited: set) -> bool: + """Check if a step eventually leads to a merge step (with cycle detection).""" + if step_id in visited or step_id not in steps: + return False + + visited.add(step_id) + step_data = steps[step_id] + + # Check direct next step + next_step = step_data.get('next') + if next_step == merge_step_id: + return True + + # Check branches for decision points + if step_data.get('type') == 'DecisionPoint': + branches = step_data.get('branches', []) + for branch in branches: + branch_next = branch.get('next') + if branch_next == merge_step_id: + return True + if branch_next and self._step_eventually_leads_to_merge(steps, branch_next, merge_step_id, visited.copy()): + return True + + # Check variants for AB tests + if step_data.get('type') == 'ABTest': + variants = step_data.get('variants', []) + for variant in variants: + variant_next = variant.get('next') + if variant_next == merge_step_id: + return True + if variant_next and self._step_eventually_leads_to_merge(steps, variant_next, merge_step_id, visited.copy()): + return True + + # Check conditions for wait steps + if step_data.get('type') == 'WaitStep' and step_data.get('waitStepType') == 'Condition': + conditions = step_data.get('conditions', []) + for condition in conditions: + condition_next = condition.get('next') + if condition_next == merge_step_id: + return True + if condition_next and self._step_eventually_leads_to_merge(steps, condition_next, merge_step_id, visited.copy()): + return True + + # Check next step recursively + if next_step and self._step_eventually_leads_to_merge(steps, next_step, merge_step_id, visited.copy()): + return True + + return False + + def _build_pre_merge_paths(self, steps: dict, root_step_id: str, stage_idx: int, merge_points: set) -> List[List[FlowchartStep]]: + """Build all paths from root until the first merge point.""" + paths = [] + root_step_data = steps[root_step_id] + + if root_step_data.get('type') == 'DecisionPoint': + branches = root_step_data.get('branches', []) + for branch in branches: + path = [] + decision_step = self._create_step_from_branch(root_step_id, root_step_data, branch, stage_idx) + path.append(decision_step) + + # Follow path until we hit a merge point + if branch.get('next'): + self._follow_path_until_merge(steps, branch['next'], path, stage_idx, merge_points) + + paths.append(path) + + elif root_step_data.get('type') == 'ABTest': + variants = root_step_data.get('variants', []) + for variant in variants: + path = [] + variant_step = self._create_step_from_variant(root_step_id, root_step_data, variant, stage_idx) + path.append(variant_step) + + if variant.get('next'): + self._follow_path_until_merge(steps, variant['next'], path, stage_idx, merge_points) + + paths.append(path) + + elif root_step_data.get('type') == 'WaitStep' and root_step_data.get('waitStepType') == 'Condition': + conditions = root_step_data.get('conditions', []) + for condition in conditions: + path = [] + condition_step = self._create_step_from_condition(root_step_id, root_step_data, condition, stage_idx) + path.append(condition_step) + + if condition.get('next'): + self._follow_path_until_merge(steps, condition['next'], path, stage_idx, merge_points) + + paths.append(path) + else: + # Linear path + path = [] + self._follow_path_until_merge(steps, root_step_id, path, stage_idx, merge_points) + paths.append(path) + + return paths + + def _follow_path_until_merge(self, steps: dict, step_id: str, path: List[FlowchartStep], stage_idx: int, merge_points: set): + """Follow a path until we reach a merge point.""" + if step_id not in steps or step_id in merge_points: + return + + step_data = steps[step_id] + + # Skip wait condition steps - they should have been handled at the path generation level + if step_data.get('type') == 'WaitStep' and step_data.get('waitStepType') == 'Condition': + conditions = step_data.get('conditions', []) + if conditions and conditions[0].get('next'): + self._follow_path_until_merge(steps, conditions[0]['next'], path, stage_idx, merge_points) + return + + step = self._create_step_from_data(step_id, step_data, stage_idx) + path.append(step) + + # Continue to next step if it exists and is not a merge point + next_step = step_data.get('next') + if next_step and next_step not in merge_points: + self._follow_path_until_merge(steps, next_step, path, stage_idx, merge_points) + + def _follow_path(self, steps: dict, step_id: str, path: List[FlowchartStep], stage_idx: int, merge_points: set = None): + """Follow a path through the steps.""" + if merge_points is None: + merge_points = set() + + if step_id not in steps: + return + + step_data = steps[step_id] + + # Skip merge points - they are handled separately as grouping headers + # This prevents duplicate merge steps from overriding the header status + if step_id in merge_points: + return + + # Skip wait condition steps - they should have been handled at the path generation level + if step_data.get('type') == 'WaitStep' and step_data.get('waitStepType') == 'Condition': + # This should not happen if path generation is working correctly + # But if it does, skip this step and continue with the first condition's next step + conditions = step_data.get('conditions', []) + if conditions and conditions[0].get('next'): + self._follow_path(steps, conditions[0]['next'], path, stage_idx, merge_points) + return + + step = self._create_step_from_data(step_id, step_data, stage_idx) + path.append(step) + + # Continue to next step if it exists + next_step = step_data.get('next') + if next_step: + self._follow_path(steps, next_step, path, stage_idx, merge_points) + + def _create_step_from_data(self, step_id: str, step_data: dict, stage_idx: int) -> FlowchartStep: + """Create a FlowchartStep from step data.""" + step_type = step_data.get('type', 'Unknown') + name = get_step_display_name(step_data) + profile_count = get_step_profile_count(self.profile_data, step_id, stage_idx) + + return FlowchartStep( + step_id=step_id, + step_type=step_type, + name=name, + stage_index=stage_idx, + profile_count=profile_count + ) + + def _create_step_from_branch(self, step_id: str, step_data: dict, branch: dict, stage_idx: int) -> FlowchartStep: + """Create a FlowchartStep from a decision point branch.""" + if branch.get('excludedPath'): + name = 'Excluded Profiles' + else: + name = branch.get('name', f"Branch {branch.get('segmentId', '')}") + + # Get profile count for this branch + profile_count = self._get_branch_profile_count(step_id, branch.get('segmentId'), stage_idx) + + return FlowchartStep( + step_id=f"{step_id}_branch_{branch.get('segmentId', '')}", + step_type='DecisionPoint_Branch', + name=name, + stage_index=stage_idx, + profile_count=profile_count + ) + + def _create_step_from_variant(self, step_id: str, step_data: dict, variant: dict, stage_idx: int) -> FlowchartStep: + """Create a FlowchartStep from an AB test variant.""" + name = variant.get('name', 'Unknown Variant') + percentage = variant.get('percentage', 0) + display_name = f"{name} ({percentage}%)" + + # Get profile count for this variant + profile_count = self._get_variant_profile_count(step_id, variant.get('id'), stage_idx) + + return FlowchartStep( + step_id=f"{step_id}_variant_{variant.get('id', '')}", + step_type='ABTest_Variant', + name=display_name, + stage_index=stage_idx, + profile_count=profile_count + ) + + def _create_step_from_condition(self, step_id: str, step_data: dict, condition: dict, stage_idx: int) -> FlowchartStep: + """Create a FlowchartStep from a wait condition.""" + wait_name = step_data.get('name', 'Unknown Wait') + path_name = condition.get('name', 'Unknown Condition') + + # Format: "Wait Condition : " + name = f"Wait Condition {wait_name}: {path_name}" + + # Get profile count for this condition + profile_count = self._get_condition_profile_count(step_id, condition.get('id'), stage_idx) + + return FlowchartStep( + step_id=f"{step_id}_condition_{condition.get('id', '')}", + step_type='WaitCondition_Path', + name=name, + stage_index=stage_idx, + profile_count=profile_count + ) + + + + def _get_branch_profile_count(self, step_id: str, segment_id: str, stage_idx: int) -> int: + """Get the number of profiles currently in a decision point branch.""" + if not segment_id: + return 0 + + # Convert step UUID format for column matching + step_uuid = step_id.replace('-', '_') + + # Look for branch entry column + branch_column = f'intime_stage_{stage_idx}_{step_uuid}_{segment_id}' + + if branch_column in self.profile_data.columns: + # Get the corresponding outtime column + outtime_column = branch_column.replace('intime_', 'outtime_') + + # Count profiles that have entered but not exited + condition = self.profile_data[branch_column].notna() + + if outtime_column in self.profile_data.columns: + # Exclude profiles that have exited (outtime is not null) + condition = condition & self.profile_data[outtime_column].isna() + + return condition.sum() + + return 0 + + def _get_variant_profile_count(self, step_id: str, variant_id: str, stage_idx: int) -> int: + """Get the number of profiles currently in an AB test variant.""" + if not variant_id: + return 0 + + # Convert UUIDs format for column matching + step_uuid = step_id.replace('-', '_') + variant_uuid = variant_id.replace('-', '_') + + # Look for variant entry column + variant_column = f'intime_stage_{stage_idx}_{step_uuid}_variant_{variant_uuid}' + + if variant_column in self.profile_data.columns: + # Get the corresponding outtime column + outtime_column = variant_column.replace('intime_', 'outtime_') + + # Count profiles that have entered but not exited + condition = self.profile_data[variant_column].notna() + + if outtime_column in self.profile_data.columns: + # Exclude profiles that have exited (outtime is not null) + condition = condition & self.profile_data[outtime_column].isna() + + return condition.sum() + + return 0 + + def _get_condition_profile_count(self, step_id: str, condition_id: str, stage_idx: int) -> int: + """Get the number of profiles currently in a wait condition path.""" + if not condition_id: + return 0 + + # Convert step UUID format for column matching + step_uuid = step_id.replace('-', '_') + condition_uuid = condition_id.replace('-', '_') + + # Look for condition entry column + condition_column = f'intime_stage_{stage_idx}_{step_uuid}_condition_{condition_uuid}' + + if condition_column in self.profile_data.columns: + # Get the corresponding outtime column + outtime_column = condition_column.replace('intime_', 'outtime_') + + # Count profiles that have entered but not exited + condition = self.profile_data[condition_column].notna() + + if outtime_column in self.profile_data.columns: + # Exclude profiles that have exited (outtime is not null) + condition = condition & self.profile_data[outtime_column].isna() + + return condition.sum() + + return 0 + + def get_stage_profile_counts(self) -> Dict[int, int]: + """Get profile counts for each stage (profiles currently in the stage).""" + stage_counts = {} + + for stage_idx in range(len(self.stages)): + entry_column = f'intime_stage_{stage_idx}' + if entry_column in self.profile_data.columns: + # Get the corresponding outtime column + outtime_column = f'outtime_stage_{stage_idx}' + + # Count profiles that have entered but not exited the stage + condition = self.profile_data[entry_column].notna() + + if outtime_column in self.profile_data.columns: + # Exclude profiles that have exited the stage (outtime is not null) + condition = condition & self.profile_data[outtime_column].isna() + + stage_counts[stage_idx] = condition.sum() + else: + stage_counts[stage_idx] = 0 + + return stage_counts + + def get_journey_summary(self) -> Dict: + """Get summary information about the journey.""" + total_profiles = len(self.profile_data) if not self.profile_data.empty else 0 + + # Count profiles that entered the journey + journey_entry_count = 0 + if 'intime_journey' in self.profile_data.columns: + journey_entry_count = self.profile_data['intime_journey'].notna().sum() + + return { + 'journey_id': self.journey_id, + 'journey_name': self.journey_name, + 'audience_id': self.audience_id, + 'total_profiles': total_profiles, + 'journey_entry_count': journey_entry_count, + 'stage_count': len(self.stages), + 'stage_counts': self.get_stage_profile_counts() + } + + def get_profiles_in_step(self, step_column: str) -> List[str]: + """Get list of customer IDs for profiles currently in a specific step.""" + if step_column not in self.profile_data.columns: + return [] + + # Get the corresponding outtime column + outtime_column = step_column.replace('intime_', 'outtime_') + + # Filter profiles that have entered (intime not null) but not exited (outtime is null) + condition = self.profile_data[step_column].notna() + + if outtime_column in self.profile_data.columns: + # Exclude profiles that have exited (outtime is not null) + condition = condition & self.profile_data[outtime_column].isna() + + profiles_in_step = self.profile_data[condition]['cdp_customer_id'].tolist() + + return profiles_in_step \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/hierarchical_step_formatter.py b/tool-box/cjo-profile-viewer/src/hierarchical_step_formatter.py new file mode 100644 index 00000000..dca9b8a5 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/hierarchical_step_formatter.py @@ -0,0 +1,432 @@ +#!/usr/bin/env python3 +""" +Hierarchical step formatter for all branching step types. + +Handles indented display formatting for: +- Decision Points and their branches +- AB Tests and their variants +- Wait Conditions and their paths +- Merge Points and post-merge steps +""" + +from typing import List, Tuple, Dict, Any + +def clean_step_name_for_display(step_name: str, step_type: str) -> str: + """Clean up step names for display, removing redundant prefixes.""" + if step_type == 'WaitCondition_Path': + # Remove "Wait Condition" prefix if it exists in the step name + if step_name.startswith('Wait Condition '): + return step_name.replace('Wait Condition ', '', 1) + elif 'Wait Condition' in step_name: + # Handle other cases where "Wait Condition" might appear + return step_name.replace('Wait Condition ', '') + return step_name + +def format_step_name_with_uuid(step_name: str, step_type: str, short_uuid: str, include_uuid: bool) -> str: + """Format step name with optional UUID.""" + clean_name = clean_step_name_for_display(step_name, step_type) + if include_uuid: + return f"{clean_name} ({short_uuid})" + else: + return clean_name + +def format_hierarchical_steps(generator, include_profile_counts: bool = True, include_uuid: bool = True) -> List[Tuple[str, Dict[str, Any]]]: + """ + Format steps with hierarchical indentation for all branching step types. + + Examples of formatted output: + + Decision: country is japan + --- Wait 3 days + --- Merge (merge uuid) + + AB Test: email variants + --- Variant A (5%): 2 profiles + --- Variant B (5%): 3 profiles + --- Control (90%): 40 profiles + + Wait Condition: pageview event + --- Path: event occurred (12 profiles) + --- Path: timeout (3 profiles) + + Merge: (merge uuid) - grouping header + --- Wait 1 day + --- End Step + """ + + def get_short_uuid(uuid_string: str) -> str: + """Extract the first part of a UUID (before first hyphen).""" + return uuid_string.split('-')[0] if uuid_string else uuid_string + + formatted_steps = [] + processed_step_ids = set() # Track processed steps to avoid duplicates + + for stage in generator.stages: + stage_idx = stage.index + + # Check if this stage has merge points + merge_points = set() + for path in stage.paths: + for step in path: + if getattr(step, 'is_merge_header', False) or getattr(step, 'is_merge_endpoint', False): + merge_points.add(step.step_id) + + if not merge_points: + # No merge points - use hierarchical display logic for branching steps + for path_idx, path in enumerate(stage.paths): + # Track when we encounter a hierarchical step in this path + found_hierarchical_step = False + + for step_idx, step in enumerate(path): + # Skip if this step has already been processed + if step.step_id in processed_step_ids: + continue + + # Get shortened UUID for all steps + short_uuid = get_short_uuid(step.step_id) + + # Conditionally add profile count to display names + profile_suffix = f" - {step.profile_count} profiles" if include_profile_counts else "" + + # Apply hierarchical formatting based on step type + if step.step_type in ['DecisionPoint_Branch', 'ABTest_Variant', 'WaitCondition_Path']: + # Hierarchical step - use grouping header format with prefix (no profile count for parent items) + if step.step_type == 'DecisionPoint_Branch': + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"Decision Branch: {formatted_name}" + elif step.step_type == 'ABTest_Variant': + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"AB Test: {formatted_name}" + elif step.step_type == 'WaitCondition_Path': + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"Wait Until: {formatted_name}" + is_grouping_header = True + found_hierarchical_step = True # Mark that we found a hierarchical step + + # Add empty line before grouping headers for visual separation + if formatted_steps: + formatted_steps.append(("", { + 'step_id': '', + 'step_type': 'Empty', + 'stage_index': stage_idx, + 'profile_count': 0, + 'name': '', + 'is_empty_line': True + })) + else: + # Regular step - only indent if it comes AFTER a hierarchical step + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + if found_hierarchical_step: + step_display = f"--- {formatted_name}{profile_suffix}" + is_indented = True + else: + step_display = f"{formatted_name}{profile_suffix}" + is_indented = False + is_grouping_header = False + + # Create step info + step_info = { + 'step_id': step.step_id, + 'step_type': step.step_type, + 'stage_index': step.stage_index, + 'profile_count': step.profile_count, + 'name': step.name, + 'path_index': path_idx, + 'step_index': step_idx, + 'breadcrumbs': [step.name], + 'stage_entry_criteria': stage.entry_criteria + } + + # Add type-specific metadata + if step.step_type in ['DecisionPoint_Branch', 'ABTest_Variant', 'WaitCondition_Path']: + step_info['is_branch_header'] = True + elif found_hierarchical_step and step.step_type not in ['DecisionPoint_Branch', 'ABTest_Variant', 'WaitCondition_Path']: + step_info['is_indented'] = True + + formatted_steps.append((step_display, step_info)) + processed_step_ids.add(step.step_id) # Mark as processed + else: + # Has merge points - use special hierarchy formatting + branch_paths = [] + merge_header_path = None + + # Separate branch paths from merge header path + for path in stage.paths: + has_merge_header = any(getattr(step, 'is_merge_header', False) for step in path) + if has_merge_header: + merge_header_path = path + else: + branch_paths.append(path) + + # Format all paths with unified step processing + for path_idx, path in enumerate(branch_paths): + branch_breadcrumbs = [] + + # Build breadcrumb trail for this entire path + for step in path: + if step.step_type == 'DecisionPoint_Branch': + branch_breadcrumbs.append(f"Decision Branch: {step.name}") + elif step.step_type == 'ABTest_Variant': + branch_breadcrumbs.append(f"AB Test: {step.name}") + elif step.step_type == 'WaitCondition_Path': + branch_breadcrumbs.append(f"Wait Until: {step.name}") + elif not getattr(step, 'is_merge_endpoint', False): + branch_breadcrumbs.append(step.name) + + # Process each step in the path uniformly + path_has_grouping_header = False + + for step_idx, step in enumerate(path): + is_merge_endpoint = getattr(step, 'is_merge_endpoint', False) + + # Skip if this step has already been processed, EXCEPT for merge endpoints + # (merge endpoints should appear under each variant path that leads to them) + if step.step_id in processed_step_ids and not is_merge_endpoint: + continue + + # Handle grouping header steps (DecisionPoint_Branch, ABTest_Variant, WaitCondition_Path) + if step.step_type == 'DecisionPoint_Branch': + # Decision point grouping header (no profile count for parent items) + decision_uuid = step.step_id.split('_branch_')[0] if '_branch_' in step.step_id else step.step_id + short_uuid = get_short_uuid(decision_uuid) + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"Decision Branch: {formatted_name}" + step_breadcrumb_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, True) # Always include UUID in breadcrumbs + step_breadcrumbs = [f"Decision Branch: {step_breadcrumb_name}"] + is_grouping_header = True + path_has_grouping_header = True + + elif step.step_type == 'ABTest_Variant': + # AB test variant grouping header (no profile count for parent items) + ab_test_uuid = step.step_id.split('_variant_')[0] if '_variant_' in step.step_id else step.step_id + short_uuid = get_short_uuid(ab_test_uuid) + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"AB Test: {formatted_name}" + step_breadcrumb_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, True) # Always include UUID in breadcrumbs + step_breadcrumbs = [f"AB Test: {step_breadcrumb_name}"] + is_grouping_header = True + path_has_grouping_header = True + + elif step.step_type == 'WaitCondition_Path': + # Wait condition path grouping header (no profile count for parent items) + wait_uuid = step.step_id.split('_path_')[0] if '_path_' in step.step_id else step.step_id + short_uuid = get_short_uuid(wait_uuid) + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"Wait Until: {formatted_name}" + step_breadcrumb_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, True) # Always include UUID in breadcrumbs + step_breadcrumbs = [f"Wait Until: {step_breadcrumb_name}"] + is_grouping_header = True + path_has_grouping_header = True + + elif is_merge_endpoint: + # Merge endpoint step + short_uuid = get_short_uuid(step.step_id) + formatted_name = format_step_name_with_uuid("Merge", 'Merge', short_uuid, include_uuid) + step_display = f"--- {formatted_name}{profile_suffix}" if path_has_grouping_header else f"{formatted_name}{profile_suffix}" + merge_breadcrumbs = branch_breadcrumbs + [f"Merge ({short_uuid})"] + step_breadcrumbs = merge_breadcrumbs + is_grouping_header = False + + else: + # Regular step (any type: WaitStep, ActivationStep, etc.) + short_uuid = get_short_uuid(step.step_id) + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"--- {formatted_name}{profile_suffix}" if path_has_grouping_header else f"{formatted_name}{profile_suffix}" + + # Build breadcrumb trail up to this step + step_breadcrumbs = [] + for i, path_step in enumerate(path): + if path_step.step_type == 'DecisionPoint_Branch': + step_breadcrumbs.append(f"Decision Branch: {path_step.name}") + elif path_step.step_type == 'ABTest_Variant': + step_breadcrumbs.append(f"AB Test: {path_step.name}") + elif path_step.step_type == 'WaitCondition_Path': + step_breadcrumbs.append(f"Wait Until: {path_step.name}") + elif not getattr(path_step, 'is_merge_endpoint', False): + step_breadcrumbs.append(path_step.name) + if path_step.step_id == step.step_id: + break + is_grouping_header = False + + # Add empty line before grouping headers for visual separation + if step.step_type in ['DecisionPoint_Branch', 'ABTest_Variant', 'WaitCondition_Path'] and formatted_steps: + formatted_steps.append(("", { + 'step_id': '', + 'step_type': 'Empty', + 'stage_index': stage_idx, + 'profile_count': 0, + 'name': '', + 'is_empty_line': True + })) + + # Add the step to formatted output + step_info = { + 'step_id': step.step_id, + 'step_type': step.step_type, + 'stage_index': step.stage_index, + 'profile_count': step.profile_count, + 'name': step.name, + 'path_index': path_idx, + 'step_index': step_idx, + 'breadcrumbs': step_breadcrumbs, + 'stage_entry_criteria': stage.entry_criteria + } + + # Add type-specific metadata + if step.step_type in ['DecisionPoint_Branch', 'ABTest_Variant', 'WaitCondition_Path']: + step_info['is_branch_header'] = True + elif is_merge_endpoint: + step_info['is_merge_endpoint'] = True + elif path_has_grouping_header: + step_info['is_indented'] = True + + formatted_steps.append((step_display, step_info)) + + # Only mark non-merge-endpoint steps as processed to avoid duplicates + # Merge endpoints can appear under multiple variant paths + if not is_merge_endpoint: + processed_step_ids.add(step.step_id) + + # Format merge header and post-merge steps using unified approach + # Also check for any remaining unprocessed steps that should be included + if merge_header_path: + post_merge_breadcrumbs = [] + merge_header_processed = False + + for step_idx, step in enumerate(merge_header_path): + # Skip if this step has already been processed + if step.step_id in processed_step_ids: + continue + + is_merge_header = getattr(step, 'is_merge_header', False) + + if is_merge_header: + # Merge grouping header (no profile count for parent items) + short_uuid = get_short_uuid(step.step_id) + post_merge_breadcrumbs = [f"Merge ({short_uuid})"] + formatted_name = format_step_name_with_uuid("Merge", 'Merge', short_uuid, include_uuid) + step_display = formatted_name + merge_header_processed = True + + # Add empty line before merge grouping header + if formatted_steps: + formatted_steps.append(("", { + 'step_id': '', + 'step_type': 'Empty', + 'stage_index': stage_idx, + 'profile_count': 0, + 'name': '', + 'is_empty_line': True + })) + + step_info = { + 'step_id': step.step_id, + 'step_type': step.step_type, + 'stage_index': step.stage_index, + 'profile_count': step.profile_count, + 'name': f"Merge ({short_uuid})", + 'path_index': len(branch_paths), + 'step_index': step_idx, + 'is_merge_header': True, + 'is_branch_header': True, + 'breadcrumbs': post_merge_breadcrumbs.copy(), + 'stage_entry_criteria': stage.entry_criteria + } + + else: + # Post-merge step (any type: WaitStep, ActivationStep, etc.) + short_uuid = get_short_uuid(step.step_id) + post_merge_breadcrumbs.append(step.name) + # Define profile suffix for this step + step_profile_suffix = f" - {step.profile_count} profiles" if include_profile_counts else "" + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"--- {formatted_name}{step_profile_suffix}" if merge_header_processed else f"{formatted_name}{step_profile_suffix}" + + step_info = { + 'step_id': step.step_id, + 'step_type': step.step_type, + 'stage_index': step.stage_index, + 'profile_count': step.profile_count, + 'name': step.name, + 'path_index': len(branch_paths), + 'step_index': step_idx, + 'breadcrumbs': post_merge_breadcrumbs.copy(), + 'stage_entry_criteria': stage.entry_criteria + } + + # Add indentation flag if there was a merge header + if merge_header_processed: + step_info['is_indented'] = True + step_info['is_post_merge'] = True + + formatted_steps.append((step_display, step_info)) + processed_step_ids.add(step.step_id) # Mark as processed + + # Ensure all steps from all paths are included (fallback for missing merge header paths) + # First, collect all unprocessed steps + unprocessed_steps = [] + for path_idx, path in enumerate(stage.paths): + for step_idx, step in enumerate(path): + if step.step_id not in processed_step_ids: + unprocessed_steps.append((step, path_idx, step_idx)) + + # If we have merge points and unprocessed steps, they are likely post-merge steps + if merge_points and unprocessed_steps: + # Add merge grouping header if we have post-merge steps + first_merge_id = next(iter(merge_points)) # Get first merge ID + short_uuid = get_short_uuid(first_merge_id) + + # Add empty line before merge grouping header + formatted_steps.append(("", { + 'step_id': '', + 'step_type': 'Empty', + 'stage_index': stage_idx, + 'profile_count': 0, + 'name': '', + 'is_empty_line': True + })) + + # Add merge grouping header + merge_display_name = format_step_name_with_uuid("Merge", 'Merge', short_uuid, include_uuid) + merge_name_with_uuid = f"Merge ({short_uuid})" # Always use UUID in name field for identification + formatted_steps.append((merge_display_name, { + 'step_id': first_merge_id + "_header", + 'step_type': 'Merge', + 'stage_index': stage_idx, + 'profile_count': 0, # Grouping headers don't show profile counts + 'name': merge_name_with_uuid, + 'path_index': len(stage.paths), + 'step_index': 0, + 'is_merge_header': True, + 'is_branch_header': True, + 'breadcrumbs': [f"Merge ({short_uuid})"], + 'stage_entry_criteria': stage.entry_criteria, + 'is_fallback_merge_header': True # Mark as fallback + })) + + # Now add all unprocessed steps (indented if post-merge) + for step, path_idx, step_idx in unprocessed_steps: + short_uuid = get_short_uuid(step.step_id) + is_post_merge = bool(merge_points) # Indent if there are merge points + + # Define profile suffix for this step + step_profile_suffix = f" - {step.profile_count} profiles" if include_profile_counts else "" + formatted_name = format_step_name_with_uuid(step.name, step.step_type, short_uuid, include_uuid) + step_display = f"--- {formatted_name}{step_profile_suffix}" if is_post_merge else f"{formatted_name}{step_profile_suffix}" + + formatted_steps.append((step_display, { + 'step_id': step.step_id, + 'step_type': step.step_type, + 'stage_index': step.stage_index, + 'profile_count': step.profile_count, + 'name': step.name, + 'path_index': path_idx, + 'step_index': step_idx, + 'breadcrumbs': [step.name], + 'stage_entry_criteria': stage.entry_criteria, + 'is_indented': is_post_merge, + 'is_fallback_processed': True # Mark as fallback for debugging + })) + processed_step_ids.add(step.step_id) + + return formatted_steps \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/services/__init__.py b/tool-box/cjo-profile-viewer/src/services/__init__.py new file mode 100644 index 00000000..3b288a4f --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/services/__init__.py @@ -0,0 +1,5 @@ +""" +Services for CJO Profile Viewer + +This module contains service classes for external API interactions. +""" \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/services/td_api.py b/tool-box/cjo-profile-viewer/src/services/td_api.py new file mode 100644 index 00000000..8a763439 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/services/td_api.py @@ -0,0 +1,214 @@ +""" +Treasure Data API Service + +This module handles all interactions with the Treasure Data APIs including: +- Journey configuration retrieval +- Profile data querying +- Customer attribute discovery +- API key management +""" + +import streamlit as st +import pandas as pd +import requests +import os +import pytd +from typing import Dict, List, Optional, Tuple + + +class TDAPIService: + """Service class for Treasure Data API interactions.""" + + def __init__(self): + self.api_key = self.get_api_key() + + def get_api_key(self) -> Optional[str]: + """Get TD API key from environment variable or config file.""" + # First try environment variable + api_key = os.getenv('TD_API_KEY') + if api_key: + return api_key + + # Try to read from config file + config_paths = [ + os.path.expanduser('~/.td/config'), + 'td_config.txt', + '.env' + ] + + for config_path in config_paths: + try: + if os.path.exists(config_path): + with open(config_path, 'r') as f: + for line in f: + if line.startswith('TD_API_KEY=') or line.startswith('apikey='): + return line.split('=', 1)[1].strip() + except Exception: + continue + + return None + + def fetch_journey_data(self, journey_id: str) -> Tuple[Optional[dict], Optional[str]]: + """Fetch journey data from TD API.""" + if not journey_id or not self.api_key: + return None, "Journey ID and API key are required" + + url = f"https://api-cdp.treasuredata.com/entities/journeys/{journey_id}" + headers = { + 'Authorization': f'TD1 {self.api_key}', + 'Content-Type': 'application/json' + } + + try: + with st.spinner(f"Fetching journey data for ID: {journey_id}..."): + response = requests.get(url, headers=headers, timeout=30) + + if response.status_code == 200: + return response.json(), None + elif response.status_code == 401: + return None, "Authentication failed. Please check your API key." + elif response.status_code == 404: + return None, f"Journey ID '{journey_id}' not found." + else: + return None, f"API request failed with status {response.status_code}: {response.text}" + + except requests.exceptions.Timeout: + return None, "Request timed out. Please try again." + except requests.exceptions.ConnectionError: + return None, "Unable to connect to TD API. Please check your internet connection." + except Exception as e: + return None, f"Unexpected error: {str(e)}" + + def get_available_attributes(self, audience_id: str) -> List[str]: + """Get list of available customer attributes from the customers table.""" + if not audience_id or not self.api_key: + return [] + + try: + with st.spinner("Loading available customer attributes..."): + client = pytd.Client( + apikey=self.api_key, + endpoint='https://api.treasuredata.com', + engine='presto' + ) + + # Query to describe the customers table + describe_query = f"DESCRIBE cdp_audience_{audience_id}.customers" + result = client.query(describe_query) + + if result and result.get('data'): + # Extract column names, excluding 'time' and 'cdp_customer_id' + columns = [row[0] for row in result['data'] if row[0] not in ['time', 'cdp_customer_id']] + return sorted(columns) + + except Exception as e: + st.toast(f"Could not load customer attributes: {str(e)}", icon="⚠️") + + return [] + + def load_profile_data(self, journey_id: str, audience_id: str, selected_attributes: List[str] = None) -> Optional[pd.DataFrame]: + """Load profile data using pytd from live Treasure Data tables.""" + if not journey_id or not audience_id or not self.api_key: + st.error("Journey ID, Audience ID, and API key are required for live data query") + return None + + if selected_attributes is None: + selected_attributes = [] + + try: + # Initialize pytd client with presto engine and api.treasuredata.com endpoint + with st.spinner(f"Connecting to Treasure Data and querying profile data..."): + client = pytd.Client( + apikey=self.api_key, + endpoint='https://api.treasuredata.com', + engine='presto' + ) + + # Construct the query for live profile data + table_name = f"cdp_audience_{audience_id}.journey_{journey_id}" + + if selected_attributes: + # JOIN query with additional attributes from customers table + attributes_str = ", ".join([f"c.{attr}" for attr in selected_attributes]) + query = f""" + SELECT j.cdp_customer_id, {attributes_str} + FROM {table_name} j + JOIN cdp_audience_{audience_id}.customers c + ON c.cdp_customer_id = j.cdp_customer_id + """ + st.toast(f"Querying journey table with {len(selected_attributes)} additional attributes", icon="🔍") + else: + # Standard query without JOIN + query = f"SELECT * FROM {table_name}" + st.toast(f"Querying table: {table_name}", icon="🔍") + + # Execute the query and return as DataFrame + query_result = client.query(query) + + # Convert the result to a pandas DataFrame + if not query_result.get('data'): + st.toast(f"No data found in table {table_name}", icon="⚠️") + return pd.DataFrame() + + profile_data = pd.DataFrame(query_result['data'], columns=query_result['columns']) + + # If we used JOIN query, we need to merge back with the full journey data + if selected_attributes and not profile_data.empty: + # Get the full journey data for journey step information + full_journey_query = f"SELECT * FROM {table_name}" + full_result = client.query(full_journey_query) + + if full_result and full_result.get('data'): + full_journey_data = pd.DataFrame(full_result['data'], columns=full_result['columns']) + + # Merge the customer attributes with the full journey data + profile_data = full_journey_data.merge( + profile_data, + on='cdp_customer_id', + how='left' + ) + + return profile_data + + except Exception as e: + error_msg = str(e) + st.error(f"Error querying live profile data: {error_msg}") + + # Provide helpful error messages for common issues + if "Table not found" in error_msg or "does not exist" in error_msg: + st.error(f"Table 'cdp_audience_{audience_id}.journey_{journey_id}' does not exist. Please verify the audience ID and journey ID. Note: The journey workflow may not have been run yet and the audience needs to be built first.") + elif "Authentication" in error_msg or "401" in error_msg: + st.error("Authentication failed. Please check your TD API key.") + elif "Permission denied" in error_msg or "403" in error_msg: + st.error("Permission denied. Please ensure your API key has access to the audience data.") + + return None + + +# Convenience functions for backward compatibility +def get_api_key() -> Optional[str]: + """Get TD API key - convenience function.""" + service = TDAPIService() + return service.api_key + + +def fetch_journey_data(journey_id: str, api_key: str) -> Tuple[Optional[dict], Optional[str]]: + """Fetch journey data - convenience function.""" + service = TDAPIService() + service.api_key = api_key + return service.fetch_journey_data(journey_id) + + +def get_available_attributes(audience_id: str, api_key: str) -> List[str]: + """Get available attributes - convenience function.""" + service = TDAPIService() + service.api_key = api_key + return service.get_available_attributes(audience_id) + + +def load_profile_data(journey_id: str, audience_id: str, api_key: str) -> Optional[pd.DataFrame]: + """Load profile data - convenience function.""" + service = TDAPIService() + service.api_key = api_key + selected_attributes = st.session_state.get("selected_attributes", []) + return service.load_profile_data(journey_id, audience_id, selected_attributes) \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/styles/__init__.py b/tool-box/cjo-profile-viewer/src/styles/__init__.py new file mode 100644 index 00000000..5caa3857 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/styles/__init__.py @@ -0,0 +1,91 @@ +""" +Style management for CJO Profile Viewer + +This module provides utilities for loading CSS styles for the Streamlit application. +""" + +import streamlit as st +import os +from pathlib import Path + + +def load_css_file(css_file: str) -> str: + """ + Load CSS content from a file. + + Args: + css_file: Name of the CSS file (without path) + + Returns: + CSS content as string + """ + styles_dir = Path(__file__).parent + css_path = styles_dir / css_file + + try: + with open(css_path, 'r') as f: + return f.read() + except FileNotFoundError: + st.error(f"CSS file not found: {css_file}") + return "" + + +def inject_css(css_content: str) -> None: + """ + Inject CSS into the Streamlit app. + + Args: + css_content: CSS content to inject + """ + if css_content: + st.markdown(f"", unsafe_allow_html=True) + + +def load_all_styles() -> None: + """Load all application styles.""" + # Load layout styles + layout_css = load_css_file("layout.css") + inject_css(layout_css) + + # Load button styles + button_css = load_css_file("buttons.css") + inject_css(button_css) + + +def load_flowchart_styles() -> str: + """ + Load flowchart-specific styles for HTML generation. + + Returns: + Flowchart CSS wrapped in style tags + """ + flowchart_css = load_css_file("flowchart.css") + modal_css = load_css_file("modal.css") + + if flowchart_css or modal_css: + return f"" + return "" + + +# Style categories for selective loading +STYLE_CATEGORIES = { + "layout": "layout.css", + "buttons": "buttons.css", + "flowchart": "flowchart.css", + "modal": "modal.css" +} + + +def load_styles(*categories: str) -> None: + """ + Load specific style categories. + + Args: + *categories: Style categories to load (layout, buttons, flowchart, modal) + """ + for category in categories: + if category in STYLE_CATEGORIES: + css_content = load_css_file(STYLE_CATEGORIES[category]) + inject_css(css_content) + else: + st.warning(f"Unknown style category: {category}") \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/styles/buttons.css b/tool-box/cjo-profile-viewer/src/styles/buttons.css new file mode 100644 index 00000000..e67eb68d --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/styles/buttons.css @@ -0,0 +1,13 @@ +.stButton > button[data-testid="baseButton-primary"], +.stButton > button[kind="primary"] { + background-color: #0066CC !important; + border-color: #0066CC !important; + color: white !important; +} + +.stButton > button[data-testid="baseButton-primary"]:hover, +.stButton > button[kind="primary"]:hover { + background-color: #0052A3 !important; + border-color: #0052A3 !important; + color: white !important; +} \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/styles/flowchart.css b/tool-box/cjo-profile-viewer/src/styles/flowchart.css new file mode 100644 index 00000000..33e783e7 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/styles/flowchart.css @@ -0,0 +1,249 @@ +.flowchart-container { + background-color: #1E1E1E; + padding: 20px; + border-radius: 8px; + margin: 10px 0; + font-family: "Source Sans Pro", sans-serif; + border: 1px solid #333333; +} + +/* Always horizontal layout - responsive to screen width */ + +.journey-header { + background-color: #2D2D2D; + color: #FFFFFF; + padding: 15px; + border-radius: 8px; + margin-bottom: 20px; + border: 1px solid #444444; + font-size: 14px; +} + +/* Container for all stages - always horizontal with responsive behavior */ +.stages-wrapper { + display: flex; + flex-direction: row; + gap: 20px; + overflow-x: auto; + padding: 10px 0; +} + +.stage-container { + margin: 0; + padding: 20px; + border: 1px solid #444444; + border-radius: 8px; + background-color: #2D2D2D; + flex-shrink: 0; + min-width: 300px; + max-width: 400px; +} + +/* Responsive design for different screen sizes */ +@media (max-width: 1200px) { + .stage-container { + min-width: 280px; + max-width: 350px; + } +} + +@media (max-width: 900px) { + .stage-container { + min-width: 250px; + max-width: 300px; + } +} + +@media (max-width: 768px) { + .stages-wrapper { + flex-direction: column; + gap: 20px; + overflow-x: visible; + } + + .stage-container { + min-width: 100%; + max-width: 100%; + width: 100%; + } +} + +@media (max-width: 480px) { + .stage-container { + padding: 15px; + } +} + +.stage-header { + color: #FFFFFF; + font-size: 18px; + font-weight: 600; + margin-bottom: 15px; + text-align: center; +} + +.stage-info { + background-color: #d4ebf7; + color: #000000; + padding: 15px 20px; + border-radius: 5px; + margin-bottom: 20px; + font-size: 13px; + border: 1px solid rgba(0,0,0,0.1); + line-height: 1.6; +} + +.stage-info-section { + display: inline-block; + margin-right: 30px; + font-weight: normal; +} + +.stage-info-header { + font-weight: bold; + color: #000000; +} + +.paths-container { + position: relative; +} + +.path { + display: flex; + flex-direction: column; + align-items: center; + margin: 20px 0; + gap: 8px; +} + +.step-box { + /* Background color is set dynamically via inline styles in JavaScript */ + color: #000000; + padding: 15px 20px; + margin: 5px 0; + border-radius: 8px; + border: 1px solid rgba(0,0,0,0.1); + min-width: 180px; + max-width: 220px; + text-align: center; + cursor: pointer; + font-weight: 600; + font-size: 13px; + line-height: 1.3; + transition: all 0.3s ease; + position: relative; + font-family: "Source Sans Pro", sans-serif; + flex-shrink: 0; + z-index: 1; +} + +.step-box:hover { + transform: scale(1.03); + box-shadow: 0 2px 8px rgba(0,0,0,0.1); + border-color: #85C1E9; + z-index: 1000000; +} + +.step-name { + font-size: 12px; + margin-bottom: 5px; + word-wrap: break-word; + font-weight: 600; + color: #000000; +} + +.step-count { + font-size: 11px; + font-weight: 400; + color: #000000; +} + +.arrow { + color: #FFFFFF; + font-size: 20px; + font-weight: bold; + margin: 0 5px; + opacity: 0.8; + flex-shrink: 0; + align-self: center; +} + +.step-tooltip { + position: absolute; + top: -65px; + left: 50%; + transform: translateX(-50%); + background-color: rgba(0,0,0,0.9); + color: white; + padding: 8px 12px; + border-radius: 4px; + font-size: 14px; + white-space: pre-line; + opacity: 0; + pointer-events: none; + transition: opacity 0.3s; + z-index: 999999; + max-width: 400px; + text-align: center; + word-wrap: break-word; + min-width: 200px; +} + +/* Adjust tooltip position for elements near left edge */ +.path .step-box:first-child .step-tooltip { + left: 0; + transform: translateX(0); +} + +/* Adjust tooltip position for elements near right edge */ +.path .step-box:last-child .step-tooltip { + left: auto; + right: 0; + transform: translateX(0); +} + +.step-box:hover .step-tooltip { + opacity: 1; +} + +/* Hierarchical step styling */ +.branch-header { + font-weight: bold; + /* Background color is set dynamically via inline styles - removed !important override */ + border: 2px solid #85C1E9 !important; + font-size: 14px; + min-width: 250px; + max-width: 400px; +} + +.indented-step { + margin-left: 30px; + font-size: 12px; + min-width: 200px; + max-width: 350px; + position: relative; +} + +/* Indentation line for visual hierarchy */ +.indented-step::before { + content: ""; + position: absolute; + left: -20px; + top: 50%; + width: 15px; + height: 2px; + background-color: #85C1E9; + opacity: 0.6; +} + +/* Vertical line to connect indented steps to parent */ +.indented-step::after { + content: ""; + position: absolute; + left: -30px; + top: -8px; + width: 2px; + height: calc(100% + 16px); + background-color: #85C1E9; + opacity: 0.4; +} \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/styles/layout.css b/tool-box/cjo-profile-viewer/src/styles/layout.css new file mode 100644 index 00000000..ceda2b23 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/styles/layout.css @@ -0,0 +1,23 @@ +.main { + background-color: #2C3E50; +} + +.stTitle { + color: white; +} + +.stMarkdown { + color: white; +} + +.stSelectbox label { + color: white; +} + +.stTextInput label { + color: white; +} + +.stDataFrame { + background-color: white; +} \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/styles/modal.css b/tool-box/cjo-profile-viewer/src/styles/modal.css new file mode 100644 index 00000000..37e3cd91 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/styles/modal.css @@ -0,0 +1,153 @@ +/* Modal styles */ +.modal { + display: none; + position: fixed; + z-index: 2000; + left: 0; + top: 0; + width: 100%; + height: 100%; + background-color: rgba(0,0,0,0.8); + font-family: Arial, sans-serif; +} + +.modal-content { + background-color: #2D2D2D; + margin: 5% auto; + padding: 20px; + border: 1px solid #444444; + border-radius: 8px; + width: 90%; + max-width: 1200px; + min-width: 600px; + max-height: 80%; + overflow-y: auto; + color: #FFFFFF; + font-family: "Source Sans Pro", sans-serif; +} + +.modal-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 20px; + border-bottom: 1px solid #444444; + padding-bottom: 10px; +} + +.modal-title { + font-size: 18px; + font-weight: 600; + color: #FFFFFF; +} + +.close-button { + color: #CCCCCC; + font-size: 28px; + font-weight: bold; + cursor: pointer; + background: none; + border: none; +} + +.close-button:hover { + color: #FF6B6B; +} + +.search-box { + width: 100%; + padding: 10px; + margin-bottom: 15px; + border: 1px solid #444444; + border-radius: 5px; + background-color: #3A3A3A; + color: #FFFFFF; + font-size: 14px; + font-family: "Source Sans Pro", sans-serif; +} + +.search-box::placeholder { + color: #AAAAAA; +} + +.search-box:focus { + outline: none; + border-color: #666666; + background-color: #404040; +} + +.profiles-list { + max-height: 400px; + overflow-y: auto; + border: 1px solid #444444; + border-radius: 5px; + background-color: #3A3A3A; +} + +.profile-item { + padding: 8px 12px; + border-bottom: 1px solid #444444; + font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace; + font-size: 12px; + color: #E0E0E0; +} + +.profile-item:hover { + background-color: #404040; +} + +.profile-item:last-child { + border-bottom: none; +} + +.no-profiles { + text-align: center; + padding: 20px; + color: #AAAAAA; + font-style: italic; +} + +.profiles-table { + width: 100%; + border-collapse: collapse; + font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace; + font-size: 12px; + color: #E0E0E0; + background-color: #3A3A3A; +} + +.profiles-table th { + background-color: #2D2D2D; + color: #FFFFFF; + padding: 10px 12px; + text-align: left; + border-bottom: 2px solid #444444; + font-weight: 600; + position: sticky; + top: 0; + z-index: 10; +} + +.profiles-table td { + padding: 8px 12px; + border-bottom: 1px solid #444444; + vertical-align: top; +} + +.profiles-table tr:hover { + background-color: #404040; +} + +.profiles-table tr:last-child td { + border-bottom: none; +} + +.profile-count-info { + margin-bottom: 15px; + padding: 10px; + background-color: #3A3A3A; + border-radius: 5px; + font-size: 14px; + color: #E0E0E0; + border: 1px solid #555555; +} \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/utils/__init__.py b/tool-box/cjo-profile-viewer/src/utils/__init__.py new file mode 100644 index 00000000..04744201 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/utils/__init__.py @@ -0,0 +1,5 @@ +""" +Utilities for CJO Profile Viewer + +This module contains utility functions and classes for the application. +""" \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/utils/profile_filtering.py b/tool-box/cjo-profile-viewer/src/utils/profile_filtering.py new file mode 100644 index 00000000..fb2254ce --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/utils/profile_filtering.py @@ -0,0 +1,136 @@ +""" +Profile Filtering Utilities + +Shared utilities for filtering step profiles consistently across all components. +This eliminates duplicate filtering logic between step selection, canvas, and flowchart generator. +""" + +from typing import List +import pandas as pd + + +def get_step_column_name(step_id: str, stage_idx: int) -> str: + """ + Generate step column name based on step ID and stage index. + + Args: + step_id: The step UUID (may contain hyphens) + stage_idx: The stage index number + + Returns: + Column name in format: intime_stage_{stage_idx}_{step_uuid} + """ + step_uuid = step_id.replace('-', '_') + return f"intime_stage_{stage_idx}_{step_uuid}" + + +def create_step_profile_condition(profile_data: pd.DataFrame, step_column: str) -> pd.Series: + """ + Create pandas condition for filtering profiles that are currently in a specific step. + + This applies the standard filtering logic: + 1. Profile has entered the step (intime_stage_N_stepuuid IS NOT NULL) + 2. Profile has not exited the step (outtime_stage_N_stepuuid IS NULL) + 3. Profile has not left the journey (outtime_journey IS NULL) + + Args: + profile_data: DataFrame containing profile data + step_column: The intime column name for the step + + Returns: + Boolean Series for filtering profiles + """ + # Base condition: profile has entered the step + condition = profile_data[step_column].notna() + + # Exclude profiles that have exited this specific step + step_outtime_column = step_column.replace('intime_', 'outtime_') + if step_outtime_column in profile_data.columns: + condition = condition & profile_data[step_outtime_column].isna() + + # Exclude profiles that have left the journey + if 'outtime_journey' in profile_data.columns: + condition = condition & profile_data['outtime_journey'].isna() + + return condition + + +def get_step_profiles(profile_data: pd.DataFrame, step_id: str, stage_idx: int) -> List[str]: + """ + Get list of customer IDs for profiles currently in a specific step. + + Args: + profile_data: DataFrame containing profile data + step_id: The step UUID + stage_idx: The stage index number + + Returns: + List of customer IDs (cdp_customer_id values) + """ + if profile_data.empty: + return [] + + step_column = get_step_column_name(step_id, stage_idx) + if step_column not in profile_data.columns: + return [] + + condition = create_step_profile_condition(profile_data, step_column) + return profile_data[condition]['cdp_customer_id'].tolist() + + +def get_step_profile_count(profile_data: pd.DataFrame, step_id: str, stage_idx: int) -> int: + """ + Get count of profiles currently in a specific step. + + Args: + profile_data: DataFrame containing profile data + step_id: The step UUID + stage_idx: The stage index number + + Returns: + Number of profiles currently in the step + """ + if profile_data.empty: + return 0 + + step_column = get_step_column_name(step_id, stage_idx) + if step_column not in profile_data.columns: + return 0 + + condition = create_step_profile_condition(profile_data, step_column) + return condition.sum() + + +def get_filtered_profile_data(profile_data: pd.DataFrame, step_id: str, stage_idx: int, + selected_attributes: List[str] = None) -> pd.DataFrame: + """ + Get filtered profile data for profiles currently in a specific step. + + Args: + profile_data: DataFrame containing profile data + step_id: The step UUID + stage_idx: The stage index number + selected_attributes: List of additional attributes to include + + Returns: + Filtered DataFrame with profiles currently in the step + """ + if profile_data.empty: + return pd.DataFrame() + + step_column = get_step_column_name(step_id, stage_idx) + if step_column not in profile_data.columns: + return pd.DataFrame() + + condition = create_step_profile_condition(profile_data, step_column) + filtered_data = profile_data[condition] + + if selected_attributes: + # Include cdp_customer_id and selected attributes + columns_to_show = ['cdp_customer_id'] + [attr for attr in selected_attributes + if attr in filtered_data.columns] + if len(columns_to_show) > 1: + return filtered_data[columns_to_show].copy() + + # Default: just return customer IDs + return filtered_data[['cdp_customer_id']].copy() \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/utils/session_state.py b/tool-box/cjo-profile-viewer/src/utils/session_state.py new file mode 100644 index 00000000..3a8a7b40 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/utils/session_state.py @@ -0,0 +1,153 @@ +""" +Session State Management + +This module provides utilities for managing Streamlit session state. +""" + +import streamlit as st +from typing import Any, Dict, Optional + + +class SessionStateManager: + """Manages Streamlit session state with default values and validation.""" + + # Default session state values + DEFAULTS = { + 'api_response': None, + 'profile_data': None, + 'journey_loaded': False, + 'config_loaded': False, + 'available_attributes': {}, + 'selected_attributes': [], + 'auto_load_attempted': False, + } + + @classmethod + def initialize(cls) -> None: + """Initialize all session state variables with default values.""" + for key, default_value in cls.DEFAULTS.items(): + if key not in st.session_state: + st.session_state[key] = default_value + + @classmethod + def get(cls, key: str, default: Any = None) -> Any: + """ + Get a value from session state with optional default. + + Args: + key: Session state key + default: Default value if key doesn't exist + + Returns: + Value from session state or default + """ + if default is None: + default = cls.DEFAULTS.get(key) + return st.session_state.get(key, default) + + @classmethod + def set(cls, key: str, value: Any) -> None: + """ + Set a value in session state. + + Args: + key: Session state key + value: Value to set + """ + st.session_state[key] = value + + @classmethod + def reset_journey_data(cls) -> None: + """Reset journey-related session state.""" + cls.set('api_response', None) + cls.set('profile_data', None) + cls.set('journey_loaded', False) + cls.set('config_loaded', False) + cls.set('available_attributes', {}) + cls.set('selected_attributes', []) + + @classmethod + def is_config_loaded(cls) -> bool: + """Check if journey configuration is loaded.""" + return cls.get('config_loaded', False) and cls.get('api_response') is not None + + @classmethod + def is_journey_loaded(cls) -> bool: + """Check if complete journey data is loaded.""" + return (cls.get('journey_loaded', False) and + cls.get('api_response') is not None and + cls.get('profile_data') is not None) + + @classmethod + def get_journey_id(cls) -> Optional[str]: + """Get journey ID from loaded configuration.""" + api_response = cls.get('api_response') + if api_response: + return api_response.get('data', {}).get('id') + return None + + @classmethod + def get_audience_id(cls) -> Optional[str]: + """Get audience ID from loaded configuration.""" + api_response = cls.get('api_response') + if api_response: + return api_response.get('data', {}).get('attributes', {}).get('audienceId') + return None + + @classmethod + def set_config_loaded(cls, api_response: Dict, audience_id: str, available_attributes: list) -> None: + """ + Set configuration as loaded with all required data. + + Args: + api_response: Journey API response + audience_id: Audience ID + available_attributes: List of available customer attributes + """ + cls.set('api_response', api_response) + cls.set('config_loaded', True) + + # Store available attributes + if 'available_attributes' not in st.session_state: + st.session_state['available_attributes'] = {} + st.session_state['available_attributes'][audience_id] = available_attributes + + # Reset profile-related state + cls.set('profile_data', None) + cls.set('journey_loaded', False) + + @classmethod + def set_profile_loaded(cls, profile_data: Any) -> None: + """ + Set profile data as loaded. + + Args: + profile_data: Profile DataFrame + """ + cls.set('profile_data', profile_data) + cls.set('journey_loaded', True) + + @classmethod + def get_available_attributes(cls, audience_id: str) -> list: + """ + Get available attributes for a specific audience. + + Args: + audience_id: Audience ID + + Returns: + List of available attributes + """ + available_attrs = cls.get('available_attributes', {}) + return available_attrs.get(audience_id, []) + + +# Convenience functions for backward compatibility +def initialize_session_state(): + """Initialize session state - convenience function.""" + SessionStateManager.initialize() + + +def reset_journey_data(): + """Reset journey data - convenience function.""" + SessionStateManager.reset_journey_data() \ No newline at end of file diff --git a/tool-box/cjo-profile-viewer/src/utils/step_display.py b/tool-box/cjo-profile-viewer/src/utils/step_display.py new file mode 100644 index 00000000..f9631452 --- /dev/null +++ b/tool-box/cjo-profile-viewer/src/utils/step_display.py @@ -0,0 +1,81 @@ +""" +Step Display Utilities + +Shared utilities for calculating step display names consistently across components. +""" + +from typing import Dict + + +def get_step_display_name(step_data: Dict) -> str: + """ + Get display name for a step based on its type. + + This function provides consistent step naming logic used by both + the flowchart generator and the step selection dropdown. + + Args: + step_data: Dictionary containing step configuration data + + Returns: + Human-readable display name for the step + """ + step_type = step_data.get('type', 'Unknown') + + if step_type == 'WaitStep': + return _get_wait_step_display_name(step_data) + elif step_type == 'Activation': + return step_data.get('name', 'Activation') + elif step_type == 'Jump': + return step_data.get('name', 'Jump') + elif step_type == 'End': + return 'End Step' + elif step_type == 'DecisionPoint': + return 'Decision Point' + elif step_type == 'ABTest': + return step_data.get('name', 'AB Test') + elif step_type == 'Merge': + return step_data.get('name', 'Merge Step') + else: + return step_data.get('name', step_type) + + +def _get_wait_step_display_name(step_data: Dict) -> str: + """ + Get display name for WaitStep type with specific wait logic. + + Args: + step_data: Dictionary containing wait step configuration + + Returns: + Formatted wait step display name + """ + wait_step_type = step_data.get('waitStepType', 'Duration') + + if wait_step_type == 'Condition': + step_name = step_data.get('name', 'Unknown Condition') + return f'Wait: {step_name}' + + elif wait_step_type == 'Date': + wait_until_date = step_data.get('waitUntilDate', 'Unknown Date') + return f'Wait Until {wait_until_date}' + + elif wait_step_type == 'DaysOfTheWeek': + days_of_week = step_data.get('waitUntilDaysOfTheWeek', []) + if days_of_week: + # Map day numbers to day names (1=Monday, 2=Tuesday, etc.) + day_names = { + 1: 'Monday', 2: 'Tuesday', 3: 'Wednesday', 4: 'Thursday', + 5: 'Friday', 6: 'Saturday', 7: 'Sunday' + } + day_list = [day_names.get(day, f'Day{day}') for day in days_of_week] + days_str = ', '.join(day_list) + return f'Wait Until {days_str}' + else: + return 'Wait Until (No Days Specified)' + + else: + # Duration-based wait step (default/legacy) + wait_step = step_data.get('waitStep', 1) + wait_unit = step_data.get('waitStepUnit', 'day') + return f'Wait {wait_step} {wait_unit}' \ No newline at end of file