diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..79bd9d0
--- /dev/null
+++ b/.env.example
@@ -0,0 +1 @@
+GEMINI_API_KEY="your_api_key_here"
\ No newline at end of file
diff --git a/README.md b/README.md
index 0232fde..6b213be 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,7 @@ But **real work happens on mobile devices** in places where laptops don't fit:
Watch Android Use automate an entire logistics workflow:
### Before (Manual - 10+ minutes)
+
```
1. Driver takes photo of Bill of Lading
2. Opens WhatsApp, sends to back office
@@ -62,6 +63,7 @@ Watch Android Use automate an entire logistics workflow:
```
### After (Automated - 30 seconds)
+
```python
# Driver just texts the photo. Agent does the rest.
run_agent("""
@@ -84,6 +86,7 @@ run_agent("""
### ๐ซ Computer Use (Anthropic)
+
- Requires desktop/laptop
- Takes screenshots → OCR
- Sends images to vision model
@@ -95,6 +98,7 @@ run_agent("""
|
### ✅ Android Use (This Library)
+
- Works on handheld devices
- Reads accessibility tree (XML)
- Structured data → LLM
@@ -128,13 +132,13 @@ Launched **24 hours ago** with the logistics demo:
## ๐ The Market: Mobile-First Industries
-| Industry | Why They Need This | Market Size | Current State |
-|----------|-------------------|-------------|---------------|
-| **๐ Logistics** | Drivers use factoring apps (RTS Pro, OTR Capital) in truck cabs | **$10.5T** | Manual, no laptop access |
-| **๐ Gig Economy** | Uber/Lyft/DoorDash drivers optimize between apps on phones | **$455B** | Tap manually, lose 20% earnings |
-| **๐ฆ Last-Mile** | Amazon Flex, UPS drivers scan packages on handhelds | **$500B+** | Proprietary apps, no APIs |
-| **๐๏ธ Field Services** | Techs log work orders on tablets on-site | **$200B+** | Mobile-only workflows |
-| **๐ฆ Mobile Banking** | Treasury ops, reconciliation on native banking apps | **$28T** | 2FA + biometric locks |
+| Industry | Why They Need This | Market Size | Current State |
+| --------------------- | --------------------------------------------------------------- | ----------- | ------------------------------- |
+| **๐ Logistics** | Drivers use factoring apps (RTS Pro, OTR Capital) in truck cabs | **$10.5T** | Manual, no laptop access |
+| **๐ Gig Economy** | Uber/Lyft/DoorDash drivers optimize between apps on phones | **$455B** | Tap manually, lose 20% earnings |
+| **๐ฆ Last-Mile** | Amazon Flex, UPS drivers scan packages on handhelds | **$500B+** | Proprietary apps, no APIs |
+| **๐๏ธ Field Services** | Techs log work orders on tablets on-site | **$200B+** | Mobile-only workflows |
+| **๐ฆ Mobile Banking** | Treasury ops, reconciliation on native banking apps | **$28T** | 2FA + biometric locks |
**Total: $40+ trillion in GDP from mobile-first workflows**
@@ -145,6 +149,7 @@ Browser agents can't reach these. Desktop agents don't fit. **Android Use is the
## ๐ Quick Start (60 Seconds)
### Prerequisites
+
- Python 3.10+
- Android device or emulator (USB debugging enabled)
- ADB (Android Debug Bridge)
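+
+Not sure ADB can see your device? A quick sanity check from Python (it just shells out to `adb` the same way `kernel.py` does, and assumes `adb` is on your PATH):
+
+```python
+import subprocess
+
+# Lists attached devices/emulators; your device should appear with state "device"
+print(subprocess.run(["adb", "devices"], capture_output=True, text=True).stdout)
+```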
@@ -172,6 +177,7 @@ export OPENAI_API_KEY="sk-..."
# 6. Run your first agent
python kernel.py
+# python kernel-genai.py  # For Gemini (google-genai) users
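+# kernel-genai.py reads GEMINI_API_KEY from the environment or a .env file (see .env.example)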
```
### Try It: Logistics Example
@@ -181,12 +187,13 @@ from kernel import run_agent
# Automate the workflow from the viral demo
run_agent("""
-Open WhatsApp, get the latest image,
+Open WhatsApp, get the latest image,
then open the invoice app and fill out the form
""")
```
**Other examples:**
+
- `"Accept the next DoorDash delivery and navigate to restaurant"`
- `"Scan all packages and mark them delivered in the driver app"`
- `"Check Chase mobile for today's transactions"`
@@ -196,51 +203,61 @@ then open the invoice app and fill out the form
## ๐ผ Use Cases Beyond Logistics
### ๐ Gig Economy Multi-Apping
+
**Problem:** Drivers lose 20%+ earnings manually switching between DoorDash, Uber Eats, Instacart.
```python
run_agent("Monitor all delivery apps, accept the highest paying order")
```
+
**Impact:** Instant acceptance, maximize earnings, reduce downtime.
---
### ๐ฆ Package Scanning Automation
+
**Problem:** Drivers manually scan 200+ packages/day in proprietary apps.
```python
run_agent("Scan all packages in photo and mark as loaded in Amazon Flex")
```
+
**Impact:** Bulk scanning, eliminate manual entry, speed up loading.
---
### ๐ฆ Mobile Banking Operations
+
**Problem:** Treasury teams reconcile transactions across multiple mobile banking apps.
```python
run_agent("Log into Chase mobile and export today's wire transfers")
```
+
**Impact:** Automate reconciliation, fraud detection, compliance.
---
### ๐ฅ Healthcare Mobile Workflows
+
**Problem:** Staff extract patient data from HIPAA-locked mobile portals.
```python
run_agent("Open Epic MyChart and download lab results for patient 12345")
```
+
**Impact:** Data extraction, appointment booking, records management.
---
### ๐งช Mobile App QA Testing
+
**Problem:** Manual testing of Android apps is slow and expensive.
```python
run_agent("Create account, complete onboarding, make test purchase")
```
+
**Impact:** Automated E2E testing, regression tests, CI/CD integration.
---
@@ -297,10 +314,10 @@ run_agent("Create account, complete onboarding, make test purchase")
### Why Accessibility Tree > Screenshots
-| Approach | Cost | Speed | Accuracy | Works on Device |
-|----------|------|-------|----------|----------------|
-| **Screenshots (Computer Use)** | $0.15/action | 3-5s | 70-80% | ❌ Desktop only |
-| **Accessibility Tree (Android Use)** | $0.01/action | <1s | 99%+ | ✅ Handheld devices |
+| Approach | Cost | Speed | Accuracy | Works on Device |
+| ------------------------------------ | ------------ | ----- | -------- | ------------------- |
+| **Screenshots (Computer Use)**       | $0.15/action | 3-5s  | 70-80%   | ❌ Desktop only      |
+| **Accessibility Tree (Android Use)** | $0.01/action | <1s   | 99%+     | ✅ Handheld devices  |
**Technical advantage:** Accessibility tree provides structured data (text, coordinates, hierarchy) without image encoding/OCR.
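+
+For a feel of what that structured data looks like, here is a minimal sketch of the kind of parsing the `sanitizer` module does. It assumes uiautomator's standard dump format, where each `<node>` carries `text`, `content-desc`, `clickable`, and `bounds="[x1,y1][x2,y2]"` attributes; the shipped `sanitizer.get_interactive_elements` may differ in detail.
+
+```python
+import re
+import xml.etree.ElementTree as ET
+
+def get_interactive_elements(xml_content: str) -> list[dict]:
+    """Reduce a uiautomator dump to a compact list of tappable elements."""
+    elements = []
+    for node in ET.fromstring(xml_content).iter("node"):
+        if node.get("clickable") != "true":
+            continue
+        # bounds look like "[x1,y1][x2,y2]"; hand the LLM the center point
+        x1, y1, x2, y2 = map(int, re.findall(r"\d+", node.get("bounds", "[0,0][0,0]")))
+        elements.append({
+            "text": node.get("text") or node.get("content-desc") or "",
+            "coordinates": [(x1 + x2) // 2, (y1 + y2) // 2],
+        })
+    return elements
+```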
@@ -370,24 +387,28 @@ screen_json = get_screen_state()
## ๐บ๏ธ Roadmap
### ✅ Now (MVP - 48 hours)
+
- [x] Core agent loop (perception → reasoning → action)
- [x] Accessibility tree parsing
- [x] GPT-4 integration
- [x] Basic actions (tap, type, navigate)
### ๐ง Next 2 Weeks
+
- [ ] **PyPI package:** `pip install android-use`
- [ ] **Multi-LLM support:** Claude, Gemini, Llama
- [ ] **WhatsApp integration:** Pre-built actions for messaging
- [ ] **Error recovery:** Retry logic, fallback strategies
### ๐ฎ Next 3 Months
+
- [ ] **App-specific agents:** Pre-trained for RTS Pro, OTR Capital, factoring apps
- [ ] **Cloud device farms:** Run at scale on AWS Device Farm, BrowserStack
- [ ] **Vision augmentation:** Screenshot fallback when accessibility insufficient
- [ ] **Multi-step memory:** Remember context across sessions
### ๐ Long-term Vision
+
- [ ] **Hosted Cloud API:** No-code agent execution (waitlist below)
- [ ] **Agent marketplace:** Buy/sell vertical-specific automations
- [ ] **Enterprise platform:** SOC2, audit logs, PII redaction, fleet management
@@ -400,6 +421,7 @@ screen_json = get_screen_state()
**Don't want to host it yourself?** Join the waitlist for our managed Cloud API.
**What you get:**
+
- ✅ No device setup required
- ✅ Scale to 1000s of simultaneous agents
- ✅ Pre-built integrations (WhatsApp, factoring apps, etc.)
@@ -414,6 +436,7 @@ screen_json = get_screen_state()
**Want to help build the future of mobile AI agents?**
### ๐ฅ High Priority
+
- **Logistics app templates:** RTS Pro, OTR Capital, Axle, TriumPay integrations
- **WhatsApp automation:** Message parsing, image extraction
- **Error handling:** Robustness for unreliable connections (truck cabs!)
@@ -421,6 +444,7 @@ screen_json = get_screen_state()
- **Testing:** E2E tests for common workflows
### How to Contribute
+
1. ⭐ **Star this repo** (most important!)
2. ๐ด Fork it
3. ๐ฟ Create branch: `git checkout -b feature/factoring-app-support`
@@ -461,7 +485,7 @@ Support the project
Help logistics companies find this
-[Tweet →](https://twitter.com/intent/tweet?text=๐%20Game%20changer%20for%20logistics!%20Android%20Use%20lets%20AI%20agents%20control%20native%20Android%20apps.%0A%0A✅%20Works%20in%20truck%20cabs%20(no%20laptop%20needed)%0A✅%2095%25%20cheaper%20than%20Computer%20Use%0A✅%20Automates%20factoring%20apps,%20WhatsApp,%20more%0A%0A4M%20views!%0A%0A&url=https://github.com/actionstatelabs/android-action-kernel&via=ethanjlim)
+[Tweet →]()
|
@@ -496,17 +520,19 @@ Progress:
I was interviewing truck drivers for a logistics automation project. One driver showed me his phone and said:
-> *"I have to manually type invoice data from this Bill of Lading photo into the RTS Pro app. Takes 10 minutes every delivery. I can't use a laptop because it doesn't fit in the cab."*
+> _"I have to manually type invoice data from this Bill of Lading photo into the RTS Pro app. Takes 10 minutes every delivery. I can't use a laptop because it doesn't fit in the cab."_
That's when it clicked: **AI agents exist for web and desktop, but the real economy runs on handheld devices.**
I looked at existing solutions:
+
- **Browser Use:** Only works on websites ❌
- **Computer Use:** Requires a laptop ($0.15/action, vision model) ❌
Neither solved the truck cab problem. So I built Android Use in 48 hours using Android's accessibility API.
**The result:**
+
- 95% cheaper (accessibility tree vs vision)
- 5x faster (<1s latency)
- Works on handheld devices ✅
@@ -541,6 +567,7 @@ This started as a library for developers. But based on demand, we're building:
## ๐ By the Numbers
**Since launch (24 hours ago):**
+
- ๐ **4,000,000+** views on X
- ⭐ **12** GitHub stars (help us get to 1,000!)
- ๐ฌ **150+** DMs from companies
@@ -548,6 +575,7 @@ This started as a library for developers. But based on demand, we're building:
- ๐ฆ **3** factoring company partnership discussions
**Market data:**
+
- ๐ **3.5M** truck drivers in US alone
- ๐ฆ **60M** gig economy workers globally
- ๐ฐ **$40T+** in mobile-first GDP
@@ -567,12 +595,14 @@ MIT License - see [LICENSE](LICENSE)
## ๐ Acknowledgments
Built on:
+
- [Browser Use](https://github.com/browser-use/browser-use) - Web agent inspiration
- [Anthropic Computer Use](https://www.anthropic.com/news/computer-use) - Proved UI control works
- Android Accessibility API - The enabling technology
- **The 4 million people who watched and validated this need**
Special thanks to:
+
- Truck drivers who showed me the real problem
- Early beta testers in logistics
- Everyone sharing and supporting this project
diff --git a/kernel-genai.py b/kernel-genai.py
new file mode 100644
index 0000000..834b744
--- /dev/null
+++ b/kernel-genai.py
@@ -0,0 +1,177 @@
+import os
+import time
+import subprocess
+import json
+from typing import Dict, Any, List
+from google import genai
+import sanitizer
+from dotenv import load_dotenv
+
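+# Pull GEMINI_API_KEY from a local .env file (see .env.example), if one exists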
+load_dotenv()
+
+# --- CONFIGURATION ---
+ADB_PATH = "adb" # Ensure adb is in your PATH
+MODEL = "gemini-2.5-flash" # Or another Gemini model as needed
+SCREEN_DUMP_PATH = "/sdcard/window_dump.xml"
+LOCAL_DUMP_PATH = "window_dump.xml"
+
+
+try:
+ client = genai.Client()
+except Exception as e:
+ # Handle case where API key is not set
+ print("Error: Failed to initialize Google Gen AI Client.")
+ print("Please ensure the GEMINI_API_KEY environment variable is set.")
+ exit(1)
+# ------------------------------------------------------
+
+
+def run_adb_command(command: List[str]):
+ """Executes a shell command via ADB."""
+ str_command = [str(c) for c in command]
+ result = subprocess.run([ADB_PATH] + str_command,
+ capture_output=True, text=True)
+ if result.stderr and "error" in result.stderr.lower():
+ print(f"โ ADB Error: {result.stderr.strip()}")
+ return result.stdout.strip()
+
+
+def get_screen_state() -> str:
+ """Dumps the current UI XML and returns the sanitized JSON string."""
+ # 1. Capture XML
+ run_adb_command(["shell", "uiautomator", "dump", SCREEN_DUMP_PATH])
+
+ # 2. Pull to local
+ pull_result = subprocess.run(
+ [ADB_PATH, "pull", SCREEN_DUMP_PATH, LOCAL_DUMP_PATH], capture_output=True, text=True)
+ if pull_result.returncode != 0:
+ print(f"โ ADB Pull Error: {pull_result.stderr.strip()}")
+ return "Error: Could not pull screen dump."
+
+ # 3. Read & Sanitize
+ if not os.path.exists(LOCAL_DUMP_PATH):
+ return "Error: Could not capture screen."
+
+ with open(LOCAL_DUMP_PATH, "r", encoding="utf-8") as f:
+ xml_content = f.read()
+
+ elements = sanitizer.get_interactive_elements(xml_content)
+ return json.dumps(elements, indent=2)
+
+
+def execute_action(action: Dict[str, Any]):
+ """Executes the action decided by the LLM."""
+ act_type = action.get("action")
+
+ if act_type == "tap":
+ coordinates = action.get("coordinates", [0, 0])
+ x, y = coordinates[0], coordinates[1]
+ print(f"๐ Tapping: ({x}, {y})")
+ run_adb_command(["shell", "input", "tap", str(x), str(y)])
+
+ elif act_type == "type":
+        text_to_type = action.get("text", "")
+ adb_text = text_to_type.replace(
+ " ", "%s") # ADB requires %s for spaces
+ print(f"โจ๏ธ Typing: {text_to_type}")
+ run_adb_command(["shell", "input", "text", adb_text])
+
+ elif act_type == "home":
+ print("๐ Going Home")
+        run_adb_command(["shell", "input", "keyevent", "KEYCODE_HOME"])
+
+ elif act_type == "back":
+ print("๐ Going Back")
+        run_adb_command(["shell", "input", "keyevent", "KEYCODE_BACK"])
+
+ elif act_type == "wait":
+ print("โณ Waiting...")
+ time.sleep(2)
+
+ elif act_type == "done":
+        print("✅ Goal Achieved.")
+ exit(0)
+ else:
+ print(f"โ ๏ธ Unknown action type: {act_type}")
+
+
+def get_llm_decision(goal: str, screen_context: str) -> Dict[str, Any]:
+ """Sends screen context to LLM and asks for the next move using Gemini API."""
+ system_prompt = """
+ You are an Android Driver Agent. Your job is to achieve the user's goal by navigating the UI.
+
+ You will receive:
+ 1. The User's Goal.
+ 2. A list of interactive UI elements (JSON) with their (x,y) center coordinates.
+
+ You must output ONLY a valid JSON object with your next action.
+
+ Available Actions:
+ - {"action": "tap", "coordinates": [x, y], "reason": "Why you are tapping"}
+ - {"action": "type", "text": "Hello World", "reason": "Why you are typing"}
+ - {"action": "home", "reason": "Go to home screen"}
+ - {"action": "back", "reason": "Go back"}
+ - {"action": "wait", "reason": "Wait for loading"}
+ - {"action": "done", "reason": "Task complete"}
+
+ Example Output:
+ {"action": "tap", "coordinates": [540, 1200], "reason": "Clicking the 'Connect' button"}
+ """
+
+ full_prompt = (
+ f"{system_prompt}\n\n"
+ f"GOAL: {goal}\n\n"
+ f"SCREEN_CONTEXT:\n{screen_context}"
+ )
+
+ response = client.models.generate_content(
+ model=MODEL,
+ contents=[{"role": "user", "parts": [{"text": full_prompt}]}],
+ config={
+ "response_mime_type": "application/json",
+ }
+ )
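+    # response_mime_type="application/json" asks Gemini for bare JSON (no markdown
+    # fences), so response.text should parse directly.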
+ return json.loads(response.text)
+
+
+def run_agent(goal: str, max_steps=10):
+ print(f"๐ Android Use Agent Started. Goal: {goal}")
+
+ for step in range(max_steps):
+ print(f"\n--- Step {step + 1} ---")
+
+ # 1. Perception
+ print("๐ Scanning Screen...")
+ screen_context = get_screen_state()
+
+ if screen_context.startswith("Error"):
+ print(f"โ Aborting: {screen_context}")
+ break
+
+ # 2. Reasoning
+ print("๐ง Thinking...")
+ try:
+ decision = get_llm_decision(goal, screen_context)
+ except Exception as e:
+ print(f"โ LLM Decision Error: {e}")
+ time.sleep(2)
+ continue
+
+ print(f"๐ก Decision: {decision.get('reason')}")
+
+ # 3. Action
+ execute_action(decision)
+
+ # Wait for UI to update
+ time.sleep(2)
+
+
+if __name__ == "__main__":
+ # Example Goal: "Open settings and turn on Wi-Fi"
+ GOAL = input("Enter your goal: ")
+ if not GOAL:
+ print("No goal entered. Exiting.")
+ else:
+ run_agent(GOAL)
diff --git a/kernel.py b/kernel.py
index f827897..2f9dd0f 100644
--- a/kernel.py
+++ b/kernel.py
@@ -2,7 +2,7 @@
import time
import subprocess
import json
-from typing import Dict, Any
+from typing import Dict, Any, List
from openai import OpenAI
import sanitizer
@@ -14,61 +14,67 @@
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+
def run_adb_command(command: List[str]):
"""Executes a shell command via ADB."""
- result = subprocess.run([ADB_PATH] + command, capture_output=True, text=True)
+ result = subprocess.run([ADB_PATH] + command,
+ capture_output=True, text=True)
if result.stderr and "error" in result.stderr.lower():
print(f"โ ADB Error: {result.stderr.strip()}")
return result.stdout.strip()
+
def get_screen_state() -> str:
"""Dumps the current UI XML and returns the sanitized JSON string."""
# 1. Capture XML
run_adb_command(["shell", "uiautomator", "dump", SCREEN_DUMP_PATH])
-
+
# 2. Pull to local
run_adb_command(["pull", SCREEN_DUMP_PATH, LOCAL_DUMP_PATH])
-
+
# 3. Read & Sanitize
if not os.path.exists(LOCAL_DUMP_PATH):
return "Error: Could not capture screen."
-
+
with open(LOCAL_DUMP_PATH, "r", encoding="utf-8") as f:
xml_content = f.read()
-
+
elements = sanitizer.get_interactive_elements(xml_content)
return json.dumps(elements, indent=2)
+
def execute_action(action: Dict[str, Any]):
"""Executes the action decided by the LLM."""
act_type = action.get("action")
-
+
if act_type == "tap":
x, y = action.get("coordinates")
print(f"๐ Tapping: ({x}, {y})")
run_adb_command(["shell", "input", "tap", str(x), str(y)])
-
+
elif act_type == "type":
- text = action.get("text").replace(" ", "%s") # ADB requires %s for spaces
+ text = action.get("text").replace(
+ " ", "%s") # ADB requires %s for spaces
print(f"โจ๏ธ Typing: {action.get('text')}")
run_adb_command(["shell", "input", "text", text])
-
+
elif act_type == "home":
print("๐ Going Home")
run_adb_command(["shell", "input", "keyevent", "KEYWORDS_HOME"])
-
+
elif act_type == "back":
print("๐ Going Back")
run_adb_command(["shell", "input", "keyevent", "KEYWORDS_BACK"])
-
+
elif act_type == "wait":
print("โณ Waiting...")
time.sleep(2)
-
+
elif act_type == "done":
print("✅ Goal Achieved.")
exit(0)
+
def get_llm_decision(goal: str, screen_context: str) -> Dict[str, Any]:
"""Sends screen context to LLM and asks for the next move."""
system_prompt = """
@@ -91,7 +97,7 @@ def get_llm_decision(goal: str, screen_context: str) -> Dict[str, Any]:
Example Output:
{"action": "tap", "coordinates": [540, 1200], "reason": "Clicking the 'Connect' button"}
"""
-
+
response = client.chat.completions.create(
model=MODEL,
response_format={"type": "json_object"},
@@ -100,32 +106,34 @@ def get_llm_decision(goal: str, screen_context: str) -> Dict[str, Any]:
{"role": "user", "content": f"GOAL: {goal}\n\nSCREEN_CONTEXT:\n{screen_context}"}
]
)
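+    # With response_format={"type": "json_object"}, message.content should already be valid JSON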
-
+
return json.loads(response.choices[0].message.content)
+
def run_agent(goal: str, max_steps=10):
print(f"๐ Android Use Agent Started. Goal: {goal}")
-
+
for step in range(max_steps):
print(f"\n--- Step {step + 1} ---")
-
+
# 1. Perception
print("๐ Scanning Screen...")
screen_context = get_screen_state()
-
+
# 2. Reasoning
print("๐ง Thinking...")
decision = get_llm_decision(goal, screen_context)
print(f"๐ก Decision: {decision.get('reason')}")
-
+
# 3. Action
execute_action(decision)
-
+
# Wait for UI to update
time.sleep(2)
+
if __name__ == "__main__":
# Example Goal: "Open settings and turn on Wi-Fi"
# Or your demo goal: "Find the 'Connect' button and tap it"
GOAL = input("Enter your goal: ")
- run_agent(GOAL)
\ No newline at end of file
+ run_agent(GOAL)
diff --git a/requirements.txt b/requirements.txt
index 06018fe..4201827 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
-openai>=1.12.0
\ No newline at end of file
+openai>=1.12.0
+google-genai>=1.55.0
+python-dotenv>=1.0.0
\ No newline at end of file