From 986e188529c2e4418002326ed490e49ac5cd5fb2 Mon Sep 17 00:00:00 2001 From: Syed Arsalan Shah Date: Fri, 12 Dec 2025 15:33:12 +0500 Subject: [PATCH 1/2] revise README for clarity and consistency in examples --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index b10e274..aa14c52 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ **[► See it automate a logistics workflow in 60 seconds →](https://x.com/ethanjlim/status/1999152070428148108?s=20)** -Driver texts a photo → Agent handles WhatsApp → Scanner app → Banking app → Invoice submitted +Driver texts a photo → Agent opens WhatsApp → uses scanner app → opens banking app → submits invoice
@@ -43,7 +43,7 @@ But **real work happens on mobile devices** in places where laptops don't fit: - 🏗️ **Field technicians** log work orders on tablets - 🏦 **Mobile banking** happens on phones, not web browsers -**3 billion Android devices. Zero AI agent access. Until now.** +**There are 3 billion Android devices and zero AI agent access — until now.** --- @@ -51,7 +51,7 @@ But **real work happens on mobile devices** in places where laptops don't fit: Watch Android Use automate an entire logistics workflow: -### Before (Manual - 10+ minutes) +### Before — Manual (10+ minutes) ``` 1. Driver takes photo of Bill of Lading 2. Opens WhatsApp, sends to back office @@ -61,7 +61,7 @@ Watch Android Use automate an entire logistics workflow: 6. Submits for payment ``` -### After (Automated - 30 seconds) +### After — Automated (30 seconds) ```python # Driver just texts the photo. Agent does the rest. run_agent(""" @@ -73,7 +73,7 @@ run_agent(""" """) ``` -**✅ Result:** Driver gets paid faster. No back-office work. No laptop needed. +**✅ Result:** Driver gets paid faster — no back-office work and no laptop needed. --- @@ -83,9 +83,9 @@ run_agent(""" -### 🚫 Computer Use (Anthropic) -- Requires desktop/laptop -- Takes screenshots → OCR +### 🚫 Computer Use (Anthropic) +- Requires a desktop or laptop +- Takes screenshots → uses OCR - Sends images to vision model - **$0.15 per action** - 3-5 second latency @@ -167,7 +167,7 @@ brew install android-platform-tools # macOS # 4. Connect device & verify adb devices -# 5. Set API key +# 5. Set your OpenAI API key: export OPENAI_API_KEY="sk-..." # 6. 
Run your first agent From dc418255b0d21cfa1eecc5d06082d48f0d6e1af5 Mon Sep 17 00:00:00 2001 From: Subhadip Jana Date: Fri, 12 Dec 2025 19:43:01 +0530 Subject: [PATCH 2/2] refactor: add sample.env file and fix small bug --- kernel.py | 9 ++++++--- sample.env | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 sample.env diff --git a/kernel.py b/kernel.py index f827897..22cd06f 100644 --- a/kernel.py +++ b/kernel.py @@ -2,17 +2,20 @@ import time import subprocess import json -from typing import Dict, Any +from typing import Dict, Any, List from openai import OpenAI import sanitizer +from dotenv import load_dotenv + +load_dotenv() # --- CONFIGURATION --- ADB_PATH = "adb" # Ensure adb is in your PATH -MODEL = "gpt-4o" # Or "gpt-4-turbo" for faster/cheaper execution +MODEL = os.environ.get("MODEL", "gpt-4o") # Or "gpt-4-turbo" for faster/cheaper execution SCREEN_DUMP_PATH = "/sdcard/window_dump.xml" LOCAL_DUMP_PATH = "window_dump.xml" -client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) +client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url=os.environ.get("BASE_URL")) def run_adb_command(command: List[str]): """Executes a shell command via ADB.""" diff --git a/sample.env b/sample.env new file mode 100644 index 0000000..137a39d --- /dev/null +++ b/sample.env @@ -0,0 +1,3 @@ +OPENAI_API_KEY="" +BASE_URL="https://api.openai.com/v1" +MODEL="gpt-4o" \ No newline at end of file