diff --git a/api-server/jobs.db b/api-server/jobs.db index 516300bb..32295018 100644 Binary files a/api-server/jobs.db and b/api-server/jobs.db differ diff --git a/api-server/jobs.go b/api-server/jobs.go index b2ccc3b4..8db61395 100644 --- a/api-server/jobs.go +++ b/api-server/jobs.go @@ -17,7 +17,11 @@ import ( // and ensures a jobs table exists. func (srv *ILabServer) initDB() { var err error - srv.db, err = sql.Open("sqlite3", "jobs.db") + dbName := "jobs.db" + if srv.mockServer { + dbName = "mock-jobs.db" + } + srv.db, err = sql.Open("sqlite3", dbName) if err != nil { srv.log.Fatalf("Failed to open SQLite database: %v", err) } diff --git a/api-server/main.go b/api-server/main.go index 394d5644..a0c42c3e 100644 --- a/api-server/main.go +++ b/api-server/main.go @@ -109,15 +109,15 @@ type ILabServer struct { modelProcessBase *exec.Cmd modelProcessLatest *exec.Cmd - // Base model reference baseModel string // Map of "pre-train"/"post-train" => jobID for VLLM serving servedModelJobIDs map[string]string jobIDsMutex sync.RWMutex - // Cache variables modelCache ModelCache + + mockServer bool } func main() { @@ -143,6 +143,7 @@ func main() { rootCmd.Flags().BoolVar(&srv.isCuda, "cuda", false, "Enable Cuda (default: false)") rootCmd.Flags().BoolVar(&srv.useVllm, "vllm", false, "Enable VLLM model serving using podman containers") rootCmd.Flags().StringVar(&srv.pipelineType, "pipeline", "", "Pipeline type (simple, accelerated, full)") + rootCmd.Flags().BoolVar(&srv.mockServer, "mock-server", false, "Enable mock mode: simulate backend jobs for development (jobs run for 30s)") rootCmd.Flags().BoolVar(&srv.debugEnabled, "debug", false, "Enable debug logging") // PreRun to validate flags @@ -214,9 +215,15 @@ func (srv *ILabServer) runServer(cmd *cobra.Command, args []string) { srv.ilabCmd = ilabPath } else { // Use ilab from virtual environment + // First attempt: baseDir/bin/ilab srv.ilabCmd = filepath.Join(srv.baseDir, "bin", "ilab") if _, err := os.Stat(srv.ilabCmd); os.IsNotExist(err) { - srv.log.Fatalf("ilab binary not found at %s. Please ensure the virtual environment is set up correctly.", srv.ilabCmd) + // Second attempt: baseDir/venv/bin/ilab + altCmd := filepath.Join(srv.baseDir, "venv", "bin", "ilab") + if _, err := os.Stat(altCmd); os.IsNotExist(err) { + srv.log.Fatalf("ilab binary not found at %s or %s. Please ensure the virtual environment is set up correctly.", srv.ilabCmd, altCmd) + } + srv.ilabCmd = altCmd } } @@ -394,6 +401,15 @@ func (srv *ILabServer) reconstructServedModelJobIDs() { // startGenerateJob launches a job to run "ilab data generate" and tracks it. func (srv *ILabServer) startGenerateJob() (string, error) { + if srv.mockServer { + jobID, err := srv.simulateJob("generate") + if err != nil { + return "", err + } + srv.log.Infof("Started mock generate job: %s", jobID) + return jobID, nil + } + ilabPath := srv.getIlabCommand() // Hard-coded pipeline choice for data generate, or we could use srv.pipelineType @@ -470,6 +486,15 @@ func (srv *ILabServer) startGenerateJob() (string, error) { // startTrainJob starts a training job with the given parameters. func (srv *ILabServer) startTrainJob(modelName, branchName string, epochs *int) (string, error) { + if srv.mockServer { + jobID, err := srv.simulateJob("train") + if err != nil { + return "", err + } + srv.log.Infof("Started mock train job: %s", jobID) + return jobID, nil + } + srv.log.Infof("Starting training job for model: '%s', branch: '%s'", modelName, branchName) jobID := fmt.Sprintf("t-%d", time.Now().UnixNano()) @@ -681,7 +706,15 @@ func (srv *ILabServer) generateTrainPipelineHandler(w http.ResponseWriter, r *ht // runPipelineJob orchestrates data generate + model train steps in sequence. func (srv *ILabServer) runPipelineJob(job *Job, modelName, branchName string, epochs *int) { - // Open the pipeline job log + if srv.mockServer { + jobID, err := srv.simulateJob("generate") + if err != nil { + return + } + srv.log.Infof("Started mock generate job: %s", jobID) + return + } + logFile, err := os.Create(job.LogFile) if err != nil { srv.log.Errorf("Error creating pipeline log file for job %s: %v", job.JobID, err) @@ -829,6 +862,13 @@ func (srv *ILabServer) getFullModelPath(modelName string) (string, error) { // runIlabCommand executes the ilab command with the provided arguments and returns combined output. func (srv *ILabServer) runIlabCommand(args ...string) (string, error) { + if srv.mockServer { + if len(args) >= 2 && args[0] == "model" && args[1] == "list" { + return "Mock Model A\nMock Model B\n", nil + } + return "mock output", nil + } + cmdPath := srv.getIlabCommand() cmd := exec.Command(cmdPath, args...) if !srv.rhelai { diff --git a/api-server/mock.go b/api-server/mock.go new file mode 100644 index 00000000..c29af82c --- /dev/null +++ b/api-server/mock.go @@ -0,0 +1,58 @@ +// mock_mode.go +package main + +import ( + "fmt" + "os" + "time" +) + +// simulateJob simulates a job of the given type (e.g., "generate" or "train"). +// It creates a job record with a unique job ID, writes a log file, and schedules +// a goroutine that waits 30s before marking the job as finished. +func (srv *ILabServer) simulateJob(jobType string) (string, error) { + // Generate a unique job ID + jobID := fmt.Sprintf("mock-%s-%d", jobType, time.Now().UnixNano()) + logFilePath := fmt.Sprintf("logs/%s.log", jobID) + + // Create and write an initial log file + f, err := os.Create(logFilePath) + if err != nil { + srv.log.Errorf("Mock: failed to create log file: %v", err) + return "", err + } + _, _ = f.WriteString(fmt.Sprintf("Mock %s job started...\n", jobType)) + f.Close() + + // Create a new job record + newJob := &Job{ + JobID: jobID, + Cmd: fmt.Sprintf("mock-%s", jobType), + Args: []string{}, + Status: "running", + PID: 0, + LogFile: logFilePath, + StartTime: time.Now(), + } + if err := srv.createJob(newJob); err != nil { + srv.log.Errorf("Mock: failed to create job record: %v", err) + return "", err + } + + // Simulate the job: after (n) seconds, mark it as finished. + go func(j *Job) { + srv.log.Infof("Mock job %s running (simulated 30s delay)...", j.JobID) + time.Sleep(30 * time.Second) + j.Lock.Lock() + defer j.Lock.Unlock() + j.Status = "finished" + now := time.Now() + j.EndTime = &now + if err := srv.updateJob(j); err != nil { + srv.log.Errorf("Mock: failed to update job %s: %v", j.JobID, err) + } + srv.log.Infof("Mock job %s finished successfully", j.JobID) + }(newJob) + + return jobID, nil +}