Skip to content

Commit 19e76c0

Browse files
Add Llama service and update pom
1 parent d142e38 commit 19e76c0

File tree

6 files changed

+302
-1
lines changed

6 files changed

+302
-1
lines changed

pom.xml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
<!-- force overriding property at command line, use ${maven.build.timestamp}-->
8181
<timestamp>${maven.build.timestamp}</timestamp>
8282
<maven.build.timestamp.format>yyyyMMddHHmm</maven.build.timestamp.format>
83-
<version>${version}</version>
83+
<version>${version}</version>
8484
<GitBranch>${git.branch}</GitBranch>
8585
<username>${NODE_NAME}</username>
8686
<platform>${NODE_LABELS}</platform>
@@ -614,6 +614,15 @@
614614
</dependency>
615615
<!-- LeapMotion end -->
616616

617+
<!-- Llama begin -->
618+
<dependency>
619+
<groupId>de.kherud</groupId>
620+
<artifactId>llama</artifactId>
621+
<version>1.1.0</version>
622+
<scope>provided</scope>
623+
</dependency>
624+
<!-- Llama end -->
625+
617626
<!-- LocalSpeech begin -->
618627
<dependency>
619628
<groupId>org.myrobotlab.audio</groupId>
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
package org.myrobotlab.service;

import de.kherud.llama.LlamaModel;
import de.kherud.llama.Parameters;
import org.myrobotlab.framework.Service;
import org.myrobotlab.logging.Level;
import org.myrobotlab.logging.LoggingFactory;
import org.myrobotlab.programab.Response;
import org.myrobotlab.service.config.LlamaConfig;
import org.myrobotlab.service.data.Utterance;
import org.myrobotlab.service.interfaces.ResponsePublisher;
import org.myrobotlab.service.interfaces.UtterancePublisher;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.util.stream.StreamSupport;

/**
 * Llama - a local large-language-model inference service backed by the
 * de.kherud:llama JNI binding around llama.cpp. Loads a GGUF model from a
 * configured directory (downloading it first when a URL is known), generates
 * a reply for incoming text, and publishes the reply both as an
 * {@link Utterance} and as a ProgramAB-style {@link Response}.
 */
public class Llama extends Service<LlamaConfig> implements UtterancePublisher, ResponsePublisher {

  /**
   * The loaded llama.cpp model. Transient so serialization of the service
   * never attempts to serialize native state; null until loadModel() succeeds.
   */
  private transient LlamaModel model;

  /**
   * Constructor of service, reservedkey typically is a services name and inId
   * will be its process id
   *
   * @param reservedKey the service name
   * @param inId        process id
   */
  public Llama(String reservedKey, String inId) {
    super(reservedKey, inId);
  }

  /**
   * Loads the GGUF model at the given path with CPU-only inference
   * (zero GPU layers). The configured user prompt is registered as the
   * anti-prompt so generation stops when the model starts a new user turn.
   *
   * @param modelPath absolute path to the .gguf model file
   */
  public void loadModel(String modelPath) {
    Parameters params = new Parameters.Builder()
        .setNGpuLayers(0)
        .setTemperature(0.7f)
        .setPenalizeNl(true)
        .setMirostat(Parameters.MiroStat.V2)
        // stop generating once the model emits the user-turn marker
        .setAntiPrompt(new String[]{config.userPrompt})
        .build();
    model = new LlamaModel(modelPath, params);
  }

  /**
   * Generates a reply for the given text and publishes it as both an
   * Utterance and a Response.
   *
   * @param text the user's input text
   * @return the published Response, or null when no model is loaded
   */
  public Response getResponse(String text) {
    if (model == null) {
      error("Model is not loaded.");
      return null;
    }

    String prompt = config.systemPrompt + config.systemMessage + "\n" + text + "\n";
    // generate() streams output tokens; concatenate them into the full reply
    String response = StreamSupport.stream(model.generate(prompt).spliterator(), false)
        .map(LlamaModel.Output::toString)
        .reduce("", (a, b) -> a + b);

    Utterance utterance = new Utterance();
    utterance.username = getName();
    utterance.text = response;
    utterance.isBot = true;
    // channel metadata is unused for a purely local service
    utterance.channel = "";
    utterance.channelType = "";
    utterance.channelBotName = getName();
    utterance.channelName = "";
    invoke("publishUtterance", utterance);
    Response res = new Response("friend", getName(), response, null);
    invoke("publishResponse", res);
    return res;
  }

  /**
   * Resolves a model file name to an absolute path, searching the
   * user-configured model directories first, then the service data
   * directory. If the model is absent but a download URL is configured,
   * the model is downloaded into the data directory.
   *
   * @param model the model file name, e.g. "llama-2-7b.Q4_K_M.gguf"
   * @return the absolute path of the model, or null if it cannot be located
   *         or downloaded
   */
  public String findModelPath(String model) {
    // First, we loop over all user-defined model directories
    for (String dir : config.modelPaths) {
      File path = new File(dir + fs + model);
      if (path.exists()) {
        return path.getAbsolutePath();
      }
    }

    // Now, we check our data directory for any downloaded models
    File path = new File(getDataDir() + fs + model);
    if (path.exists()) {
      return path.getAbsolutePath();
    } else if (config.modelUrls.containsKey(model)) {
      // Model was not in data but we do have a URL for it.
      String url = config.modelUrls.get(model);
      // BUGFIX: the original never closed the ReadableByteChannel (and the
      // underlying HTTP stream) - all three resources are now managed by
      // try-with-resources and closed in reverse order on every path.
      try (ReadableByteChannel readableByteChannel = Channels.newChannel(new URL(url).openStream());
           FileOutputStream fileOutputStream = new FileOutputStream(path);
           FileChannel fileChannel = fileOutputStream.getChannel()) {
        info("Downloading model %s to path %s from URL %s", model, path, url);
        fileChannel.transferFrom(readableByteChannel, 0, Long.MAX_VALUE);
      } catch (IOException e) {
        // BUGFIX: remove a partially written file so a later call does not
        // find it via path.exists() and treat it as a complete model
        path.delete();
        throw new RuntimeException("Failed to download model " + model + " from " + url, e);
      }
      return path.getAbsolutePath();
    }

    // Cannot find the model anywhere
    error("Could not locate model {}, add its URL to download it or add a directory where it is located", model);
    return null;
  }

  /**
   * Applies configuration; if a model is selected, attempts to locate
   * (or download) and load it immediately.
   */
  @Override
  public LlamaConfig apply(LlamaConfig c) {
    super.apply(c);

    if (config.selectedModel != null && !config.selectedModel.isEmpty()) {
      String modelPath = findModelPath(config.selectedModel);
      if (modelPath != null) {
        loadModel(modelPath);
      } else {
        error("Could not find selected model {}", config.selectedModel);
      }
    }

    return config;
  }

  /** Pass-through publish point for the framework's pub/sub routing. */
  @Override
  public Utterance publishUtterance(Utterance utterance) {
    return utterance;
  }

  /** Pass-through publish point for the framework's pub/sub routing. */
  @Override
  public Response publishResponse(Response response) {
    return response;
  }

  public static void main(String[] args) {
    try {

      LoggingFactory.init(Level.INFO);

      // Runtime runtime = Runtime.getInstance();
      // Runtime.startConfig("gpt3-01");

      WebGui webgui = (WebGui) Runtime.create("webgui", "WebGui");
      webgui.autoStartBrowser(false);
      webgui.startService();

      Llama llama = (Llama) Runtime.start("llama", "Llama");

      System.out.println(llama.getResponse("Hello!").msg);

    } catch (Exception e) {
      log.error("main threw", e);
    }
  }
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package org.myrobotlab.service.config;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * User-editable configuration for the Llama service: prompt templates,
 * the model to load, directories to search for model files, and known
 * download URLs for models that are not yet on disk.
 */
public class LlamaConfig extends ServiceConfig {

  // Text placed at the very start of the generated prompt
  public String systemPrompt = "";

  // Additional system-level text appended directly after the system prompt
  public String systemMessage = "";

  /**
   * The prompt that is prefixed to every user request.
   * No whitespace is stripped, so ensure that
   * the prompt is formatted so that a whitespace-stripped
   * user request does not cause tokenizer errors.
   */
  public String userPrompt = "### User:\n";

  /**
   * The prompt that the AI should use, should not
   * have a trailing space. Any trailing space
   * (but not newlines) are stripped to prevent
   * tokenizer errors.
   */
  public String assistantPrompt = "### Assistant:\n";

  // File name of the model loaded when the service config is applied
  public String selectedModel = "llama-2-7b-guanaco-qlora.Q4_K_M.gguf";

  // User-supplied directories searched (in order) for model files
  public List<String> modelPaths = new ArrayList<>();

  // Known download locations, keyed by model file name
  public Map<String, String> modelUrls = new HashMap<>();

  {
    // populate default download URLs; the map stays mutable so users can
    // register additional models
    modelUrls.put("stablebeluga-7b.Q4_K_M.gguf",
        "https://huggingface.co/TheBloke/StableBeluga-7B-GGUF/resolve/main/stablebeluga-7b.Q4_K_M.gguf");
    modelUrls.put("llama-2-7b-guanaco-qlora.Q4_K_M.gguf",
        "https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF/resolve/main/llama-2-7b-guanaco-qlora.Q4_K_M.gguf");
  }
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package org.myrobotlab.service.meta;
2+
3+
import org.myrobotlab.service.meta.abstracts.MetaData;
4+
5+
public class LlamaMeta extends MetaData {
6+
7+
public LlamaMeta() {
8+
addDescription(
9+
"A large language model inference engine based on the widely used " +
10+
"llama.cpp project. Can run most GGUF models."
11+
);
12+
13+
addDependency("de.kherud", "llama", "1.1.0");
14+
}
15+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
angular.module('mrlapp.service.LlamaGui', []).controller('LlamaGuiCtrl', ['$scope', 'mrl', function($scope, mrl) {
    console.info('LlamaGuiCtrl')
    var _self = this
    var msg = this.msg
    // rolling transcript of user requests and bot utterances
    $scope.utterances = []
    $scope.maxRecords = 500
    $scope.text = null

    // GOOD TEMPLATE TO FOLLOW
    this.updateState = function(service) {
        $scope.service = service
    }

    // init scope variables
    $scope.onTime = null
    $scope.onEpoch = null

    // append an entry to the transcript, dropping the oldest entry once
    // the buffer exceeds maxRecords (extracted from the duplicated
    // push-and-trim logic in onUtterance/onRequest)
    var pushUtterance = function(entry) {
        $scope.utterances.push(entry)
        if ($scope.utterances.length > $scope.maxRecords) {
            $scope.utterances.shift()
        }
    }

    // dispatch messages published by the Llama service
    this.onMsg = function(inMsg) {
        let data = inMsg.data[0]
        switch (inMsg.method) {
        case 'onState':
            _self.updateState(data)
            $scope.$apply()
            break
        case 'onUtterance':
            pushUtterance(data)
            $scope.$apply()
            break
        case 'onRequest':
            // BUGFIX: 'request' was assigned without var/let, creating an
            // accidental global (and a ReferenceError in strict mode)
            let request = {"username":"friend", "text":data}
            pushUtterance(request)
            $scope.$apply()
            break
        case 'onEpoch':
            $scope.onEpoch = data
            $scope.$apply()
            break
        default:
            console.error("ERROR - unhandled method " + $scope.name + " " + inMsg.method)
            break
        }
    }

    msg.subscribe('publishRequest')
    msg.subscribe('publishUtterance')
    msg.subscribe(this)
}
])
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<!-- Input row: free-text field wired to the Llama service's getResponse() -->
<div class="row">
  <form class="form-inline">
    text<br/>
    <input class="form-control" type="text" ng-model="text" placeholder="send text" title="send text">
    <button class="btn btn-default" ng-click="msg.getResponse(text)">send text</button><br/>
  </form>
</div>

<!-- Transcript: one row per utterance collected by LlamaGuiCtrl -->
<div class="row">
  <table class="table table-hover">
    <tbody>
      <tr ng-repeat="e in utterances" >
        <td>
          <small>{{e.username}}</small>
        </td>
        <td>
          <small>{{e.channel}}</small>
        </td>
        <td>
          <!-- pre-wrap preserves newlines in multi-line model output -->
          <small style="white-space: pre-wrap">{{e.text}}</small>
        </td>
      </tr>
    </tbody>
  </table>
</div>

0 commit comments

Comments
 (0)