Commit 7c57ec8

Added GPT3 benchmark solver
1 parent 0588c30 commit 7c57ec8

13 files changed: +3023 -0 lines changed

solvers/README.md

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
# Solvers

This folder contains two subfolders for recreating the benchmarks in the [paper](https://arxiv.org/abs/2106.05784).
* [gpt3](/benchmarks/gpt3): the GPT-3 experiments.
* [enumerative](/benchmarks/enumerative): the enumerative top-down search solvers.

Each folder has a separate README explaining how to run the experiments.

solvers/gpt3/README.md

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
# Running GPT-3 experiments

These are instructions for re-running the GPT-3 experiments. The results will differ slightly from those in
the paper because the API is non-deterministic.

## Installation and execution

You will need an OpenAI GPT-3 API key, which you can sign up for [here](https://openai.com/join/).
You will then need to set it as the `OPENAI_API_KEY` environment variable.

The requirements can be installed with `pip3 install -r requirements.txt`.

The experiments were run with Python 3.6.9 (sys.version = '3.6.9 (default, Jan 26 2021, 15:33:00) \n[GCC 8.4.0]'),
but they should be compatible with later versions as well.

Then simply run `python run_gpt3_experiments.py`; the results are written to stdout. The script caches API responses,
so the first run is quite slow and verbose while it queries the API. Subsequent runs read from the cache, finish much
faster, and just print the results. Because of the caching, re-running the script is deterministic and gives exactly
the same results.

## Contact

If you are interested in reproducing the exact results of the paper, please contact the authors to obtain the exact
same query results.
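As a rough sketch of the run steps above (illustrative only, not part of the repository; only the script name and environment variable come from these instructions), the same flow from Python:

```python
import os
import subprocess

# Hypothetical pre-flight check: the solver code asserts this variable is set.
assert "OPENAI_API_KEY" in os.environ, "export OPENAI_API_KEY=<your key> first"

# Run the experiments; results (and, on the first run, verbose query logs) go to stdout.
subprocess.run(["python3", "run_gpt3_experiments.py"], check=True)
```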

solvers/gpt3/ezlog.py

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
import os
import logging
import inspect
import io

my_path = os.path.dirname(__file__)


def color_str(obj, code="\033[0;36m"):
    return code + str(obj) + '\033[0m'


_configured = False


def configure_logging(stdio_level=logging.INFO,
                      file_level=logging.DEBUG,
                      filename=".easy.log",
                      filepath=os.path.join(my_path, "logs")):
    os.makedirs(filepath, exist_ok=True)
    filename = os.path.join(filepath, filename)
    global _configured
    if _configured:
        warning("Re-configuring logging")
    stdio_handler = logging.StreamHandler()
    stdio_handler.setLevel(stdio_level)
    file_handler = logging.FileHandler(filename)
    file_handler.setLevel(file_level)

    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message).200s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=min(stdio_level, file_level),
        handlers=[stdio_handler, file_handler]
    )
    #
    # fh = logging.FileHandler('spam.log')
    # fh.setLevel(logging.DEBUG)
    # # create console handler with a higher log level
    # ch = logging.StreamHandler()
    # ch.setLevel(logging.ERROR)
    # # create formatter and add it to the handlers
    # formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # fh.setFormatter(formatter)
    # ch.setFormatter(formatter)
    # # add the handlers to the logger
    # logger.addHandler(fh)
    # logger.addHandler(ch)

    _configured = True
    _get_or_create_logger().debug("Configured logging")


_loggers = {}


def _get_or_create_logger():
    # Name the logger after the first module in the call stack outside this file.
    global _configured, _loggers
    if not _configured:
        configure_logging()
    try:
        for frame in inspect.stack():
            name = inspect.getmodule(frame[0]).__name__
            if name != __name__:
                break
    except Exception:
        name = "_"
    if name not in _loggers:
        _loggers[name] = logging.getLogger(name)
    return _loggers[name]


def print_to_string(*args, end="", **kwargs):
    # Render the arguments exactly as print() would, but return the string.
    with io.StringIO() as buf:
        print(*args, file=buf, end=end, **kwargs)
        return buf.getvalue()


def debug(*args, **kwargs):
    _get_or_create_logger().debug(print_to_string(*args, **kwargs))


def info(*args, **kwargs):
    _get_or_create_logger().info(print_to_string(*args, **kwargs))


log = info


def warning(*args, **kwargs):
    _get_or_create_logger().warning(print_to_string(*args, **kwargs))


warn = warning


def error(*args, **kwargs):
    _get_or_create_logger().error(print_to_string(*args, **kwargs))
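A minimal usage sketch for this logging helper (illustrative only, not part of the commit; it assumes `ezlog.py` is importable from the working directory):

```python
import ezlog

# Logging is configured lazily on first use; calling configure_logging() explicitly
# lets you change levels or the log file location.
ezlog.configure_logging()                # writes to logs/.easy.log next to ezlog.py
ezlog.info("starting experiment", 42)    # print-style arguments, joined like print()
ezlog.warn("cache is empty")             # warn is an alias for warning
ezlog.debug("goes only to the log file at the default stdio level")
```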

solvers/gpt3/lm_solve/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
from lm_solve.run import *

solvers/gpt3/lm_solve/gpt3_lib.py

Lines changed: 154 additions & 0 deletions
@@ -0,0 +1,154 @@
import os
import json
import openai
import ezlog
import time
import datetime

assert 'OPENAI_API_KEY' in os.environ, "Need to set environment variable `OPENAI_API_KEY`"
openai.api_key = os.environ['OPENAI_API_KEY']


_CACHE_PATH = os.path.join(os.path.dirname(__file__), "../.cache")
_CACHE_FILENAME = os.path.join(_CACHE_PATH, "gpt3.cache")
_ENCODING = "utf-8"

_cache = None


# The cache file is a list of lines, each a (key, result list) pair where the key is the
# query-params dictionary encoded as a string (without n). Multiple queries with the same
# params (except for n) are merged into a single big list.
def _save_line(item, comment=None):
    global _cache
    assert _cache is not None
    with open(_CACHE_FILENAME, "a", encoding=_ENCODING) as f:
        f.write(str(item) + ((" # " + comment + "\n") if comment else "\n"))


def _load_cache():
    global _cache

    assert _cache is None, "gpt3 cache already loaded"

    if not os.path.exists(_CACHE_PATH):
        ezlog.warn("Creating cache path")
        os.makedirs(_CACHE_PATH)

    _cache = {}

    if os.path.exists(_CACHE_FILENAME):
        time0 = time.perf_counter()
        with open(_CACHE_FILENAME, "r", encoding=_ENCODING) as f:
            for k, v in [eval(line) for line in f.readlines()]:
                if k not in _cache:
                    _cache[k] = v
                else:
                    _cache[k].extend(v)
        ezlog.info(f"Loaded gpt3 cache in {time.perf_counter()-time0:.1f}s")
    else:
        ezlog.warn("No gpt3 cache yet")


def query(prompt, n=10, max_tokens=150, temp=1.0, max_batch=32, stop=None, notes=None, cache_only=False, verbose=True):
    """Query gpt3

    :param prompt: Up to 2048 tokens (about 3-4k chars)
    :param n: number of answers, None returns all cached answers
    :param max_tokens:
    :param temp: 0.9 seems to work well
    :param max_batch: max to query at once
    :param stop: string to stop at or '' if not to stop
    :param notes: notes you want to save or change in case you want to run the same query more than once!
    :param cache_only: if True, fail instead of querying the API for uncached entries
    :param verbose: print the prompt and query parameters before calling the API
    :return: list of the n answer strings (cached answers first)
    """
    global _cache
    if _cache is None:
        _load_cache()

    if temp == 0 and n > 1:
        ezlog.debug("Temp 0: no point in running more than one query")
        n = 1

    key = str(dict(prompt=prompt, max_tokens=max_tokens, temp=temp, max_batch=max_batch, stop=stop, rep=notes))
    cached = _cache.get(key, [])
    if n is None:
        return cached[:]

    if len(cached) >= n:
        return cached[:n]

    assert not cache_only, "Entry not found in cache"

    if verbose:
        print("/" * 100)
        print("Querying GPT3 with prompt:")
        print(prompt)
        s = stop and stop.replace('\n', '\\n')
        print(f"/// n={n} ({n - len(cached)} new) max_tokens={max_tokens} temp={temp} max_batch={max_batch} stop={s}")
        print("/" * 100)

    time0 = time.perf_counter()

    new = []
    n -= len(cached)

    while n > 0:
        m = min(n, max_batch)

        res = openai.Completion.create(
            engine="davinci-msft",
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            n=m,
            stop=stop or None
        )

        new += [c["text"] for c in res["choices"]]
        n -= m

    _save_line((key, new), f"{time.perf_counter() - time0:.1f}s {datetime.datetime.now()}")
    ans = _cache[key] = cached + new
    return ans[:]

# old code
# # to persist calls to the API...
# _disk_cache = joblib.Memory(os.path.join(os.path.dirname(__file__), ".cache"), verbose=1).cache
#
#
# @_disk_cache
# def query(prompt, n=10, max_tokens=150, temperature=1.0, max_batch=32):
#     """Query gpt3
#
#     :param prompt: Up to 2048 tokens (about 3-4k chars)
#     :param n: number of answers
#     :param max_tokens:
#     :param temperature:
#     :param max_batch: max to query at once
#     :return: list of answers and then the response items
#     """
#     if temperature == 0 and n > 1:
#         ezlog.debug("Temp 0: no point in running more than one query")
#         n = 1
#
#     responses = []
#     while n > 0:
#         m = min(n, max_batch)
#         prompt_summary = prompt if len(prompt) < 80 else f"{prompt[:40]}...{prompt[-40:]}"
#         ezlog.warn(f"**** Running GPT3 query: temp {temperature}, n={m}, prompt={prompt_summary}")
#         time0 = time.perf_counter()
#         responses.append(openai.Completion.create(
#             engine="davinci-msft",
#             prompt=prompt,
#             max_tokens=max_tokens,
#             temperature=temperature,
#             n=m
#         ))
#         ezlog.info(f"**** Got response in {time.perf_counter()-time0}s...")
#         n -= m
#
#     return [c["text"] for r in responses for c in r["choices"]], responses
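A minimal usage sketch for the caching query wrapper above (illustrative only, not part of the commit; it assumes the `lm_solve` package is importable and `OPENAI_API_KEY` is set, and the prompt and parameters are made up):

```python
from lm_solve import gpt3_lib  # importing asserts that OPENAI_API_KEY is set

# First call hits the API (slow) and appends the results to ../.cache/gpt3.cache.
answers = gpt3_lib.query("def add(a, b):\n    return", n=4, max_tokens=16, temp=0.9, stop="\n")

# An identical call is served from the cache, so re-runs are deterministic.
same_answers = gpt3_lib.query("def add(a, b):\n    return", n=4, max_tokens=16, temp=0.9, stop="\n")
assert answers == same_answers

# cache_only=True fails instead of querying the API when an entry is missing.
cached = gpt3_lib.query("def add(a, b):\n    return", n=4, max_tokens=16, temp=0.9, stop="\n",
                        cache_only=True)
```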
