microsoft
diff --git a/‎solvers/enumerative/README.md‎
Lines changed: 34 additions & 0 deletions b/‎solvers/enumerative/README.md‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎solvers/enumerative/challenges/__init__.py‎
Lines changed: 5 additions & 0 deletions b/‎solvers/enumerative/challenges/__init__.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎solvers/enumerative/challenges/challenge.py‎
Lines changed: 133 additions & 0 deletions b/‎solvers/enumerative/challenges/challenge.py‎
Lines changed: 133 additions & 0 deletions
diff --git a/‎solvers/enumerative/challenges/solutions.py‎
Lines changed: 82 additions & 0 deletions b/‎solvers/enumerative/challenges/solutions.py‎
Lines changed: 82 additions & 0 deletions
diff --git a/‎solvers/enumerative/download_pretrained_roberta.sh‎
Lines changed: 21 additions & 0 deletions b/‎solvers/enumerative/download_pretrained_roberta.sh‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎solvers/enumerative/filter_outputs.py‎
Lines changed: 34 additions & 0 deletions b/‎solvers/enumerative/filter_outputs.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎solvers/enumerative/models/__init__.py‎
Lines changed: 12 additions & 0 deletions b/‎solvers/enumerative/models/__init__.py‎
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,34 @@
+# Enumerative puzzle solvers
+
+This folder contains the code for the enumerative models used in our Programming Puzzles paper.
+We used Python 3.8.0 and the libraries in the `requirements.txt` file.
+
+On a Linux machine with Python 3.8.0 installed, the following commands will set up the environment:
+```
+virtualenv -p /usr/bin/python3.8 env_solvers
+source env_solvers/bin/activate
+pip install -r requirements.txt
+```
+
+## Uniform solver
+```
+bash run_uniform.sh
+```
+This will run the uniform solver for a maximum of 10k trials per puzzle. This is required before training the other parameterized solvers.
+
+To run the uniform solver with 1M trials per puzzle, simply change the `max_n_progs` argument in the bash script.
+
+## Bigram random forest solver
+```
+bash run_bigram.sh
+```
+This will first train a parameterized model with self-bootstrapping (the first iteration is based on the uniform solutions). The last command will train a model without self-bootstrapping.
+
+## Transformers solver
+```
+bash download_pretrained_roberta.sh
+bash run_transformer.sh
+```
+The first script will download the RoBERTa-Base model that we trained on Python code.
+
+The second script will first train a parameterized model with self-bootstrapping (the first iteration is based on the uniform solutions). The last command will train a model without self-bootstrapping.
@@ -0,0 +1,5 @@
+from challenges.challenge import *
+from challenges.solutions import *
+
+def contains_node(root, x_node):
+    '''
+    Return True if `x_node` is `root` itself or is reachable from `root` through `children` attributes.
+
+    Nodes are compared by identity (`is`), not equality. Objects without a `children`
+    attribute are treated as leaves.
+    '''
+    if root is x_node:
+        return True
+    if not hasattr(root, "children"):
+        return False
+    for child in root.children:
+        if contains_node(child, x_node):
+            return True
+    return False
@@ -0,0 +1,133 @@
+import utils
+import tython
+import logging
+from typing import List, Dict, Callable, Tuple, Generator, Set, Sequence
+from tython import Program, nt
+
+logger = logging.getLogger(__name__)
+
+
+def extract_constants(prog) -> Dict:
+    '''
+    Extract all constants from program. Does not (yet) allow copying of comprehensions, e.g., '[i*i for i in range(10)]'
+
+    Walks `prog.tree` and returns a plain dict that maps a node kind (an `nt` value) to the
+    list of tree nodes of that kind that were collected. Two sources contribute entries:
+    annotated function arguments (keyed by the kind derived from their annotation) and
+    sub-trees that are built only from constants (keyed by the node's own `nt`).
+    If `prog` is None, an empty dict is returned.
+    '''
+
+    from collections import defaultdict
+    consts = defaultdict(list)
+
+    # Records annotated arguments found in an argument-list node and follows the chain of
+    # remaining arguments.
+    def handle_args(args_node):
+
+        if args_node.rule.name == 'cast:ARGS':
+            # A cast wrapper: the actual argument list is its first child.
+            handle_args(args_node.children[0])
+        else:
+            if len(args_node.children) >= 3 and args_node.children[1].nt == nt.TYPE:
+                annotation_node = args_node.children[1]
+                # NOTE(review): the annotation source is evaluated with eval(); only safe for trusted puzzles.
+                t = nt.type2nt(eval(annotation_node.src()))
+                # The first child is stored under the annotated kind
+                # (presumably the argument's name node - confirm against the tython grammar).
+                consts[t].append(args_node.children[0])
+            if args_node.children and args_node.children[-1].nt in {nt.ARGS, nt.DEFAULT_ARGS}:
+                # The last child holds the rest of the argument list.
+                handle_args(args_node.children[-1])
+
+    # Returns True when `node` was treated as a constant sub-tree, False otherwise.
+    # Constant nodes of the listed kinds are appended to `consts` as a side effect.
+    def helper(node):
+        if node.rule.name == 'def':  # it's a function
+            name_node, args_node, body_node = node.children
+            if name_node.src() == 'sat':
+                handle_args(args_node.children[-1])  # skip first arg for `def sat`
+            else:
+                handle_args(args_node)
+            helper(body_node)
+            return False
+        elif node.nt in {nt.NAME}:
+            # Names are never constants.
+            return False
+        elif node.nt in {nt.STMT}:
+            # Statements are not constants themselves, but their children are still searched.
+            for c in node.children:
+                helper(c)
+            return False
+        # For anything other than an int/str literal, every child must be constant.
+        # The list inside all() is built completely first, so helper() runs on every child
+        # even after one of them has returned False.
+        if node.rule.name not in {"int-const", "str-const"} and not all([helper(c) for c in node.children]):
+            return False
+        if node.nt.isa(nt.LIST, nt.SET, nt.DICT, nt.TUPLE, nt.RANGE,
+                       nt.INT, nt.FLOAT, nt.BOOL, nt.STR):
+            consts[node.nt].append(node)
+        return True
+
+    if prog is not None:
+        helper(prog.tree)
+
+    # Convert to a regular dict so that looking up a missing kind does not create an entry.
+    return dict(consts)
+
+#
+# q = Program("""
+# def sat(i: List[str], a=5):
+#     return i==['5']
+# """)
+#
+# extract_constants(q)
+#
+#
+# %%
+class Solution:
+    '''
+    Plain record describing one solution to a challenge.
+
+    All fields are optional and default to None:
+    `string` (source text), `prog` (parsed program), `likelihood`, `time` and `count`.
+    '''
+
+    def __init__(self, string=None, prog=None, likelihood=None, time=None, count=None):
+        self.string, self.prog, self.likelihood = string, prog, likelihood
+        self.time, self.count = time, count
+
+
+class SolverSolution(Solution):
+    '''
+    A `Solution` produced by a solver. It carries the same five optional fields as the base class.
+    '''
+
+    def __init__(self, string=None, prog=None, likelihood=None, time=None, count=None):
+        # The base initializer stores every field, so all of them are simply forwarded.
+        super().__init__(string=string, prog=prog, likelihood=likelihood, time=time, count=count)
+
+
+def get_arg_type_str(sat_str):
+    '''
+    Return the type annotation of the first argument of a `def sat(...)` source string.
+
+    The annotation is the text between the first ':' and the first ',' or ')' that is not
+    nested inside square brackets, with leading whitespace removed. For example,
+    "def sat(x: Dict[str, int], a=5):" yields "Dict[str, int]".
+
+    Raises AssertionError if `sat_str` does not start with "def sat(", contains no ':',
+    or has no ',' / ')' outside of square brackets. The checks are explicit raises rather
+    than `assert` statements so that they still run under `python -O`.
+    '''
+    if not (sat_str.startswith("def sat(") and ":" in sat_str):
+        raise AssertionError(f"Expected source starting with 'def sat(' and containing ':', got: {sat_str!r}")
+    type_start = sat_str.index(":") + 1
+    depth = 0  # current nesting level of square brackets
+    for i, c in enumerate(sat_str):
+        if c == '[':
+            depth += 1
+        elif c == ']':
+            depth -= 1
+        elif c in ")," and depth == 0:
+            # First top-level separator: the annotation ends here.
+            return sat_str[type_start:i].lstrip()
+    raise AssertionError(f"Could not find the end of the first argument in: {sat_str!r}")
+
+
+class Challenge:
+    '''
+    A puzzle built from a config mapping with the keys "name", "sat" and "sols", and
+    optionally "sol_tries" and "sol_time" (per-solution values, matched to "sols" by position).
+
+    After construction, `prog` holds the parsed tython program and `f` the result of running
+    it; both stay None if parsing or evaluation failed (a warning is logged instead of raising).
+    '''
+
+    def __init__(self, challenge_config, max_ticks=100000000):
+        sat_src = challenge_config["sat"]
+        self.name = challenge_config["name"]
+        self.f_str = sat_src
+        self.type_str = get_arg_type_str(sat_src)
+        # The annotation text is evaluated to obtain the actual type object.
+        self.type = eval(self.type_str)
+        self.gold_solutions = [Solution(string=sol) for sol in challenge_config["sols"]]
+        self.solver_solutions = []
+
+        # Optional per-solution statistics: config key -> Solution attribute to fill in.
+        for key, attr in (("sol_tries", "count"), ("sol_time", "time")):
+            if key in challenge_config:
+                for idx, value in enumerate(challenge_config[key]):
+                    setattr(self.gold_solutions[idx], attr, value)
+
+        self.solution_strs = challenge_config["sols"]
+        self.max_ticks = max_ticks
+
+        self._parse_challenge()
+
+    def _parse_challenge(self):
+        '''
+        Converts the challenge string to a tython program and runs it.
+
+        Sets `sol_kind` from the argument type, then tries to fill `prog` and `f`.
+        Failures are logged as warnings and leave the affected attributes as None.
+        '''
+        self.sol_kind = nt.type2nt(self.type)
+        self.prog = None
+        self.f = None
+        try:
+            self.prog = Program(self.f_str)
+            self.f = self.prog.run(max_ticks=self.max_ticks)
+        except Program.EvalException as e:
+            logger.warning(f"Exception evaluating {self.name} '{self.f_str}': {e}")
+        except Exception as e:
+            logger.warning(f"Exception parsing {self.name} '{self.f_str}': {e}")
@@ -0,0 +1,82 @@
+from typing import List, Set, Dict, Callable, Tuple
+import logging
+from challenges import extract_constants
+
+from tython import Program, TastNode, _RULES_BY_KIND, RULES, Rule, str2name
+from tython.rules import DEF_RULE
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+def generatable_answer(q: Program, a: Program):
+    '''
+    Return True if the answer program `a` could be generated for the puzzle `q`.
+
+    An answer is rejected if its tree contains a node whose rule is named "literal", or a
+    "COPY" node whose copied source text does not match the source of any constant
+    extracted from `q` for that rule's kind. A COPY of a kind for which `q` has no
+    constants at all is rejected as well (instead of raising KeyError).
+    '''
+    def get_src(node):
+        return Program(node).src(safe=False, simplify=False)
+
+    consts = extract_constants(q)
+    # Source text of every constant in the puzzle, grouped by kind.
+    const_srcs = {k: [get_src(c) for c in nodes] for k, nodes in consts.items()}
+
+    def helper(anode):
+        rule = anode.rule
+        if rule.name == "COPY":
+            # Kinds that are absent from the puzzle offer nothing to copy from.
+            return get_src(anode.children[0]) in const_srcs.get(rule.nt, ())
+        if rule.name == "literal":
+            return False
+        return all(helper(n) for n in anode.children)
+
+    return helper(a.tree)
+
+
+def verify_solutions(challenges):
+    '''
+    Verify all provided solutions to the given challenges and store the parsed solution
+    program in the challenge object.
+
+    For every challenge, each non-empty gold solution string is parsed, rebuilt as a
+    function named `sol`, run, and its result is passed to the challenge's `sat` function.
+    Solutions for which `sat` returns True get their `prog` attribute set and are kept;
+    `ch.gold_solutions` is replaced by the list of kept solutions, so empty, unparsable
+    and unsatisfying solutions are dropped. Problems are reported through the module logger.
+
+    Returns None. Exceptions raised while running a solution or `sat` are not caught here.
+    '''
+    successes = 0
+    # NOTE(review): `all_correct` is assigned but never read in this function.
+    all_correct = True
+    for ch in challenges:
+        verified_sols = []
+        # Run the challenge program and fetch its `sat` function.
+        # NOTE(review): `ch.prog` is dereferenced unconditionally; a challenge whose parsing failed
+        # (prog is None) would raise here - confirm callers filter those out.
+        f = ch.prog.run(max_ticks=ch.max_ticks)['sat']
+        # Last child of the argument node of the first top-level definition
+        # (presumably sat's arguments after the first one - confirm against the tython grammar).
+        args_node = ch.prog.tree.children[0].children[1].children[-1]
+        for sol_p in ch.gold_solutions:
+            s = sol_p.string
+            if s == '':
+                continue
+            # Verify solution is correct.
+            if True:
+                assert s.startswith('def ')
+                try:
+                    sol_prog = Program(s)
+                except Exception as e:
+                    logger.error(f"Exception parsing solution for {ch.name} '{s}': {e}")
+                    continue
+
+                # Inject the value assignments for the variables to the call of the sol func.
+                p_body = sol_prog.tree.children[0].children[2]
+                sol_prog.tree.children[0].children[1].children = args_node.children
+                # Build a fresh program: a function named `sol` with the challenge's args and the solution's body.
+                sol_prog = Program(TastNode(DEF_RULE, [str2name("sol"), args_node, p_body]))
+
+                # NOTE(review): `a_safe` is not used afterwards in this function.
+                a_safe = sol_prog.src(simplify=False)
+                # Run the rebuilt program and call `sol` with no arguments to get the candidate answer.
+                x = sol_prog.run(max_ticks=ch.max_ticks)["sol"]()
+                # Reset the challenge program's clock before calling `sat`
+                # (presumably so the tick budget applies to this call alone - confirm).
+                ch.prog.reset_clock()
+
+                v = f(x)
+                assert isinstance(v, bool)
+
+                # Only logged: a solution that is not generatable is still kept if it satisfies `sat`.
+                if not generatable_answer(ch.prog, sol_prog):
+                    logger.error(f'Challenge "{ch.name}" cannot be used to automatically generate solution "{s}"')
+
+                # TODO
+                # if type(y) != ch.type:
+                #    print(f'Challenge "{ch.name}" has wrong solution type: "{type(y)}"')
+                #    all_correct = False
+                if v is not True:  # checks both False and None
+                    logger.error(f'Challenge "{ch.name}" not satisfied by solution "{s}"')
+                else:
+                    sol_p.prog = sol_prog
+                    verified_sols.append(sol_p)
+                    successes += 1
+
+        # Keep only the solutions that were verified above.
+        ch.gold_solutions = verified_sols
+
+    logger.info(
+        f"Tython confirmed {successes:,} solutions to {len(challenges)} challenges."
+    )
+    return
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# Linux commands to download our RoBERTa model pretrained on Python code.
+# Newer versions of huggingface transformers don't require this but we need to adjust the rest of the code for them.
+
+set -ex
+
+# -p creates the parent directory as well and does not fail when the directories
+# already exist, so the script can be re-run under `set -e`.
+mkdir -p tals/roberta_python
+
+cd tals/roberta_python
+
+base_url=https://huggingface.co/tals/roberta_python/resolve/main
+for f in config.json merges.txt pytorch_model.bin special_tokens_map.json \
+         tokenizer_config.json training_args.bin vocab.json; do
+    wget "${base_url}/${f}"
+done
+
+cd ../..
@@ -0,0 +1,34 @@
+import json
+import sys
+import os
+
+
+# Usage: python filter_outputs.py <results.json> [shift]
+# Reads a JSON list of puzzle records and, for each try-count threshold, writes
+# "<results>_<threshold>.json" in which puzzles that were not solved within the
+# threshold have their solutions removed.
+inp_file = sys.argv[1]
+# Optional second argument: offset added to the recorded number of tries before thresholding.
+shift = int(sys.argv[2]) if len(sys.argv) > 2 else 0
+out_file = os.path.splitext(inp_file)[0]
+
+
+with open(inp_file, 'r') as f:
+    data = json.load(f)
+
+thresholds = [100, 1000, 10000, 100000, 1000000]
+for t in thresholds:
+    out = []
+    suc = 0
+    for p in data:
+        #if not p["name"].startswith("Study"):
+        #    continue
+        sols = p["sols"]
+        # An empty `sols` list means "no solution" (this script itself writes such entries),
+        # so it must not be indexed. Only the last solution and its try count are considered.
+        if sols and sols[-1] != "" and p["sol_tries"][-1] + shift <= t:
+            out.append(p)
+            suc += 1
+        else:
+            # Unsolved within this threshold: keep the puzzle but drop its solutions.
+            out.append(dict(name=p["name"], sat=p["sat"], sols=[]))
+
+    print(f"t={t}: solutions: {suc}/ {len(out)}")
+
+    with open(out_file + f"_{t}.json", "w") as fw:
+        json.dump(out, fw, indent=4)
@@ -0,0 +1,12 @@
+# Maps a model name to the object registered under it via `RegisterModel`.
+MODEL_REGISTRY = {}
+
+
+def RegisterModel(model_name):
+    '''
+    Decorator factory: `@RegisterModel(name)` stores the decorated object in
+    `MODEL_REGISTRY[name]` and returns it unchanged. A later registration under the
+    same name replaces the earlier one.
+    '''
+    def _register(model):
+        MODEL_REGISTRY.update({model_name: model})
+        return model
+
+    return _register
+
+from models.uniform import *
+#from models.bigram import *
+#from models.ml_bow_unigram import *
+from models.ml_bow_bigram import *