
Commit

Format python code with black and add contribution instructions to readme
Peiyang-Song committed Sep 1, 2024
1 parent 157e158 commit 30a70c0
Showing 9 changed files with 163 additions and 128 deletions.
6 changes: 6 additions & 0 deletions python/README.md
@@ -21,3 +21,9 @@ uvicorn server:app --port 23337
```

After the server is up and running, you can go to `LeanCopilotTests/ModelAPIs.lean` to try your external models out!

## Contributions

We welcome contributions. If you think it would be beneficial to add other external models, or if you would like to make other contributions regarding the external model support in Lean Copilot, please feel free to open a PR. The main entry points are this `python` folder and the `ModelAPIs.lean` file under `LeanCopilotTests`.

We use [`black`](https://pypi.org/project/black/) to format code in this folder.
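
As an illustration (not part of this commit), a typical way to apply the formatting locally is to install `black` from PyPI and run it from inside the `python` folder before opening a PR:

```
pip install black
black .
```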
2 changes: 1 addition & 1 deletion python/external_models/__init__.py
@@ -2,4 +2,4 @@
from .hf_runner import HFTacticGenerator
from .vllm_runner import VLLMTacticGenerator
from .claude_runner import ClaudeRunner
from .gemini_runner import GeminiRunner
from .gemini_runner import GeminiRunner
34 changes: 19 additions & 15 deletions python/external_models/claude_runner.py
@@ -1,5 +1,6 @@
from typing import List, Tuple
import os

try:
from anthropic import Anthropic
except ImportError as e:
@@ -12,32 +13,35 @@ class ClaudeRunner(Generator, Transformer):

def __init__(self, **args):
self.client_kwargs: dict[str | str] = {
"model": args['model'],
"temperature": args['temperature'],
"max_tokens": args['max_tokens'],
"top_p": args['top_p'],
}
"model": args["model"],
"temperature": args["temperature"],
"max_tokens": args["max_tokens"],
"top_p": args["top_p"],
}
self.name = self.client_kwargs["model"]

def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
prompt = pre_process_input(self.name, input + target_prefix)

response = self.client.completions.create(
prompt=prompt,
**self.client_kwargs,
)
prompt=prompt,
**self.client_kwargs,
)
content = response.completion

results = [(post_process_output(self.name, content),1.0)] # Currently Claude only supports one output.
results = [
(post_process_output(self.name, content), 1.0)
] # Currently Claude only supports one output.
return choices_dedup(results)


if __name__ == "__main__":
generation_kwargs = {"model": "claude-3-opus",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
}
generation_kwargs = {
"model": "claude-3-opus",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
}

model = ClaudeRunner(**generation_kwargs)
print(model.generate("n : ℕ\n⊢ gcd n n = n"))
37 changes: 25 additions & 12 deletions python/external_models/external_parser.py
@@ -10,28 +10,41 @@ def get_cuda_if_available():

def pre_process_input(model_name, input):
if model_name == "internlm/internlm2-math-plus-1_8b":
prompt="My LEAN 4 state is:\n```lean\n" + input + \
"```\nPlease predict a possible tactic to help me prove the theorem."
prompt = (
"My LEAN 4 state is:\n```lean\n"
+ input
+ "```\nPlease predict a possible tactic to help me prove the theorem."
)
prompt = f"""<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"""
elif model_name == "gpt-3.5-turbo" or model_name == "gpt-4-turbo-preview":
prompt = 'Here is a theorom you need to prove in Lean:\n' + \
input+'\nNow you should suggest one line tactic in lean code:'
elif 'gemini' in model_name or "claude" in model_name:
prompt = 'Here is a theorom you need to prove in Lean:\n' + \
input+'\nNow you should suggest one line tactic in lean code:'
prompt = (
"Here is a theorom you need to prove in Lean:\n"
+ input
+ "\nNow you should suggest one line tactic in lean code:"
)
elif "gemini" in model_name or "claude" in model_name:
prompt = (
"Here is a theorom you need to prove in Lean:\n"
+ input
+ "\nNow you should suggest one line tactic in lean code:"
)
else:
raise NotImplementedError(f"External model '{model_name}' not supported")
return prompt


def post_process_output(model_name, output):
if model_name == "internlm/internlm2-math-plus-1_8b":
result = output.split(
'assistant')[-1].split('lean')[-1].split('```')[0].split('\n')[1]
result = (
output.split("assistant")[-1]
.split("lean")[-1]
.split("```")[0]
.split("\n")[1]
)
elif model_name == "gpt-3.5-turbo" or model_name == "gpt-4-turbo-preview":
result = output.split('lean')[-1].split('```')[0].split('\n')[1]
elif 'gemini' in model_name or "claude" in model_name:
result = output.split('lean')[-1].split('```')[0].split('\n')[1]
result = output.split("lean")[-1].split("```")[0].split("\n")[1]
elif "gemini" in model_name or "claude" in model_name:
result = output.split("lean")[-1].split("```")[0].split("\n")[1]
else:
raise NotImplementedError(f"External model '{model_name}' not supported")
return result
52 changes: 28 additions & 24 deletions python/external_models/gemini_runner.py
@@ -1,5 +1,6 @@
from typing import List, Tuple
import os

try:
import google.generativeai as genai
from google.generativeai import GenerationConfig
@@ -26,44 +27,47 @@ class GeminiRunner(Generator, Transformer):
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_NONE",
},]

def __init__(self, **args):
},
]

def __init__(self, **args):
self.client_kwargs: dict[str | str] = {
"model": args['model'],
"temperature": args['temperature'],
"max_tokens": args['max_tokens'],
"top_p": args['top_p'],

"model": args["model"],
"temperature": args["temperature"],
"max_tokens": args["max_tokens"],
"top_p": args["top_p"],
}
self.name = self.client_kwargs["model"]
self.client = genai.GenerativeModel(args['model'])
self.client = genai.GenerativeModel(args["model"])
self.generation_config = GenerationConfig(
candidate_count=1,
max_output_tokens=args['max_tokens'],
temperature=args['temperature'],
top_p=args['top_p'],
max_output_tokens=args["max_tokens"],
temperature=args["temperature"],
top_p=args["top_p"],
)

def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
prompt = pre_process_input(self.name, input + target_prefix)

response = self.client.generate_content(
prompt,
generation_config=self.generation_config,
safety_settings=GeminiRunner.safety_settings,
)
prompt,
generation_config=self.generation_config,
safety_settings=GeminiRunner.safety_settings,
)

results = [(post_process_output(self.name, response.text),1.0)] # Currently Gemini only supports one output.
results = [
(post_process_output(self.name, response.text), 1.0)
] # Currently Gemini only supports one output.
return choices_dedup(results)


if __name__ == "__main__":
generation_kwargs = {"model": 'gemini-1.0-pro',
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
}
generation_kwargs = {
"model": "gemini-1.0-pro",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
}

model = GeminiRunner(**generation_kwargs)
print(model.generate("n : ℕ\n⊢ gcd n n = n"))
61 changes: 32 additions & 29 deletions python/external_models/hf_runner.py
@@ -9,28 +9,27 @@


class HFTacticGenerator(Generator, Transformer):
def __init__(
self,
**args
) -> None:
self.name = args['model']
def __init__(self, **args) -> None:
self.name = args["model"]
self.tokenizer = AutoTokenizer.from_pretrained(
self.name, trust_remote_code=True)
device = args['device']
self.name, trust_remote_code=True
)
device = args["device"]
if device == "auto":
device = get_cuda_if_available()
else:
device = torch.device(device)
logger.info(f"Loading {self.name} on {device}")
self.model = AutoModelForCausalLM.from_pretrained(
self.name, trust_remote_code=True).to(device)
self.name, trust_remote_code=True
).to(device)

self.generation_args: dict[str | str] = {
"do_sample": args["do_sample"],
"temperature": args['temperature'], # chat default is 0.8.
"max_new_tokens": args['max_new_tokens'],
"top_p": args['top_p'], # chat default is 0.8.
"num_return_sequences": args['num_return_sequences'],
"temperature": args["temperature"], # chat default is 0.8.
"max_new_tokens": args["max_new_tokens"],
"top_p": args["top_p"], # chat default is 0.8.
"num_return_sequences": args["num_return_sequences"],
# "num_beams": self.num_return_sequences, # Here if we use beam search for llms the output are not diverse (few tactics are provided).
"output_scores": args["output_scores"],
"output_logits": args["output_logits"],
@@ -47,16 +46,19 @@ def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float
self.model = self.model.eval()

tokenized_input = self.tokenizer(prompt, return_tensors="pt")
eos_token_id = [self.tokenizer.eos_token_id,
self.tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0]]
eos_token_id = [
self.tokenizer.eos_token_id,
self.tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0],
]
outputs = self.model.generate(
tokenized_input.input_ids.to(self.device),
eos_token_id=eos_token_id,
**self.generation_args
**self.generation_args,
)
response = self.tokenizer.batch_decode(
outputs['sequences'], skip_special_tokens=True)

outputs["sequences"], skip_special_tokens=True
)

result = []
index = 0
for out, score in zip(response, outputs.scores):
Expand All @@ -68,18 +70,19 @@ def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float


if __name__ == "__main__":
generation_kwargs = {"model": "internlm/internlm2-math-plus-1_8b",
"temperature": 0.6,
"max_new_tokens": 1024,
"top_p": 0.9,
"length_penalty": 0,
"num_return_sequences": 64,
"do_sample": True,
"output_scores": True,
"output_logits": False,
"return_dict_in_generate": True,
"device": "auto",
}
generation_kwargs = {
"model": "internlm/internlm2-math-plus-1_8b",
"temperature": 0.6,
"max_new_tokens": 1024,
"top_p": 0.9,
"length_penalty": 0,
"num_return_sequences": 64,
"do_sample": True,
"output_scores": True,
"output_logits": False,
"return_dict_in_generate": True,
"device": "auto",
}
model = HFTacticGenerator(**generation_kwargs)
model.cuda()
print(model.generate("n : ℕ\n⊢ gcd n n = n"))
43 changes: 24 additions & 19 deletions python/external_models/oai_runner.py
@@ -14,14 +14,14 @@ class OpenAIRunner(Generator, Transformer):

def __init__(self, **args):
self.client_kwargs: dict[str | str] = {
"model": args['model'],
"temperature": args['temperature'],
"max_tokens": args['max_tokens'],
"top_p": args['top_p'],
"model": args["model"],
"temperature": args["temperature"],
"max_tokens": args["max_tokens"],
"top_p": args["top_p"],
"frequency_penalty": 0,
"presence_penalty": 0,
"n": args['num_return_sequences'],
"timeout": args['openai_timeout'],
"n": args["num_return_sequences"],
"timeout": args["openai_timeout"],
# "stop": args.stop, # stop is only used for base models currently
}
self.name = self.client_kwargs["model"]
@@ -30,7 +30,6 @@ def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float
prompt = pre_process_input(self.name, input + target_prefix)
prompt = [
{"role": "user", "content": f"{prompt}"},

]
try:
response = OpenAIRunner.client.chat.completions.create(
@@ -56,22 +55,28 @@ def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float
print("Exception: ", repr(e))
raise e

results = [(post_process_output(self.name, c.message.content), np.exp(-np.mean([
token.logprob for token in c.logprobs.content])))for c in response.choices]
results = [
(
post_process_output(self.name, c.message.content),
np.exp(-np.mean([token.logprob for token in c.logprobs.content])),
)
for c in response.choices
]
return choices_dedup(results)


if __name__ == "__main__":
generation_kwargs = {"model": "gpt-4-turbo-preview",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
"frequency_penalty": 0,
"presence_penalty": 0,
"num_return_sequences": 16,
"openai_timeout": 45,
# "stop": args.stop, # stop is only used for base models currently
}
generation_kwargs = {
"model": "gpt-4-turbo-preview",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
"frequency_penalty": 0,
"presence_penalty": 0,
"num_return_sequences": 16,
"openai_timeout": 45,
# "stop": args.stop, # stop is only used for base models currently
}

model = OpenAIRunner(**generation_kwargs)
print(model.generate("n : ℕ\n⊢ gcd n n = n"))
