
Commit

Format python code with black and add contribution instructions to readme
Peiyang-Song committed Sep 1, 2024
1 parent 157e158 commit 30a70c0
Showing 9 changed files with 163 additions and 128 deletions.
6 changes: 6 additions & 0 deletions python/README.md
@@ -21,3 +21,9 @@ uvicorn server:app --port 23337
```

After the server is up and running, you can go to `LeanCopilotTests/ModelAPIs.lean` to try your external models out!

## Contributions

We welcome contributions. If you think it would be beneficial to add other external models, or if you would like to make other contributions regarding the external model support in Lean Copilot, please feel free to open a PR. The main entry points are this `python` folder and the `ModelAPIs.lean` file under `LeanCopilotTests`.

We use [`black`](https://pypi.org/project/black/) to format code in this folder.
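
As an illustration (not part of this commit), a typical way to apply the formatting locally is to install `black` from PyPI and run it from inside the `python` folder before opening a PR:

```
pip install black
black .
```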
2 changes: 1 addition & 1 deletion python/external_models/__init__.py
@@ -2,4 +2,4 @@
from .hf_runner import HFTacticGenerator
from .vllm_runner import VLLMTacticGenerator
from .claude_runner import ClaudeRunner
from .gemini_runner import GeminiRunner
from .gemini_runner import GeminiRunner
34 changes: 19 additions & 15 deletions python/external_models/claude_runner.py
@@ -1,5 +1,6 @@
from typing import List, Tuple
import os

try:
from anthropic import Anthropic
except ImportError as e:
@@ -12,32 +13,35 @@ class ClaudeRunner(Generator, Transformer):

def __init__(self, **args):
self.client_kwargs: dict[str | str] = {
"model": args['model'],
"temperature": args['temperature'],
"max_tokens": args['max_tokens'],
"top_p": args['top_p'],
}
"model": args["model"],
"temperature": args["temperature"],
"max_tokens": args["max_tokens"],
"top_p": args["top_p"],
}
self.name = self.client_kwargs["model"]

def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
prompt = pre_process_input(self.name, input + target_prefix)

response = self.client.completions.create(
prompt=prompt,
**self.client_kwargs,
)
prompt=prompt,
**self.client_kwargs,
)
content = response.completion

results = [(post_process_output(self.name, content),1.0)] # Currently Claude only supports one output.
results = [
(post_process_output(self.name, content), 1.0)
] # Currently Claude only supports one output.
return choices_dedup(results)


if __name__ == "__main__":
generation_kwargs = {"model": "claude-3-opus",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
}
generation_kwargs = {
"model": "claude-3-opus",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
}

model = ClaudeRunner(**generation_kwargs)
print(model.generate("n : ℕ\n⊢ gcd n n = n"))
37 changes: 25 additions & 12 deletions python/external_models/external_parser.py
@@ -10,28 +10,41 @@ def get_cuda_if_available():

def pre_process_input(model_name, input):
if model_name == "internlm/internlm2-math-plus-1_8b":
prompt="My LEAN 4 state is:\n```lean\n" + input + \
"```\nPlease predict a possible tactic to help me prove the theorem."
prompt = (
"My LEAN 4 state is:\n```lean\n"
+ input
+ "```\nPlease predict a possible tactic to help me prove the theorem."
)
prompt = f"""<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"""
elif model_name == "gpt-3.5-turbo" or model_name == "gpt-4-turbo-preview":
prompt = 'Here is a theorom you need to prove in Lean:\n' + \
input+'\nNow you should suggest one line tactic in lean code:'
elif 'gemini' in model_name or "claude" in model_name:
prompt = 'Here is a theorom you need to prove in Lean:\n' + \
input+'\nNow you should suggest one line tactic in lean code:'
prompt = (
"Here is a theorom you need to prove in Lean:\n"
+ input
+ "\nNow you should suggest one line tactic in lean code:"
)
elif "gemini" in model_name or "claude" in model_name:
prompt = (
"Here is a theorom you need to prove in Lean:\n"
+ input
+ "\nNow you should suggest one line tactic in lean code:"
)
else:
raise NotImplementedError(f"External model '{model_name}' not supported")
return prompt


def post_process_output(model_name, output):
if model_name == "internlm/internlm2-math-plus-1_8b":
result = output.split(
'assistant')[-1].split('lean')[-1].split('```')[0].split('\n')[1]
result = (
output.split("assistant")[-1]
.split("lean")[-1]
.split("```")[0]
.split("\n")[1]
)
elif model_name == "gpt-3.5-turbo" or model_name == "gpt-4-turbo-preview":
result = output.split('lean')[-1].split('```')[0].split('\n')[1]
elif 'gemini' in model_name or "claude" in model_name:
result = output.split('lean')[-1].split('```')[0].split('\n')[1]
result = output.split("lean")[-1].split("```")[0].split("\n")[1]
elif "gemini" in model_name or "claude" in model_name:
result = output.split("lean")[-1].split("```")[0].split("\n")[1]
else:
raise NotImplementedError(f"External model '{model_name}' not supported")
return result
52 changes: 28 additions & 24 deletions python/external_models/gemini_runner.py
@@ -1,5 +1,6 @@
from typing import List, Tuple
import os

try:
import google.generativeai as genai
from google.generativeai import GenerationConfig
@@ -26,44 +27,47 @@ class GeminiRunner(Generator, Transformer):
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_NONE",
},]

def __init__(self, **args):
},
]

def __init__(self, **args):
self.client_kwargs: dict[str | str] = {
"model": args['model'],
"temperature": args['temperature'],
"max_tokens": args['max_tokens'],
"top_p": args['top_p'],

"model": args["model"],
"temperature": args["temperature"],
"max_tokens": args["max_tokens"],
"top_p": args["top_p"],
}
self.name = self.client_kwargs["model"]
self.client = genai.GenerativeModel(args['model'])
self.client = genai.GenerativeModel(args["model"])
self.generation_config = GenerationConfig(
candidate_count=1,
max_output_tokens=args['max_tokens'],
temperature=args['temperature'],
top_p=args['top_p'],
max_output_tokens=args["max_tokens"],
temperature=args["temperature"],
top_p=args["top_p"],
)

def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
prompt = pre_process_input(self.name, input + target_prefix)

response = self.client.generate_content(
prompt,
generation_config=self.generation_config,
safety_settings=GeminiRunner.safety_settings,
)
prompt,
generation_config=self.generation_config,
safety_settings=GeminiRunner.safety_settings,
)

results = [(post_process_output(self.name, response.text),1.0)] # Currently Gemini only supports one output.
results = [
(post_process_output(self.name, response.text), 1.0)
] # Currently Gemini only supports one output.
return choices_dedup(results)


if __name__ == "__main__":
generation_kwargs = {"model": 'gemini-1.0-pro',
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
}
generation_kwargs = {
"model": "gemini-1.0-pro",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
}

model = GeminiRunner(**generation_kwargs)
print(model.generate("n : ℕ\n⊢ gcd n n = n"))
61 changes: 32 additions & 29 deletions python/external_models/hf_runner.py
@@ -9,28 +9,27 @@


class HFTacticGenerator(Generator, Transformer):
def __init__(
self,
**args
) -> None:
self.name = args['model']
def __init__(self, **args) -> None:
self.name = args["model"]
self.tokenizer = AutoTokenizer.from_pretrained(
self.name, trust_remote_code=True)
device = args['device']
self.name, trust_remote_code=True
)
device = args["device"]
if device == "auto":
device = get_cuda_if_available()
else:
device = torch.device(device)
logger.info(f"Loading {self.name} on {device}")
self.model = AutoModelForCausalLM.from_pretrained(
self.name, trust_remote_code=True).to(device)
self.name, trust_remote_code=True
).to(device)

self.generation_args: dict[str | str] = {
"do_sample": args["do_sample"],
"temperature": args['temperature'], # chat default is 0.8.
"max_new_tokens": args['max_new_tokens'],
"top_p": args['top_p'], # chat default is 0.8.
"num_return_sequences": args['num_return_sequences'],
"temperature": args["temperature"], # chat default is 0.8.
"max_new_tokens": args["max_new_tokens"],
"top_p": args["top_p"], # chat default is 0.8.
"num_return_sequences": args["num_return_sequences"],
# "num_beams": self.num_return_sequences, # Here if we use beam search for llms the output are not diverse (few tactics are provided).
"output_scores": args["output_scores"],
"output_logits": args["output_logits"],
@@ -47,16 +46,19 @@ def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float
self.model = self.model.eval()

tokenized_input = self.tokenizer(prompt, return_tensors="pt")
eos_token_id = [self.tokenizer.eos_token_id,
self.tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0]]
eos_token_id = [
self.tokenizer.eos_token_id,
self.tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0],
]
outputs = self.model.generate(
tokenized_input.input_ids.to(self.device),
eos_token_id=eos_token_id,
**self.generation_args
**self.generation_args,
)
response = self.tokenizer.batch_decode(
outputs['sequences'], skip_special_tokens=True)

outputs["sequences"], skip_special_tokens=True
)

result = []
index = 0
for out, score in zip(response, outputs.scores):
Expand All @@ -68,18 +70,19 @@ def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float


if __name__ == "__main__":
generation_kwargs = {"model": "internlm/internlm2-math-plus-1_8b",
"temperature": 0.6,
"max_new_tokens": 1024,
"top_p": 0.9,
"length_penalty": 0,
"num_return_sequences": 64,
"do_sample": True,
"output_scores": True,
"output_logits": False,
"return_dict_in_generate": True,
"device": "auto",
}
generation_kwargs = {
"model": "internlm/internlm2-math-plus-1_8b",
"temperature": 0.6,
"max_new_tokens": 1024,
"top_p": 0.9,
"length_penalty": 0,
"num_return_sequences": 64,
"do_sample": True,
"output_scores": True,
"output_logits": False,
"return_dict_in_generate": True,
"device": "auto",
}
model = HFTacticGenerator(**generation_kwargs)
model.cuda()
print(model.generate("n : ℕ\n⊢ gcd n n = n"))
43 changes: 24 additions & 19 deletions python/external_models/oai_runner.py
@@ -14,14 +14,14 @@ class OpenAIRunner(Generator, Transformer):

def __init__(self, **args):
self.client_kwargs: dict[str | str] = {
"model": args['model'],
"temperature": args['temperature'],
"max_tokens": args['max_tokens'],
"top_p": args['top_p'],
"model": args["model"],
"temperature": args["temperature"],
"max_tokens": args["max_tokens"],
"top_p": args["top_p"],
"frequency_penalty": 0,
"presence_penalty": 0,
"n": args['num_return_sequences'],
"timeout": args['openai_timeout'],
"n": args["num_return_sequences"],
"timeout": args["openai_timeout"],
# "stop": args.stop, # stop is only used for base models currently
}
self.name = self.client_kwargs["model"]
@@ -30,7 +30,6 @@ def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float
prompt = pre_process_input(self.name, input + target_prefix)
prompt = [
{"role": "user", "content": f"{prompt}"},

]
try:
response = OpenAIRunner.client.chat.completions.create(
@@ -56,22 +55,28 @@ def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float
print("Exception: ", repr(e))
raise e

results = [(post_process_output(self.name, c.message.content), np.exp(-np.mean([
token.logprob for token in c.logprobs.content])))for c in response.choices]
results = [
(
post_process_output(self.name, c.message.content),
np.exp(-np.mean([token.logprob for token in c.logprobs.content])),
)
for c in response.choices
]
return choices_dedup(results)


if __name__ == "__main__":
generation_kwargs = {"model": "gpt-4-turbo-preview",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
"frequency_penalty": 0,
"presence_penalty": 0,
"num_return_sequences": 16,
"openai_timeout": 45,
# "stop": args.stop, # stop is only used for base models currently
}
generation_kwargs = {
"model": "gpt-4-turbo-preview",
"temperature": 0.9,
"max_tokens": 1024,
"top_p": 0.9,
"frequency_penalty": 0,
"presence_penalty": 0,
"num_return_sequences": 16,
"openai_timeout": 45,
# "stop": args.stop, # stop is only used for base models currently
}

model = OpenAIRunner(**generation_kwargs)
print(model.generate("n : ℕ\n⊢ gcd n n = n"))
