Skip to content

Commit

Permalink
Merge pull request #257 from tumido/run-make
Browse files Browse the repository at this point in the history
fix: sync generated files
  • Loading branch information
HumairAK authored Jan 23, 2025
2 parents 423f94b + d9678da commit c0bc6ed
Showing 1 changed file with 39 additions and 57 deletions.
96 changes: 39 additions & 57 deletions standalone/standalone.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,20 +1132,27 @@ def sdg_op(
sdg_path: str = "/data/sdg",
sdg_sampling_size: float = 1.0,
):
from os import getenv, path
import os
import shutil
import tempfile
import instructlab.sdg
import openai
import xdg_base_dirs
import yaml
api_key = getenv("api_key")
model = getenv("model")
endpoint = getenv("endpoint")
api_key = os.getenv("api_key")
model = os.getenv("model")
endpoint = os.getenv("endpoint")
if sdg_ca_cert := getenv("SDG_CA_CERT_PATH"):
sdg_ca_cert_path = os.getenv("SDG_CA_CERT_PATH")
use_tls = os.path.exists(sdg_ca_cert_path) and (
os.path.getsize(sdg_ca_cert_path) > 0
)
if use_tls:
import httpx
custom_http_client = httpx.Client(verify=sdg_ca_cert)
custom_http_client = httpx.Client(verify=sdg_ca_cert_path)
client = openai.OpenAI(
base_url=endpoint, api_key=api_key, http_client=custom_http_client
)
Expand Down Expand Up @@ -1184,7 +1191,7 @@ def sdg_op(
skills_recipe = "/usr/share/instructlab/sdg/default_data_recipes/skills.yaml"
def set_precomputed_skills_data_ratio(sampling_size: float, skills_recipe: str):
if path.exists(skills_recipe):
if os.path.exists(skills_recipe):
with open(skills_recipe, "r", encoding="utf-8") as file:
skills_yaml = yaml.load(file, Loader=yaml.Loader)
Expand All @@ -1200,16 +1207,11 @@ def set_precomputed_skills_data_ratio(sampling_size: float, skills_recipe: str):
except PermissionError:
print("Failed to set precomputed skills data ratio: Permission denied")
print("Attempting to move default data recipes to temporary directory")
import os
import shutil
import tempfile
import xdg_base_dirs
# Create a temporary directory
with tempfile.TemporaryDirectory() as temp_dir:
# Create a default_data_recipes directory
temp_dir = path.join(temp_dir, "default_data_recipes")
temp_dir = os.path.join(temp_dir, "default_data_recipes")
os.mkdir(temp_dir)
# Copy default_data_recipes/skills.yaml to the temporary directory
Expand All @@ -1222,7 +1224,7 @@ def set_precomputed_skills_data_ratio(sampling_size: float, skills_recipe: str):
os.path.join(str(dir), "instructlab", "sdg")
for dir in xdg_base_dirs.xdg_data_dirs()
]
temp_pipeline_dir = path.join(temp_dir, "pipeline")
temp_pipeline_dir = os.path.join(temp_dir, "pipeline")
os.mkdir(temp_pipeline_dir)
for d in data_dirs:
pipeline_path = os.path.join(d, "pipelines", pipeline)
Expand All @@ -1235,7 +1237,7 @@ def set_precomputed_skills_data_ratio(sampling_size: float, skills_recipe: str):
break
# Build new skills.yaml path
new_skills_recipe = path.join(temp_dir, "skills.yaml")
new_skills_recipe = os.path.join(temp_dir, "skills.yaml")
print(f"New skills recipe path: {new_skills_recipe}")
# Override XDG_DATA_DIRS with the temporary directory
Expand Down Expand Up @@ -1661,28 +1663,18 @@ def run_mt_bench_op(
import os
import subprocess
import httpx
import torch
from instructlab.eval.mt_bench import MTBenchEvaluator
if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
import httpx
import openai
# Create a custom HTTP client
class CustomHttpClient(httpx.Client):
def __init__(self, *args, **kwargs):
# Use the custom CA certificate
kwargs.setdefault("verify", judge_ca_cert)
super().__init__(*args, **kwargs)
# Create a new OpenAI class that uses the custom HTTP client
class CustomOpenAI(openai.OpenAI):
def __init__(self, *args, **kwargs):
custom_client = CustomHttpClient()
super().__init__(http_client=custom_client, *args, **kwargs)
# Monkey patch the OpenAI class in the openai module, so that the eval lib can use it
openai.OpenAI = CustomOpenAI
judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
use_tls = os.path.exists(judge_ca_cert_path) and (
os.path.getsize(judge_ca_cert_path) > 0
)
judge_http_client = httpx.Client(verify=judge_ca_cert_path) if use_tls else None
def launch_vllm(
model_path: str, gpu_count: int, retries: int = 120, delay: int = 10
Expand Down Expand Up @@ -1775,10 +1767,6 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
models_list = os.listdir(models_folder)
judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
scores = {}
all_mt_bench_data = []
Expand Down Expand Up @@ -1814,6 +1802,7 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
server_url=vllm_server,
serving_gpus=gpu_count,
max_workers=max_workers,
http_client=judge_http_client,
)
shutdown_vllm(vllm_process)
Expand All @@ -1823,6 +1812,7 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
api_key=judge_api_key,
serving_gpus=gpu_count,
max_workers=max_workers,
http_client=judge_http_client,
)
mt_bench_data = {
Expand Down Expand Up @@ -1884,30 +1874,20 @@ def run_final_eval_op(
import os
import subprocess
import httpx
import torch
from instructlab.eval.mmlu import MMLUBranchEvaluator
from instructlab.eval.mt_bench import MTBenchBranchEvaluator
from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score
if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
import httpx
import openai
# Create a custom HTTP client
class CustomHttpClient(httpx.Client):
def __init__(self, *args, **kwargs):
# Use the custom CA certificate
kwargs.setdefault("verify", judge_ca_cert)
super().__init__(*args, **kwargs)
# Create a new OpenAI class that uses the custom HTTP client
class CustomOpenAI(openai.OpenAI):
def __init__(self, *args, **kwargs):
custom_client = CustomHttpClient()
super().__init__(http_client=custom_client, *args, **kwargs)
# Monkey patch the OpenAI class in the openai module, so that the eval lib can use it
openai.OpenAI = CustomOpenAI
judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
use_tls = os.path.exists(judge_ca_cert_path) and (
os.path.getsize(judge_ca_cert_path) > 0
)
judge_http_client = httpx.Client(verify=judge_ca_cert_path) if use_tls else None
print("Starting Final Eval...")
Expand Down Expand Up @@ -2267,6 +2247,7 @@ def find_node_dataset_directories(base_dir: str):
server_url=vllm_server,
serving_gpus=gpu_count,
max_workers=max_workers,
http_client=judge_http_client,
)
shutdown_vllm(vllm_process)
Expand All @@ -2277,6 +2258,7 @@ def find_node_dataset_directories(base_dir: str):
api_key=judge_api_key,
serving_gpus=gpu_count,
max_workers=max_workers,
http_client=judge_http_client,
)
qa_pairs_and_errors.append((overall_score, qa_pairs, error_rate))
Expand Down

0 comments on commit c0bc6ed

Please sign in to comment.