Skip to content

Commit

Permalink
update graphs and tables
Browse files Browse the repository at this point in the history
  • Loading branch information
ConorOBrien-Foxx committed Jun 24, 2024
1 parent b1ab231 commit 510db76
Show file tree
Hide file tree
Showing 22 changed files with 346 additions and 327 deletions.
154 changes: 72 additions & 82 deletions bugs2fix-checklist.ipynb

Large diffs are not rendered by default.

161 changes: 55 additions & 106 deletions bugs2fix.ipynb

Large diffs are not rendered by default.

175 changes: 85 additions & 90 deletions code2code-trans.ipynb

Large diffs are not rendered by default.

69 changes: 53 additions & 16 deletions commit-message.ipynb

Large diffs are not rendered by default.

Binary file added figs/b2f-all.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/b2f-cl-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/b2f-cl-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/b2f-cl-3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figs/b2f-cl-all.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/c2c-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/c2c-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/c2c-3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figs/c2c-all.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/cmg-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figs/cmg-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figs/cmg-all.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"case_count": 100, "results": {"em": {"prompt0": [0.0, 0.0, 0.0, 0.0], "prompt1": [0.0, 0.0, 0.0, 0.0], "prompt2": [0.0, 0.0, 0.0, 0.0], "prompt3": [0.0, 0.0, 0.0, 0.0]}, "bleu": {"prompt0": [0.14744789748224357, 0.12727270151090614, 0.1393528404051602, 0.14905405884515543], "prompt1": [0.12117556664357497, 0.0718618269678404, 0.06927247251551676, 0.1428896349232126], "prompt2": [0.08860709274190968, 0.03637304434009434, 0.008308461926085538, 0.02466904576809074], "prompt3": [0.015706555118775244, 0.020436617309708652, 0.020881900494266446, 0.02689716575735942]}, "codebleu-cs": {"prompt0": [0.22905727956582494, 0.23617902012442155, 0.2613782829466277, 0.26818107073706954], "prompt1": [0.19402742458113337, 0.15936101249631518, 0.18051152500315965, 0.26547690149663683], "prompt2": [0.18166417631938353, 0.09415470281717575, 0.03251950621663942, 0.06503135542869536], "prompt3": [0.07928639158475127, 0.08881636926079384, 0.08986404204424117, 0.10575441586271458]}}}
1 change: 1 addition & 0 deletions output/commit/metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"case_count": 100, "results": {"codebleu-bmoses": {"prompt0": [0.0, 0.0, 0.0, 0.0]}, "codebleu-bnorm": {"prompt0": [0.005091944041387383, 0.0014133822836740583, 0.00625229882485127, 0.005881310913068362]}}}
34 changes: 26 additions & 8 deletions render_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ def index_axis(axes, idx):


class OutputRenderer:
def __init__(self, baseline=0.0, metric="(Unspecified metric)"):
def __init__(self, baseline=0.0, metric="(Unspecified metric)", linemarker="o"):
self.x_values = [0.35, 2.70, 6.10, 16.10]
#self.box_color = "Pink"
self.baseline = baseline
self.metric = metric
self.linemarker = linemarker


def set_lim(self, ax=None, y_max=None):
Expand Down Expand Up @@ -90,15 +91,24 @@ def draw_line(self, ax, ys, label=None, color="b"):
else vals
for vals in ys
]
PROP_REF = {
".": dict(

),
"o": dict(
markerfacecolor="none",
markeredgewidth=2,
markersize=8,
)
}
props = PROP_REF.get(self.linemarker, PROP_REF["o"])
line = ax.plot(
self.x_values,
medians,
marker="o",
marker=self.linemarker,
color=color,
linestyle="-",
markerfacecolor="none",
markeredgewidth=2,
markersize=8,
**props,
# alpha=0.5,
zorder=8,
label=label,
Expand Down Expand Up @@ -169,6 +179,7 @@ def render_lines(self, ax, y_lines):
box_color += (0.3, )
self.draw_box(ax, ys, box_color)
return lines


def render(self, ys, y_max=None, save=None, title=None):
y_lines = ys
Expand Down Expand Up @@ -209,6 +220,7 @@ def render_multi(
dims,
title,
figsize=(8, 4),
save=None,
):
fig, axes = plt.subplots(*dims, figsize=figsize)
for idx, (ys, metric, subtitle) in enumerate(zip(yss, metrics, subtitles)):
Expand All @@ -225,10 +237,16 @@ def render_multi(
lines,
legend_keys,
loc="upper center",
bbox_to_anchor=(0.5,0.93),
bbox_to_anchor=(0.5,0.91),
ncol=len(legend_keys),
)
plt.suptitle(title)
plt.suptitle(title, fontsize=20, fontweight="bold")
plt.tight_layout()
fig.subplots_adjust(top=0.78)
fig.subplots_adjust(top=0.76)

if save is not None:
# save must come before show
plt.savefig(save, bbox_inches="tight")
print("Saved figure to", save)

plt.show()
59 changes: 38 additions & 21 deletions run_battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,30 +27,13 @@ class BatteryConfigs:
"// code.java\n{prompt}\n// code.cs\n",
"// This code is written in Java. Reproduce the same exact code in C#.\n{prompt}\n",
"// original code.java\n{prompt}\n\n// code.cs version of code.java\n",
"// This code is written in Java. Reproduce the same exact code in C#.\n{prompt}\n// This code is written in C#.\n",
],
battery_path="./data/CodeXGLUE/Code-Code/code-to-code-trans/data",
questions_file="test.java-cs.txt.java",
truth_file="test.java-cs.txt.cs",
)

"""
Code2CodeChecklist = dict(
case_count=100,
meta_count=None,
task="code2code-trans_checklist",
display_name="CodeTrans (Checklist)",
prompts=[
"// original code.java\n{prompt}\n// code.cs version of code.java\n",
"// code.java\n{prompt}\n// code.cs\n",
"// This code is written in Java. Reproduce the same exact code in C#.\n{prompt}\n",
"// original code.java\n{prompt}\n\n// code.cs version of code.java\n",
],
battery_path="./data/checklist/Code2Code",
questions_file="test.java-cs.txt.java",
truth_file="test.java-cs.txt.cs",
)
"""

Bugs2Fix = dict(
case_count=100,
meta_count=None,
Expand Down Expand Up @@ -86,6 +69,7 @@ class BatteryConfigs:
case_count=100,
meta_count=None,
task="commit",
display_name="Commit Message Generation",
prompts=[
"/* diff of changes\n{prompt}\n*/\n// a summary of the above diff is:\n// -"
],
Expand Down Expand Up @@ -117,7 +101,7 @@ def clean_model_output(line):


class BatteryRunner:
def __init__(self, case_count, task, prompts, battery_path, questions_file=None, truth_file=None, *, meta_count=None, json_battery=False, base=None, **kwargs):
def __init__(self, case_count, task, prompts, battery_path, questions_file=None, truth_file=None, *, meta_count=None, json_battery=False, base=None, display_name=None, **kwargs):
self.task = task
self.output_dir_base = f"./output/{task}"
self.prompts = prompts
Expand All @@ -136,6 +120,7 @@ def __init__(self, case_count, task, prompts, battery_path, questions_file=None,
self.truth_path = os.path.join(self.battery_path, truth_file)
self.battery = []
self.meta_count = meta_count
self.display_name = display_name
if base is None:
self.base = None
else:
Expand Down Expand Up @@ -398,7 +383,6 @@ def render_metric(
self,
metric,
by_prompt=None,
render_to=None,
*args,
**kwargs
):
Expand All @@ -410,9 +394,42 @@ def render_metric(
metric=metric.name,
)

self.renderer.render(ys=by_prompt, render_to=render_to, *args, **kwargs)
self.renderer.render(ys=by_prompt, *args, **kwargs)
return by_prompt


def render_metric_multi(
    self,
    metrics,
    dims=None,
    save=None,
    *args,
    **kwargs,
):
    """Render several metrics side by side in one multi-panel figure.

    For every entry in ``metrics`` the per-metric values are obtained via
    ``self.calculate_metrics`` and handed to a fresh ``OutputRenderer``
    (stored on ``self.renderer``), which draws one subplot per metric.

    Args:
        metrics: Sequence of metric objects; each must expose ``name`` and
            ``simplename`` attributes. It is iterated more than once, so
            pass a list/tuple rather than a one-shot iterator.
        dims: Subplot grid forwarded to ``render_multi``. Defaults to a
            single row with one column per metric.
        save: Forwarded to ``render_multi`` as ``save``; ``None`` is the
            default and is passed through unchanged.
        *args: Extra positional arguments forwarded to ``render_multi``.
            NOTE(review): they are bound before the keyword arguments
            below, so anything passed here will likely collide with
            ``yss=`` -- confirm whether positional pass-through is wanted.
        **kwargs: Extra keyword arguments forwarded to ``render_multi``.

    Returns:
        The list of ``calculate_metrics`` results, one per metric, in the
        same order as ``metrics``.
    """
    yss = [self.calculate_metrics(metric) for metric in metrics]
    metric_names = [metric.name for metric in metrics]
    subtitles = [f"{metric.simplename} vs scale" for metric in metrics]

    # display_name is optional (defaults to None in __init__); fall back to
    # the task identifier so the figure is never titled "None Performance".
    heading = self.display_name if self.display_name is not None else self.task
    title = f"{heading} Performance"

    dims = dims or (1, len(metrics))

    # Replaces any previously configured renderer on this runner.
    self.renderer = OutputRenderer(baseline=None, linemarker=".")

    # *args is written first because Python binds unpacked positionals
    # before keyword arguments regardless of where they appear in the call.
    self.renderer.render_multi(
        *args,
        yss=yss,
        metrics=metric_names,
        subtitles=subtitles,
        dims=dims,
        title=title,
        save=save,
        **kwargs,
    )

    return yss


def calculate_iterative_metric(self, metric, limit=None, quiet=False):
if limit is None:
Expand Down
14 changes: 12 additions & 2 deletions tabulate-results.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "3208f992-2269-4aa9-8b68-5f2b182a9af6",
"metadata": {},
"outputs": [],
Expand All @@ -61,19 +61,21 @@
" BatteryConfigs.Bugs2Fix,\n",
" BatteryConfigs.Bugs2FixChecklist,\n",
" BatteryConfigs.Code2Code,\n",
" BatteryConfigs.CommitMessageGeneration,\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 8,
"id": "62848b4b-5708-4163-871f-6eaefd044737",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"## Bugs2fix\n",
"\\hline Metric & Prompt & 350M & 2.7B & 6.1B & 16.1B & RMSD & MAE \\\\ \\hline \\hline\n",
"EM & prompt0 & 0 & 0 & 0 & 0 & 0 & 0 \\\\\n",
"EM & prompt1 & 0 & 0 & 0 & 0 & 0 & 0 \\\\\n",
Expand All @@ -85,6 +87,7 @@
"CodeBLEU (Java) & prompt1 & 0.1862 & 0.2634 & 0.3111 & 0.2642 & 0.0406 & 0.0371 \\\\\n",
"CodeBLEU (Java) & prompt2 & 0.5918 & \\textbf{0.6909} & 0.5499 & 0.6595 & 0.0538 & 0.0455 \\\\ \\hline\n",
"------------------------------\n",
"## Bugs2fix (Checklist)\n",
"\\hline Metric & Prompt & 350M & 2.7B & 6.1B & 16.1B & RMSD & MAE \\\\ \\hline \\hline\n",
"EM & prompt0 & 0.0100 & 0 & 0.0100 & 0.0100 & 0.0041 & 0.0033 \\\\\n",
"EM & prompt1 & 0 & 0 & 0 & 0 & 0 & 0 \\\\\n",
Expand All @@ -96,6 +99,7 @@
"CodeBLEU (Java) & prompt1 & 0.1853 & 0.2495 & 0.4194 & 0.0521 & \\textbf{0.1145} & 0.0952 \\\\\n",
"CodeBLEU (Java) & prompt2 & 0.6154 & \\textbf{0.6989} & 0.6321 & 0.6983 & 0.0315 & 0.0266 \\\\ \\hline\n",
"------------------------------\n",
"## CodeTrans\n",
"\\hline Metric & Prompt & 350M & 2.7B & 6.1B & 16.1B & RMSD & MAE \\\\ \\hline \\hline\n",
"EM & prompt0 & 0 & 0 & 0 & 0 & 0 & 0 \\\\\n",
"EM & prompt1 & 0 & 0 & 0 & 0 & 0 & 0 \\\\\n",
Expand All @@ -109,6 +113,11 @@
"CodeBLEU (C$^\\sharp$) & prompt1 & 0.1940 & 0.1594 & 0.1805 & 0.2655 & 0.0201 & 0.0190 \\\\\n",
"CodeBLEU (C$^\\sharp$) & prompt2 & 0.1817 & 0.0942 & 0.0325 & 0.0650 & 0.0446 & 0.0405 \\\\\n",
"CodeBLEU (C$^\\sharp$) & prompt3 & 0.0793 & 0.0888 & 0.0899 & 0.1058 & 0.0022 & 0.0017 \\\\ \\hline\n",
"------------------------------\n",
"## Commit Message Generation\n",
"\\hline Metric & Prompt & 350M & 2.7B & 6.1B & 16.1B & RMSD & MAE \\\\ \\hline \\hline\n",
"B-Moses & prompt0 & 0 & 0 & 0 & 0 & 0 & 0 \\\\ \\hline\n",
"B-Norm & prompt0 & 0.0051 & 0.0014 & \\textbf{0.0063} & 0.0059 & 0.0017 & 0.0015 \\\\ \\hline\n",
"------------------------------\n"
]
}
Expand Down Expand Up @@ -171,6 +180,7 @@
" *emergence_evaluations,\n",
" ])\n",
"\n",
" print(\"##\", config[\"display_name\"])\n",
" print(\"\\\\hline \", end=\"\")\n",
" for idx, row in enumerate(table):\n",
" row_display = \" & \".join(row) + \" \\\\\\\\\"\n",
Expand Down
5 changes: 3 additions & 2 deletions testing.ipynb

Large diffs are not rendered by default.

0 comments on commit 510db76

Please sign in to comment.