Skip to content

Commit

Permalink
add bootstrapping figs and code
Browse files Browse the repository at this point in the history
  • Loading branch information
ConorOBrien-Foxx committed Jun 5, 2024
1 parent 3853d8f commit 4b5e4c7
Show file tree
Hide file tree
Showing 17 changed files with 984 additions and 44 deletions.
29 changes: 28 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,28 @@
# emergent-capabilities
# emergent-capabilities

## Primary Files
- `bugs2fix.ipynb` generates the graphs for the Bugs2Fix code repair task.
- `code2code-trans.ipynb` generates the graphs for the Code2Code code translation task.
- `commit-message.ipynb` generates the graphs for the commit message generation task.

- `pull-tests.ipynb` installs the datasets from BIG and various other places. (I'm pretty sure CodeXGLUE was not installed this way - the repository was simply cloned to `data/CodeXGLUE`.)
- `trim-tokens.ipynb` (***TODO***) is intended to uniformly trim output lines so that every line is at most 500 tokens long (useful because various configurations were used during the testing process).

- `bleu.py` is code adapted from CodeXGLUE which calculates the BLEU metric.
- `metric.py` is a wrapper around the various metrics we used in this project.
- `model_wrapper.py` is a wrapper around (specifically) the CodeGen extended family of models.
- `render_output.py` is a wrapper around matplotlib suited for our use case.
- `run_battery.py` is helper code which streamlines the testcase running process.
- `timehelp.py` is helper code which is responsible for timing operations and formatting them.

## Scaffolding Files

- `accelerate-test.ipynb` is scaffolding code which became the basis for `model_wrapper.py`, testing GPU loading & caching of the codegen models.
- `codexglue-test.ipynb` is a scratchpad for initial testing of various prompts.
- `consecutive.ipynb` is proof of concept code loading the models in sequence without leaking GPU memory.
- `metric-progress.ipynb` investigates the change in metric according to number of test cases processed.
- `softmax.ipynb` verifies the correspondence between softmax and logits.
- `testing.ipynb` is used for miscellaneous testing, but primarily the parsing of multiple choice questions.
- `verify-result.ipynb` is debugging code used to examine questionable input/output pairs and assess what caused them (in this case, a bug/false assumption in the generation code).
- `wrapper-test.ipynb` is a simple testing file for making sure the model wrapper works correctly.
- `test.py` is an old testing file.
102 changes: 102 additions & 0 deletions bars3d.ipynb

Large diffs are not rendered by default.

16 changes: 2 additions & 14 deletions bugs2fix.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,9 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from run_battery import BatteryRunner\n",
"from run_battery import BatteryRunner, BatteryConfigs\n",
"\n",
"runner = BatteryRunner(\n",
" case_count=100,\n",
" meta_count=None,\n",
" task=\"bugs2fix\",\n",
" prompts=[\n",
" \"// the buggy version of the code\\n{prompt}\\n// the fixed version of the code\\n\",\n",
" \"// You are given a piece of buggy code. Your task is to fix the error, and generate the corrected code. Fix the following code:\\n{prompt}\\n\",\n",
" ],\n",
" battery_path=\"./data/CodeXGLUE/Code-Code/code-refinement/data/small\",\n",
" questions_file=\"test.buggy-fixed.buggy\",\n",
" truth_file=\"test.buggy-fixed.fixed\",\n",
")"
"runner = BatteryRunner.of(BatteryConfigs.Bugs2Fix)"
]
},
{
Expand Down
Binary file added figs/b2f-bootstrap-bleu.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figs/b2f-bootstrap-codebleu.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figs/c2c-bootstrap-bleu.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figs/c2c-bootstrap-codebleu.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
571 changes: 571 additions & 0 deletions metric-progress.ipynb

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ def b_norm(ref, trans):
return _bleu(ref, trans, smooth=True, lower=True)

class Metric:
def __init__(self, name, *, grade_single=None, grade_multi=None, baseline=0.0):
def __init__(self, name, shortname, *, grade_single=None, grade_multi=None, baseline=0.0):
self.name = name
self.shortname = shortname
self.grade_single = grade_single
self.grade_multi = grade_multi
self.baseline = 0.0
Expand All @@ -35,25 +36,31 @@ def grade(self, answer_key, answers):

ExactMatch = Metric(
name="Accuracy% (Exact Match)",
shortname="em",
grade_single = lambda truth, answer: truth == answer,
)
BLEU = Metric(
name="BLEU",
shortname="bleu",
grade_multi = _bleu,
)
CodeBLEUJava = Metric(
name="CodeBLEU (Java)",
shortname="codebleu-java",
grade_multi = partial(_codebleu, lang="java"),
)
CodeBLEUCSharp = Metric(
name="CodeBLEU (C#)",
shortname="codebleu-cs",
grade_multi = partial(_codebleu, lang="c_sharp"),
)
BMoses = Metric(
name="B-Moses",
shortname="codebleu-bmoses",
grade_multi = b_moses,
)
BNorm = Metric(
name="B-Norm",
shortname="codebleu-bnorm",
grade_multi = b_norm,
)

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions output/bugs2fix/bootstrap-bleu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions output/bugs2fix/bootstrap-codebleu-java.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions output/code2code-trans/bootstrap-bleu.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions output/code2code-trans/bootstrap-codebleu-java.json

Large diffs are not rendered by default.

57 changes: 48 additions & 9 deletions render_output.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from statistics import median
import numpy as np

class OutputRenderer:
def __init__(self, baseline=0.0, metric="(Unspecified metric)"):
self.x_values = [0.35, 2.70, 6.10, 16.10]
self.box_color = "Pink"
#self.box_color = "Pink"
self.baseline = baseline
self.metric = metric

Expand All @@ -18,19 +19,52 @@ def set_lim(self, y_max=None):
plt.ylim(0, y_max)


def draw_box(self, ax, ys):
def draw_box(self, ax, ys, box_color):
solid_color = mcolors.to_rgb(box_color)
# a darkened version of the given color (blended toward black by black_ratio)
black_ratio = 0.2
black = (0.0, 0.0, 0.0)
edge_color = tuple(
(1 - black_ratio) * np.array(solid_color)
+ black_ratio * np.array(black)
)
bplot = ax.boxplot(
ys,
positions=self.x_values,
widths=1,
manage_ticks=False,
patch_artist=True,
zorder=5,
medianprops=dict(color="black"),
medianprops=dict(
color=edge_color,
linewidth=2
),
whiskerprops=dict(
color=edge_color,
linewidth=2
),
capprops=dict(
color=edge_color,
linewidth=2
),
flierprops=dict(
markersize=5,
markeredgecolor=solid_color,
markerfacecolor=solid_color,
marker=".",
# the "x" marker is cursed, idky
# markeredgecolor=box_color,
# marker="x",
# linewidth=15,
),
boxprops=dict(
color=edge_color,
linewidth=2
),
)

for patch in bplot["boxes"]:
patch.set_facecolor(self.box_color)
patch.set_facecolor(box_color)

def draw_line(self, ax, ys, label=None, color="b"):
medians = [median(vals) for vals in ys]
Expand Down Expand Up @@ -65,11 +99,13 @@ def draw_random_annotation(self, y_max=None):
color="orange",
horizontalalignment="right",
)


def meta_info(self):
def meta_info(self, title=None):
plt.xlabel("Parameters (in billions)")
plt.ylabel(self.metric)
plt.title("Model Performance")
plt.title(title or "Model Performance")


def draw_bands(self, ax, ys, color="b"):
q1 = [np.percentile(val, 25) for val in ys]
Expand All @@ -92,8 +128,9 @@ def draw_bands(self, ax, ys, color="b"):
zorder=5,
color=color,
)


def render(self, ys, y_max=None, save=None):
def render(self, ys, y_max=None, save=None, title=None):
y_lines = ys
if not isinstance(y_lines, dict):
y_lines = { "unnamed": y_lines }
Expand All @@ -111,7 +148,7 @@ def render(self, ys, y_max=None, save=None):
ax1 = fig.add_subplot(111)
plt.grid(True)
self.set_lim(y_max=y_max)
self.meta_info()
self.meta_info(title=title)

self.draw_random_annotation(y_max=y_max)

Expand All @@ -120,7 +157,9 @@ def render(self, ys, y_max=None, save=None):
color = colors[idx % len(colors)]
self.draw_bands(ax1, ys, color=color)
self.draw_line(ax1, ys, label=key, color=color)
self.draw_box(ax1, ys)
box_color = mcolors.to_rgb(color)
box_color += (0.3, )
self.draw_box(ax1, ys, box_color)

plt.legend()

Expand Down
Loading

0 comments on commit 4b5e4c7

Please sign in to comment.