finalize method of multiple choice assessment
ConorOBrien-Foxx committed May 6, 2024
1 parent d3c29c8 commit 5fc3bce
Showing 5 changed files with 556 additions and 88 deletions.
226 changes: 226 additions & 0 deletions consecutive.ipynb
@@ -0,0 +1,226 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4b252f6f-f290-4e96-a09c-71345e8c7bad",
"metadata": {},
"source": [
"# README\n",
"This tests that no GPU memory is \"leaked\" after running successive trials of each model."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "6225d88d-c5fc-4810-a971-a72cf4673b38",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Importing torch...\n",
"Importing HF...\n",
"Importing python modules...\n",
"Done!\n"
]
}
],
"source": [
"print(\"Importing torch...\")\n",
"import torch\n",
"import torch.nn as nn\n",
"print(\"Importing HF...\")\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
"print(\"Importing python modules...\")\n",
"from timehelp import time_start, time_end\n",
"from model_wrapper import Model, ModelFamily, MultipleChoiceStrategy\n",
"import re\n",
"print(\"Done!\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3cc0a219-aafc-4460-a646-f87a1ee9d35e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"=== Loading 350M (Salesforce/codegen-350M-mono) ===\n",
"[2024-05-03@18:51:57|model.device] Starting timer.\n",
"Configuring torch device...\n",
"Using device: cuda:0 aka cuda:0\n",
"[2024-05-03@18:51:58|model.device] Time elapsed: 69ms\n",
"[2024-05-03@18:51:58|model.tokenizer] Starting timer.\n",
"[2024-05-03@18:51:58|model.tokenizer] Time elapsed: 419ms\n",
"[2024-05-03@18:51:58|model.model] Starting timer.\n",
"Obtaining model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-05-03@18:52:09|model.model] Time elapsed: 11s 17ms\n",
"Token count in input: 1\n",
"Generating...\n",
"[2024-05-03@18:52:09|model.generate] Starting timer.\n",
"[2024-05-03@18:52:13|model.generate] Time elapsed: 4s 10ms\n",
" -*- coding: utf-\n",
"\n",
"Freeing model...\n",
"--------------------------------------------------\n",
"\n",
"=== Loading 2B (Salesforce/codegen-2B-mono) ===\n",
"[2024-05-03@18:52:13|model.device] Starting timer.\n",
"Configuring torch device...\n",
"Using device: cuda:0 aka cuda:0\n",
"[2024-05-03@18:52:13|model.device] Time elapsed: ~0s\n",
"[2024-05-03@18:52:13|model.tokenizer] Starting timer.\n",
"[2024-05-03@18:52:13|model.tokenizer] Time elapsed: 162ms\n",
"[2024-05-03@18:52:13|model.model] Starting timer.\n",
"Obtaining model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-05-03@18:52:45|model.model] Time elapsed: 31s 604ms\n",
"Token count in input: 1\n",
"Generating...\n",
"[2024-05-03@18:52:45|model.generate] Starting timer.\n",
"[2024-05-03@18:52:45|model.generate] Time elapsed: 569ms\n",
" -*- coding: utf-\n",
"\n",
"Freeing model...\n",
"--------------------------------------------------\n",
"\n",
"=== Loading 6B (Salesforce/codegen-6B-mono) ===\n",
"[2024-05-03@18:52:46|model.device] Starting timer.\n",
"Configuring torch device...\n",
"Using device: cuda:0 aka cuda:0\n",
"[2024-05-03@18:52:46|model.device] Time elapsed: ~0s\n",
"[2024-05-03@18:52:46|model.tokenizer] Starting timer.\n",
"[2024-05-03@18:52:46|model.tokenizer] Time elapsed: 167ms\n",
"[2024-05-03@18:52:46|model.model] Starting timer.\n",
"Obtaining model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu.\n",
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-05-03@18:54:01|model.model] Time elapsed: 1min 14s\n",
"Token count in input: 1\n",
"Generating...\n",
"[2024-05-03@18:54:01|model.generate] Starting timer.\n",
"[2024-05-03@18:54:22|model.generate] Time elapsed: 20s 932ms\n",
"!/usr/bin/env python\n",
"\n",
"\n",
"Freeing model...\n",
"--------------------------------------------------\n",
"\n",
"=== Loading 16B (Salesforce/codegen-16B-mono) ===\n",
"[2024-05-03@18:54:22|model.device] Starting timer.\n",
"Configuring torch device...\n",
"Using device: cuda:0 aka cuda:0\n",
"[2024-05-03@18:54:22|model.device] Time elapsed: ~0s\n",
"[2024-05-03@18:54:22|model.tokenizer] Starting timer.\n",
"[2024-05-03@18:54:22|model.tokenizer] Time elapsed: 183ms\n",
"[2024-05-03@18:54:22|model.model] Starting timer.\n",
"Obtaining model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu.\n",
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-05-03@18:57:09|model.model] Time elapsed: 2min 46s\n",
"Token count in input: 1\n",
"Generating...\n",
"[2024-05-03@18:57:09|model.generate] Starting timer.\n",
"[2024-05-03@18:57:50|model.generate] Time elapsed: 41s 52ms\n",
" -*- coding: utf-\n",
"\n",
"Freeing model...\n",
"--------------------------------------------------\n",
"\n"
]
}
],
"source": [
"# attempt to load all models in succession\n",
"for key, model_name in ModelFamily.CodeGen1.mono.items():\n",
" print(\"=== Loading\", key, f\"({model_name}) ===\")\n",
" torch.cuda.empty_cache()\n",
" model = Model(model_name)\n",
" model.configure(time=True)\n",
" inputs = model.tokenize(\"#\")\n",
" sample = model.generate(inputs, time=True, max_new_tokens=8)\n",
" print(model.decode(sample, inputs))\n",
" print()\n",
" print(\"Freeing model...\")\n",
" del inputs, sample\n",
" model.free()\n",
" print(\"-\" * 50)\n",
" print()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
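
For context, the loop in this notebook depends on model.free() actually releasing GPU memory between trials. Below is a minimal sketch of the free-and-verify pattern the notebook exercises; the report_gpu_memory helper is an illustrative assumption, not part of the repository:

import gc
import torch

def report_gpu_memory(label):
    # Illustrative helper: report how much CUDA memory is still held,
    # so a leak across successive model loads is visible at a glance.
    allocated = torch.cuda.memory_allocated() / 2**20
    reserved = torch.cuda.memory_reserved() / 2**20
    print(f"{label}: allocated={allocated:.1f} MiB, reserved={reserved:.1f} MiB")

# Assuming `model` holds the last loaded model: drop every reference
# before collecting garbage, then release cached blocks to the driver
# so the next model starts from a clean slate.
model = None
gc.collect()
torch.cuda.empty_cache()
report_gpu_memory("after free")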
26 changes: 15 additions & 11 deletions model_wrapper.py
@@ -37,6 +37,7 @@ class CodeGen2p5:

class Model:
CACHE_DIR = "/workspaces/emergent-capabilities/datax"
DEFAULT_SOFTMAX = torch.nn.Softmax(dim=-1)

@staticmethod
def clean_cache_dir(confirm=False):
@@ -60,14 +61,15 @@ def prob_from_logit(logit):
return prob


def __init__(self, name, cache_dir=None, device_name=None, verbose=True):
def __init__(self, name, cache_dir=None, device_name=None, verbose=True, softmax=None):
self.verbose = verbose
self.name = name
self.tokenizer = None
self.model = None
self.cache_dir = cache_dir or Model.CACHE_DIR
self.device_name = device_name
self.device = None
self.softmax = softmax or Model.DEFAULT_SOFTMAX


def yap(self, *args, **kwargs):
@@ -321,7 +323,7 @@ def _multiple_choice_prompts_logit_average(self, input_tokens, target_tokens, time=False):
best_option_idx = idx

return best_option_idx

def _multiple_choice_prompts_multiply(self, input_tokens, target_tokens, time=False):
"""
Private helper function.
@@ -357,24 +359,23 @@ def _multiple_choice_prompts_multiply(self, input_tokens, target_tokens, time=False):

output = self.model_no_grad(input_ids=running_inputs)
next_logits = output.logits[:, -1, :]
# self.yap("NEXT LOGITS:", next_logits)

distribution = self.softmax(next_logits)
# self.yap("SOFTMAX:", distribution)

# NOTE: The probabilities do not sum to 1 (in fact, much larger); should we rescale?
# how can we rescale?
self.yap("P Sum:", sum(Model.prob_from_logit(logit) for logit in list(next_logits[0])))

logit_score = next_logits[:, token].item()
prob = Model.prob_from_logit(logit_score)
prob = distribution[:, token].item()
self.yap(f"Token {j}: P={prob}, logit={logit_score}")
total_prob *= prob
self.yap("Running P:", total_prob)

# todo: normalize, somehow?
# self.yap("Running P:", total_prob)

score = total_prob

if best_option_idx is None or score > best_score:
best_score = score
best_option_idx = idx
self.yap()
# self.yap()

return best_option_idx
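
In plain terms, the MULTIPLY strategy above scores each candidate answer as the product of its tokens' softmax probabilities, feeding each target token back into the prompt before scoring the next one. A condensed standalone sketch of that scoring step (variable names are illustrative; it assumes a Hugging Face causal LM and a 1-D tensor of target token ids):

import torch

def score_continuation(model, input_ids, target_ids):
    # P(answer | prompt) = product over j of P(t_j | prompt, t_1..t_{j-1}),
    # reading each factor off the softmax of the last position's logits.
    softmax = torch.nn.Softmax(dim=-1)
    total_prob = 1.0
    running = input_ids  # shape (1, prompt_len)
    with torch.no_grad():
        for token in target_ids:
            logits = model(input_ids=running).logits[:, -1, :]
            total_prob *= softmax(logits)[0, token].item()
            running = torch.cat([running, token.view(1, 1)], dim=1)
    return total_prob

The chosen option is then the argmax of these scores across the candidates, which is what best_option_idx tracks in the method above.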

@@ -420,6 +421,9 @@ def multiple_choice_prompts(self, inputs, targets, time=False, strategy=Multiple
elif strategy == MultipleChoiceStrategy.FIRST_BRANCH:
idx = self._multiple_choice_prompts_first_branch(inputs, target_tokens, time=time)

else:
assert False, f"Unknown/unhandled multiple choice strategy {strategy}"

return idx


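The move from Model.prob_from_logit to the injected self.softmax also answers the NOTE in the removed lines: a per-logit transform (presumably a sigmoid) treats each vocabulary entry independently, so the resulting "probabilities" can sum to far more than 1, whereas softmax normalizes across the whole vocabulary. A toy illustration, assuming prob_from_logit is the standard sigmoid:

import torch

logits = torch.tensor([2.0, 1.0, 0.5, -1.0])  # toy next-token logits

sigmoid_probs = torch.sigmoid(logits)          # per-logit, unnormalized
softmax_probs = torch.softmax(logits, dim=-1)  # proper distribution

print(sigmoid_probs.sum().item())  # about 2.5: not a distribution
print(softmax_probs.sum().item())  # 1.0
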
8 changes: 8 additions & 0 deletions pull-tests.ipynb
@@ -1,5 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d989539b-9522-47c0-aca7-14115980c78e",
"metadata": {},
"source": [
"# INSTALLING THE TESTS"
]
},
{
"cell_type": "code",
"execution_count": 1,
