Kaiyu Yang committed f44c384 on May 28, 2024 (0 parents): 1,068 changed files with 74,386 additions and 0 deletions.
@@ -0,0 +1,113 @@ (new file: GitHub Actions workflow that builds the project and deploys the documentation to GitHub Pages)

```
on:
  push:
    branches:
      - master

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
  contents: read
  pages: write
  id-token: write

jobs:
  build_project:
    runs-on: ubuntu-latest
    name: Build project
    steps:
      - name: Checkout project
        uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Install elan
        run: curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- -y --default-toolchain leanprover/lean4:4.5.0

      - name: Build and Install CVC5
        run: |
          git clone https://github.com/cvc5/cvc5
          cd cvc5
          ./configure.sh --auto-download
          cd build
          make -j$(nproc)
          sudo make install

      - name: Build and Install Z3
        run: |
          git clone https://github.com/Z3Prover/z3
          cd z3
          python3 scripts/mk_make.py
          cd build
          make -j$(nproc)
          sudo make install

      - name: Install Python and smt-portfolio
        run: |
          sudo apt-get update
          sudo apt-get install python3 python3-pip
          pip3 install smt-portfolio

      - name: Get cache
        run: ~/.elan/bin/lake exe cache get || true

      - name: Build project
        run: ~/.elan/bin/lake build Book

      - name: Cache mathlib docs
        uses: actions/cache@v3
        with:
          path: |
            .lake/build/doc/Init
            .lake/build/doc/Lake
            .lake/build/doc/Lean
            .lake/build/doc/Std
            .lake/build/doc/Mathlib
            .lake/build/doc/declarations
            !.lake/build/doc/declarations/declaration-data-Book*
          key: MathlibDoc-${{ hashFiles('lake-manifest.json') }}
          restore-keys: |
            MathlibDoc-

      - name: Build documentation
        run: ~/.elan/bin/lake -R -Kenv=dev build Book:docs

      # - name: Build blueprint and copy to `docs/blueprint`
      #   uses: xu-cheng/texlive-action@v2
      #   with:
      #     docker_image: ghcr.io/xu-cheng/texlive-full:20231201
      #     run: |
      #       apk update
      #       apk add --update make py3-pip git pkgconfig graphviz graphviz-dev gcc musl-dev
      #       git config --global --add safe.directory $GITHUB_WORKSPACE
      #       git config --global --add safe.directory `pwd`
      #       python3 -m venv env
      #       source env/bin/activate
      #       pip install --upgrade pip requests wheel
      #       pip install pygraphviz --global-option=build_ext --global-option="-L/usr/lib/graphviz/" --global-option="-R/usr/lib/graphviz/"
      #       pip install leanblueprint
      #       leanblueprint pdf
      #       mkdir docs
      #       cp blueprint/print/print.pdf docs/blueprint.pdf
      #       leanblueprint web
      #       cp -r blueprint/web docs/blueprint

      # - name: Check declarations
      #   run: |
      #     ~/.elan/bin/lake exe checkdecls blueprint/lean_decls

      - name: Move documentation to `docs/docs`
        run: |
          sudo chown -R runner docs
          cp -r .lake/build/doc docs/docs

      - name: Upload docs & blueprint artifact
        uses: actions/upload-pages-artifact@v1
        with:
          path: docs/

      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v1

      - name: Make sure the cache works
        run: |
          mv docs/docs .lake/build/doc
```
@@ -0,0 +1,140 @@ (new file: repository ignore rules for Lean build outputs and Python artifacts)

```
/build
/lake-packages/*
/.lake/*
/.vscode/*
/result
/tmp
/E3/_tmp/
/E3/_out/
*.olean
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
```
@@ -0,0 +1,88 @@ (new file: README for the AutoFormalization folder)

## AutoFormalization with LeanEuclid

This folder contains a Python wrapper to autoformalize theorem statements and proofs from LeanEuclid and then automatically evaluate the results. For details on how the equivalence checker works and how to interpret its results, please see [here](https://github.com/loganrjmurphy/LeanEuclid/tree/master/E3).

We currently use the OpenAI API for autoformalization, but it should be straightforward to plug any LLM into the wrapper.

### Setup
Make sure that your OpenAI API key is registered as an environment variable, or supply it explicitly in `AutoFormalization/utils.py`. Also make sure that LeanEuclid is on your Python path.
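For example, on Linux or macOS you can set both from the shell before running any of the scripts (the clone path below is a placeholder; adjust it to wherever you checked out LeanEuclid):

```
export OPENAI_API_KEY="sk-..."                      # your OpenAI API key
export PYTHONPATH="$PYTHONPATH:/path/to/LeanEuclid" # make the repository importable from Python
```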
|
||
### Choosing a Dataset | ||
We will provide some usage examples targeting the *Elements* dataset. To target UniGeo instead, replace any instances of the arguments `--dataset Book --category ""` with `--dataset UniGeo --category <cat>`, where `<cat>` is one of `{"Parallel","Triangle","Quadrilateral","Congruent","Similarity"}`. | ||
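For instance, a UniGeo counterpart of the statement-autoformalization command shown in the next section would look like the following (here `"Triangle"` is just one of the five categories):

```
python3 AutoFormalization/statement/autoformalize.py --dataset UniGeo --category "Triangle" --reasoning text-only --num_query 1 --num_examples 3
```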

### Autoformalizing Theorem Statements

To autoformalize theorem statements from *Elements*, go to the root directory of LeanEuclid and run

```
python3 AutoFormalization/statement/autoformalize.py --dataset Book --category "" --reasoning text-only --num_query 1 --num_examples 3
```

The arguments `--dataset Book` and `--category ""` indicate that we are formalizing all 43 propositions in the test set of *Elements*. The argument `--num_query` indicates the number of chances the model is given to produce a well-formed Lean expression (of course, specifying `--num_query 1` means that some propositions may not be formalized this round). The argument `--num_examples` indicates the number of "shots" to be included in the prompt.
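For example, to give the model three attempts per proposition and include five few-shot examples in the prompt, you could run a variant like the following (more queries and more shots both increase token usage and running time):

```
python3 AutoFormalization/statement/autoformalize.py --dataset Book --category "" --reasoning text-only --num_query 3 --num_examples 5
```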

Autoformalizing the 43 propositions in the testing set of *Elements* as above should take roughly 6 minutes and ~100K tokens.

The results can be found from the root directory under `results`, in the subdirectories associated with the round's configuration options (in this case, `result/statement/Book/text-only/3shot`). The result for Proposition 1 may look something like:

```
{"prediction": "∀ (a b : Point) (AB : Line), distinctPointsOnLine a b AB → ∃ (c : Point) (AC BC : Line), distinctPointsOnLine a c AC ∧ distinctPointsOnLine b c BC ∧ |(a─b)| = |(a─c)| ∧ |(a─c)| = |(b─c)|",
"groud_truth": "∀ (a b : Point) (AB : Line), distinctPointsOnLine a b AB → ∃ c : Point, |(c─a)| = |(a─b)| ∧ |(c─b)| = |(a─b)|"}
```

### Evaluating Autoformalized Theorem Statements

Before you run any evaluation scripts for theorem statements, make sure you run `lake build E3` to build the equivalence checker.

#### Standard Equivalence Checking
Once you've formalized a set of theorem statements, you can use our equivalence checker `E3` to logically evaluate them in batch against the ground-truth theorems in LeanEuclid.

For instance, to evaluate the theorems formalized in the previous example, go to the root directory of LeanEuclid and run:

```
python3 AutoFormalization/statement/evaluate.py --dataset Book --category "" --reasoning text-only --num_examples 3
```

The default configuration of the script uses the standard equivalence checking procedure (skipping the more expensive approximate equivalence checking) and allocates 15 seconds to prove each direction of the equivalence. In the experiments reported in the paper, we used 60-second timeouts. For the 43 theorems in *Elements*, using 15-second timeouts takes about 15 minutes on a standard laptop.

The results will, in this case, reside in `result/equivalence/Book/text-only/3shot`.

#### Approximate Equivalence Checking

The approximate equivalence checker tries to quantify how "close" an autoformalized theorem statement is to some ground-truth formalization. It is slower than the ordinary equivalence checker, so it is not enabled by default.

To enable the approximate equivalence checker, provide the additional argument `--mode` to the checker. Passing `"onlyApprox"` skips the standard equivalence checking and only runs the approximate analysis; passing `"full"` runs both analyses. E.g.,

```
python3 AutoFormalization/statement/evaluate.py --dataset Book --category "" --reasoning text-only --num_examples 3 --mode "full"
```
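To run only the approximate analysis, the same command can instead be invoked with the other documented mode value:

```
python3 AutoFormalization/statement/evaluate.py --dataset Book --category "" --reasoning text-only --num_examples 3 --mode "onlyApprox"
```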

Note that the approximate equivalence checker will only run on target propositions whose number and type of bound variables match those of the ground-truth formalizations. This is usually only a minority (e.g., 33%) of the autoformalized theorem statements in a given batch. The running time depends on the quality of the predictions and the configuration options; in our experience, a typical batch from *Elements* runs for about 20 to 25 minutes on a standard laptop.

### Autoformalizing Proofs

To autoformalize proofs from *Elements*, go to the root directory of LeanEuclid and run

```
python3 AutoFormalization/proof/autoformalize.py --dataset Book --category "" --reasoning text-only --num_query 1 --num_examples 3
```

Autoformalizing the 43 propositions in the testing set of *Elements* as above should take roughly 6 minutes and ~250K tokens. The results, in this example, can be found in the subdirectory `result/proof/Book/text-only/3shot/`.

Unlike for theorem statements, Lean itself can evaluate proofs without any extra machinery (aside, of course, from LeanEuclid's proof automation). You can get Lean to check all of the proofs produced in the previous step by running

```
python3 AutoFormalization/proof/evaluate.py --dataset Book --category "" --reasoning text-only --num_examples 3
```

After the proofs are checked, the aggregate results are displayed, akin to the following:

```
cnt: 1, tot: 43, acc: 2.33%
```

which indicates that one proof was formalized correctly. In practice, it is rare for a proof in *Elements* to be correct out of the box; most require a small degree of manual repair (for now).

## Cleanup

As a side effect of the autoformalization and evaluation pipeline, autoformalization and evaluation instances are stored in the `tmp` subdirectory of the LeanEuclid repository. When you are **done** with these files, you can empty this folder by running `lake script run cleanup`.
@@ -0,0 +1,41 @@ (new file: Lean proof of Proposition 2 from Book I of the *Elements*)

```
import SystemE
import Book.Prop01

namespace Elements.Book1

/-
Proposition 2: given a point a and a segment bc, construct a segment from a equal in length to bc.
-/
theorem proposition_2 : ∀ (a b c : Point) (BC : Line),
  distinctPointsOnLine b c BC ∧ a ≠ b →
  ∃ l : Point, |(a─l)| = |(b─c)| :=
by
  euclid_intros
  euclid_apply (line_from_points a b) as AB
  euclid_apply (proposition_1 a b AB) as d
  euclid_apply (line_from_points d a) as AE
  euclid_apply (line_from_points d b) as BF
  euclid_apply (circle_from_points b c) as CGH
  euclid_apply (intersection_circle_line_extending_points CGH BF b d) as g
  euclid_apply (circle_from_points d g) as GKL
  euclid_apply (intersection_circle_line_extending_points GKL AE a d) as l
  euclid_apply (point_on_circle_onlyif b c g CGH)
  euclid_apply (point_on_circle_onlyif d l g GKL)
  euclid_apply (between_if l a d)
  euclid_apply (between_if g b d)
  use l
  euclid_finish

/-
An extension of proposition_2 to the case where a and b may be the same point.
-/
theorem proposition_2' :
  ∀ (a b c : Point) (BC : Line),
  distinctPointsOnLine b c BC →
  ∃ l : Point, |(a─l)| = |(b─c)| :=
by
  euclid_intros
  by_cases (a = b)
  . use c
    euclid_finish
  . euclid_apply proposition_2 a b c BC as l
    use l

end Elements.Book1
```