Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Simple and Advanced Geometry Dataset Generators #38

Merged
merged 5 commits into from
Feb 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions reasoning_gym/geometry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .simple_geometry import SimpleGeometryConfig, SimpleGeometryDataset
from .advanced_geometry import AdvancedGeometryConfig, AdvancedGeometryDataset

# Public API of the geometry package: the config/dataset pair of each generator
# imported above.
__all__ = [
"SimpleGeometryConfig",
"SimpleGeometryDataset",
"AdvancedGeometryConfig",
"AdvancedGeometryDataset",
]
229 changes: 229 additions & 0 deletions reasoning_gym/geometry/advanced_geometry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
import random
from dataclasses import dataclass
from typing import Optional, List

import sympy
from sympy.geometry import Point, Triangle, Segment

from ..factory import ProceduralDataset, register_dataset


@dataclass
class AdvancedGeometryConfig:
    """
    Configuration for generating advanced (coordinate-geometry) triangle tasks.

    ``task_types`` selects which kinds of problems may be generated. Leaving it
    as ``None`` enables every supported task type.
    """

    # Task names the dataset knows how to build (not a dataclass field because
    # it carries no annotation).
    SUPPORTED_TASK_TYPES = ("orthocenter", "incircle_radius", "angle_measure")

    min_coord: int = -10  # Minimum x/y coordinate
    max_coord: int = 10  # Maximum x/y coordinate
    size: int = 50  # Number of problems to generate
    seed: Optional[int] = None  # Random seed

    # Task types to sample from; None means "all supported task types".
    task_types: Optional[List[str]] = None

    def __post_init__(self):
        """Fill in the default task list when none was supplied."""
        if self.task_types is None:
            # Fresh list per instance so configs never share mutable state.
            self.task_types = list(self.SUPPORTED_TASK_TYPES)

    def validate(self):
        """
        Check the configuration for consistency.

        Raises:
            AssertionError: if the coordinate range is empty, the size is not
                positive, no task type is given, or a task type is unknown.
        """
        assert self.min_coord < self.max_coord, "min_coord must be < max_coord."
        assert self.size > 0, "Size of dataset must be positive."
        assert len(self.task_types) > 0, "Must specify at least one task type."
        # Reject typos here instead of failing later while generating an item.
        unknown = [t for t in self.task_types if t not in self.SUPPORTED_TASK_TYPES]
        assert not unknown, f"Unknown task types: {unknown}."


class AdvancedGeometryDataset(ProceduralDataset):
    """
    A dataset for advanced geometry tasks using coordinate geometry.

    Every item is built from a random non-degenerate triangle with integer
    vertices and asks for one of: the orthocenter, the incircle radius, or the
    angle at vertex B. Items are derived from ``seed + idx``, so the same index
    always yields the same problem.
    """

    def __init__(self, config: AdvancedGeometryConfig):
        # Several phrasings per task type; one is picked at random per item.
        self._prompt_templates = {
            "orthocenter": [
                "Given triangle ABC with coordinates A={A}, B={B}, and C={C}, find the coordinates of its orthocenter.",
                "For triangle with vertices A={A}, B={B}, and C={C}, determine the orthocenter (intersection of altitudes).",
            ],
            "incircle_radius": [
                "Consider triangle ABC with coordinates A={A}, B={B}, and C={C}. Compute the radius of its incircle.",
                "Find the incircle radius of triangle ABC whose vertices are A={A}, B={B}, and C={C}.",
            ],
            "angle_measure": [
                "In triangle ABC with coordinates A={A}, B={B}, and C={C}, find the measure (in degrees) of angle ABC.",
                "Given a triangle with vertices A={A}, B={B}, C={C}, determine the angle at B in degrees.",
            ],
        }
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """
        Generate a single advanced geometry item based on the config's task types.

        Returns:
            A dict with ``question`` (str), ``answer`` (str) and ``metadata`` (dict).

        Raises:
            ValueError: if the sampled task type is not supported, or no
                non-degenerate triangle could be generated.
        """
        rng = random.Random(self.seed + idx)
        task_type = rng.choice(self.config.task_types)

        # The triangle is drawn before dispatching so the RNG stream is
        # consumed in the same order for every task type.
        A, B, C = self._generate_non_degenerate_triangle(rng)

        builders = {
            "orthocenter": self._build_orthocenter_task,
            "incircle_radius": self._build_incircle_radius_task,
            "angle_measure": self._build_angle_measure_task,
        }
        builder = builders.get(task_type)
        if builder is None:
            raise ValueError(f"Unknown task_type: {task_type}")

        question, answer, metadata = builder(rng, A, B, C)
        return {
            "question": question,
            "answer": answer,
            "metadata": metadata,
        }

    @staticmethod
    def _to_builtin(value):
        """Convert a SymPy number to a plain ``int`` (if integral) or ``float``."""
        return int(value) if value.is_integer else float(value)

    def _vertex_tuple(self, point: Point):
        """Return ``(x, y)`` of ``point`` as plain Python numbers (JSON-friendly)."""
        return (self._to_builtin(point.x), self._to_builtin(point.y))

    def _format_question(self, rng: random.Random, task_type: str, A: Point, B: Point, C: Point) -> str:
        """Pick a random prompt template for ``task_type`` and fill in the vertices."""
        template = rng.choice(self._prompt_templates[task_type])
        return template.format(
            A=self._vertex_tuple(A),
            B=self._vertex_tuple(B),
            C=self._vertex_tuple(C),
        )

    def _vertex_metadata(self, A: Point, B: Point, C: Point) -> dict:
        """Metadata entries shared by all task types: the three vertices."""
        return {
            "A": self._vertex_tuple(A),
            "B": self._vertex_tuple(B),
            "C": self._vertex_tuple(C),
        }

    def _generate_non_degenerate_triangle(self, rng: random.Random):
        """
        Generate a random non-degenerate triangle with integer coordinates
        in [min_coord, max_coord] x [min_coord, max_coord].

        Returns:
            A tuple ``(A, B, C)`` of SymPy ``Point`` objects.

        Raises:
            ValueError: if every attempt produced three collinear points.
        """
        low, high = self.config.min_coord, self.config.max_coord
        max_attempts = 100
        for _attempt in range(max_attempts):
            # For each vertex, x is drawn before y (tuple items evaluate left to right).
            (ax, ay), (bx, by), (cx, cy) = [
                (rng.randint(low, high), rng.randint(low, high)) for _vertex in range(3)
            ]

            # Shoelace formula: this is twice the signed area, and it is zero
            # exactly when the three points are collinear.
            twice_signed_area = ax * (by - cy) + bx * (cy - ay) + cx * (ay - by)
            if twice_signed_area != 0:
                return Point(ax, ay), Point(bx, by), Point(cx, cy)

        raise ValueError(f"Failed to generate a non-degenerate triangle after {max_attempts} attempts.")

    def _build_orthocenter_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the orthocenter of triangle ABC.

        Returns:
            ``(question, answer, metadata)``; the answer is ``"(x, y)"`` with
            three decimals.
        """
        # Altitudes from A and B: lines through the vertex perpendicular to
        # the opposite side.
        alt_A = sympy.Line(B, C).perpendicular_line(A)
        alt_B = sympy.Line(C, A).perpendicular_line(B)

        # Any two altitudes meet at the orthocenter.
        ortho = alt_A.intersection(alt_B)[0]

        x_ortho_approx = float(ortho.x.evalf())
        y_ortho_approx = float(ortho.y.evalf())

        question = self._format_question(rng, "orthocenter", A, B, C)
        answer_str = f"({x_ortho_approx:.3f}, {y_ortho_approx:.3f})"

        metadata = self._vertex_metadata(A, B, C)
        metadata["orthocenter_exact"] = (str(ortho.x), str(ortho.y))
        metadata["orthocenter_approx"] = (x_ortho_approx, y_ortho_approx)
        return question, answer_str, metadata

    def _build_incircle_radius_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the incircle radius of triangle ABC.

        Returns:
            ``(question, answer, metadata)``; the answer has three decimals.
        """
        # Side lengths, each named after the opposite vertex.
        a = B.distance(C)
        b = C.distance(A)
        c = A.distance(B)

        # Semi-perimeter
        s = (a + b + c) / 2

        # Area using Heron's formula
        area = sympy.sqrt(s * (s - a) * (s - b) * (s - c))

        # Radius of incircle = Area / Semi-perimeter
        radius = area / s
        radius_approx = float(radius.evalf())

        question = self._format_question(rng, "incircle_radius", A, B, C)
        answer_str = f"{radius_approx:.3f}"

        metadata = self._vertex_metadata(A, B, C)
        metadata["incircle_radius_exact"] = str(radius)
        metadata["incircle_radius_approx"] = radius_approx
        return question, answer_str, metadata

    def _build_angle_measure_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the measure of angle ABC in degrees.

        Returns:
            ``(question, answer, metadata)``; the answer has two decimals
            followed by a degree sign.
        """
        # Angle at B is the angle between the vectors BA = A - B and BC = C - B.
        BA = A - B
        BC = C - B

        # angle = arccos((BA . BC) / (|BA| * |BC|))
        dot_val = BA.dot(BC)
        origin = Point(0, 0)
        mag_ba = BA.distance(origin)
        mag_bc = BC.distance(origin)

        if mag_ba == 0 or mag_bc == 0:
            # Only possible for coincident vertices, which the triangle
            # generator never produces; kept as a defensive fallback.
            angle_deg = 0
        else:
            cos_theta = dot_val / (mag_ba * mag_bc)
            # The value is exact (symbolic), so this clamp is purely a guard
            # that keeps the argument inside acos's real domain [-1, 1].
            cos_theta = max(-1, min(1, cos_theta))
            angle_rad = sympy.acos(cos_theta)
            angle_deg = float(angle_rad.evalf() * 180 / sympy.pi)

        question = self._format_question(rng, "angle_measure", A, B, C)
        answer_str = f"{angle_deg:.2f}°"

        metadata = self._vertex_metadata(A, B, C)
        metadata["angle_ABC_degrees"] = angle_deg
        return question, answer_str, metadata


# Register the dataset under the name "advanced_geometry", together with its
# config class (the registry itself is defined in ..factory).
register_dataset("advanced_geometry", AdvancedGeometryDataset, AdvancedGeometryConfig)
140 changes: 140 additions & 0 deletions reasoning_gym/geometry/simple_geometry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import random
from dataclasses import dataclass
from typing import Optional

from ..factory import ProceduralDataset, register_dataset

@dataclass
class SimpleGeometryConfig:
    """
    Configuration for generating basic geometry (angle-finding) tasks.
    Produces a random convex polygon with N sides, random angles
    for the first (N-1) sides, and asks the solver to find the last angle.
    """

    min_sides: int = 3  # Minimum number of sides (e.g. triangle)
    max_sides: int = 6  # Maximum number of sides (e.g. hexagon)
    min_angle: int = 10  # Minimum angle (in degrees) for each of the first (N-1) angles
    max_angle: int = 170  # Maximum angle (in degrees) for each of the first (N-1) angles
    seed: Optional[int] = None  # Random seed
    size: int = 100  # Number of geometry tasks to generate

    def validate(self) -> None:
        """
        Validate configuration parameters.

        Raises:
            AssertionError: if any parameter is out of range, or if the angle
                bounds make it impossible to leave a positive missing angle.
        """
        assert self.min_sides >= 3, "min_sides must be at least 3 (triangle)."
        assert self.max_sides >= self.min_sides, "max_sides must be >= min_sides."
        assert 0 < self.min_angle < 180, "min_angle must be in (0, 180)."
        assert self.max_angle <= 179, "max_angle should be less than 180."
        assert self.max_angle >= self.min_angle, "max_angle must be >= min_angle."
        assert self.size > 0, "size must be positive."
        # The (N-1) known angles must be able to sum to less than 180*(N-2).
        # The bound is tightest for the smallest N, so checking min_sides
        # covers every polygon size that can be drawn.
        assert (self.min_sides - 1) * self.min_angle < 180 * (self.min_sides - 2), (
            "min_angle is too large: the known angles of the smallest polygon "
            "would always reach or exceed its interior angle sum."
        )


class SimpleGeometryDataset(ProceduralDataset):
    """
    A dataset for simple polygon angle-finding tasks.
    We randomly choose the number of sides N within [min_sides, max_sides].
    We then generate (N-1) random angles (in degrees) such that the remaining
    angle of an N-sided convex polygon (interior sum 180*(N-2)) is itself a
    valid convex interior angle, i.e. strictly between 0 and 180 degrees.
    The question asks for the missing angle; the answer is computed by subtracting the
    sum of known angles from 180*(N-2).
    """

    def __init__(self, config: SimpleGeometryConfig):
        # Alternative phrasings of the same question; one is picked per item.
        self._prompt_templates = [
            (
                "Given a convex polygon with {n_sides} sides, its first {n_minus_1} interior angles "
                "are: {angle_list}. What is the measure of the remaining interior angle (in degrees)?"
            ),
            (
                "A convex polygon has {n_sides} sides. The measures of "
                "the first {n_minus_1} interior angles are: {angle_list}. "
                "Find the measure of the last interior angle."
            ),
            (
                "Consider a convex {n_sides}-gon whose first {n_minus_1} interior angles "
                "are: {angle_list}. Determine the measure of the remaining angle."
            ),
        ]
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """
        Generate a single geometry angle-finding item.

        Returns:
            A dict with:
                - question: str
                - answer: str (the missing angle, as an integer or float in degrees)
                - metadata: dict (n_sides, angles, sum_of_known, missing_angle, etc.)

        Raises:
            ValueError: if no valid set of known angles could be generated.
        """
        rng = random.Random(self.seed + idx)

        # Randomly pick the number of sides
        n_sides = rng.randint(self.config.min_sides, self.config.max_sides)

        # Total interior angle sum for a convex n_sides-gon
        total_sum = 180 * (n_sides - 2)

        known_angles = self._generate_valid_angles(rng, n_sides, total_sum)
        known_sum = sum(known_angles)
        missing_angle = total_sum - known_sum

        # Build the question string
        angle_list_str = ", ".join(f"{a:.1f}°" for a in known_angles)
        prompt = rng.choice(self._prompt_templates).format(
            n_sides=n_sides,
            n_minus_1=n_sides - 1,
            angle_list=angle_list_str,
        )

        # Report one decimal place, collapsing to a plain integer when the
        # value is effectively whole, so the answer stays clean and consistent.
        missing_angle_rounded = round(missing_angle, 1)
        if abs(missing_angle_rounded - round(missing_angle_rounded)) < 1e-6:
            missing_angle_rounded = int(missing_angle_rounded)

        answer_str = str(missing_angle_rounded)

        return {
            "question": prompt,
            "answer": answer_str,
            "metadata": {
                "n_sides": n_sides,
                "known_angles": known_angles,
                "sum_of_known_angles": known_sum,
                "missing_angle_raw": missing_angle,
                "missing_angle_rounded": missing_angle_rounded,
                "total_interior_sum": total_sum,
            },
        }

    def _generate_valid_angles(self, rng: random.Random, n_sides: int, total_sum: int):
        """
        Generate (n_sides - 1) random angles in [min_angle, max_angle] whose sum
        leaves a missing angle strictly between 0 and 180 degrees, so the
        polygon described in the question really can be convex.
        We keep retrying until we find a valid set or reach a max attempt limit.

        Returns:
            A list of (n_sides - 1) whole-degree angles stored as floats.

        Raises:
            ValueError: if no valid set is found within the attempt limit.
        """
        low, high = self.config.min_angle, self.config.max_angle
        max_attempts = 100
        for _attempt in range(max_attempts):
            angles = [float(rng.randint(low, high)) for _side in range(n_sides - 1)]

            # missing = total_sum - sum(angles) must satisfy 0 < missing < 180.
            if total_sum - 180 < sum(angles) < total_sum:
                return angles

        # If we fail after max_attempts, raise an error
        raise ValueError(
            f"Could not generate valid angles for an {n_sides}-gon "
            f"with total sum {total_sum} within {max_attempts} attempts."
        )

# Register the dataset under the name "simple_geometry", together with its
# config class, so it can be accessed similarly to the others.
register_dataset("simple_geometry", SimpleGeometryDataset, SimpleGeometryConfig)
Loading
Loading