diff --git a/ax/benchmark/problems/data.py b/ax/benchmark/problems/data.py
new file mode 100644
index 00000000000..cf48bf124cc
--- /dev/null
+++ b/ax/benchmark/problems/data.py
@@ -0,0 +1,143 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+import pandas as pd
+
+
+class AbstractParquetDataLoader(ABC):
+    def __init__(
+        self,
+        benchmark_name: str,
+        dataset_name: str,
+        stem: str,
+        cache_dir: Path | None = None,
+    ) -> None:
+        """
+        Initialize the ParquetDataLoader.
+
+        The loader reads Parquet data from a local cache, downloading it from
+        an external URL and saving it to the cache on first use.
+
+        Args:
+            benchmark_name (str): Top-level cache subdirectory for the file.
+            dataset_name (str): The name of the dataset to load; also used as
+                the cache subdirectory below `benchmark_name`.
+            stem (str): The stem of the parquet file.
+            cache_dir (Path | None): The directory where cached data will be
+                stored. Defaults to '~/.cache/ax_benchmark_data'.
+        """
+        self.cache_dir: Path = (
+            cache_dir
+            if cache_dir is not None
+            else Path("~/.cache").expanduser().joinpath("ax_benchmark_data")
+        )
+        self.benchmark_name = benchmark_name
+        self.dataset_name = dataset_name
+        self.stem = stem
+
+    @property
+    def filename(self) -> str:
+        """
+        Build the file name used for the cached file.
+
+        The name is the loader's stem with the '.parquet.gzip' extension
+        appended.
+
+        Returns:
+            str: The file name of the cached file.
+        """
+        return self.stem + ".parquet.gzip"
+
+    @property
+    def cache_path(self) -> Path:
+        """
+        Location of the cached file on disk.
+
+        The file lives under the cache directory, in one subdirectory per
+        benchmark and, below that, one per dataset.
+
+        Returns:
+            Path: The path to the cached file.
+        """
+        # <cache_dir>/<benchmark_name>/<dataset_name>/<filename>
+        benchmark_dir = self.cache_dir / self.benchmark_name
+        dataset_dir = benchmark_dir / self.dataset_name
+        return dataset_dir / self.filename
+
+    def is_cached(self) -> bool:
+        """
+        Report whether the cache path exists; its contents are not validated.
+
+        Returns:
+            bool: True if the data is cached, False otherwise.
+        """
+        return self.cache_path.exists()
+
+    def load(self, download: bool = True) -> pd.DataFrame:
+        """
+        Read the parquet data from the cache or download it from the URL.
+
+        Cached data is read from the cache. Otherwise, if `download` is True and
+        `url` is not None, the data is downloaded, cached, and then returned.
+
+        Args:
+            download (bool): Download the data if it is not cached. Defaults to True.
+
+        Returns:
+            pd.DataFrame: The loaded parquet data.
+
+        Raises:
+            ValueError: If the data is not cached and cannot be downloaded.
+        """
+        if self.is_cached():
+            with self.cache_path.open("rb") as infile:
+                return pd.read_parquet(infile, engine="pyarrow")
+        if download:
+            if self.url is None:
+                raise ValueError(
+                    f"File {self.cache_path} does not exist, "
+                    "`download` is True, but URL is not specified."
+                )
+            return self._fetch_and_cache()
+        raise ValueError(
+            f"File {self.cache_path} does not exist and `download` is False"
+        )
+
+    def _fetch_and_cache(self) -> pd.DataFrame:
+        """
+        Download the data from the URL and cache it.
+
+        The file is written to a temporary sibling and then renamed, so a failed
+        write never leaves a truncated file that `is_cached` would accept.
+
+        Returns:
+            pd.DataFrame: The downloaded parquet data.
+        """
+        data = pd.read_parquet(self.url, engine="pyarrow")
+        self.cache_path.parent.mkdir(parents=True, exist_ok=True)
+        tmp_path = self.cache_path.with_name(f"{self.filename}.tmp")
+        with tmp_path.open("wb") as outfile:
+            data.to_parquet(outfile, engine="pyarrow", compression="gzip")
+        tmp_path.replace(self.cache_path)
+        return data
+
+    @property
+    @abstractmethod
+    def url(self) -> str | None:
+        """
+        Get the URL of the parquet file.
+
+        Subclasses return the URL that `load` downloads the parquet file from.
+        None is allowed to support cases where the user manually populates the
+        download cache beforehand.
+
+        Returns:
+            str | None: The URL of the parquet file or None.
+        """
+        pass
diff --git a/ax/benchmark/problems/surrogate/__init__.py b/ax/benchmark/problems/surrogate/__init__.py
new file mode 100644
index 00000000000..ffb2930b3a8
--- /dev/null
+++ b/ax/benchmark/problems/surrogate/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
diff --git a/ax/benchmark/problems/surrogate/lcbench/__init__.py b/ax/benchmark/problems/surrogate/lcbench/__init__.py
new file mode 100644
index 00000000000..ffb2930b3a8
--- /dev/null
+++ b/ax/benchmark/problems/surrogate/lcbench/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
diff --git a/ax/benchmark/problems/surrogate/lcbench/data.py b/ax/benchmark/problems/surrogate/lcbench/data.py
new file mode 100644
index 00000000000..43e250471db
--- /dev/null
+++ b/ax/benchmark/problems/surrogate/lcbench/data.py
@@ -0,0 +1,206 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from collections.abc import Collection
+from dataclasses import dataclass, field, InitVar
+from pathlib import Path
+
+import pandas as pd
+
+import torch
+from ax.benchmark.problems.data import AbstractParquetDataLoader
+from ax.benchmark.problems.surrogate.lcbench.utils import (
+    DEFAULT_METRIC_NAME,
+    get_lcbench_log_scale_parameter_names,
+    get_lcbench_parameter_names,
+)
+
+DATASET_NAMES = [  # the 35 LCBench dataset names accepted by `load_lcbench_data`
+    "APSFailure",
+    "Amazon_employee_access",
+    "Australian",
+    "Fashion-MNIST",
+    "KDDCup09_appetency",
+    "MiniBooNE",
+    "adult",
+    "airlines",
+    "albert",
+    "bank-marketing",
+    "blood-transfusion-service-center",
+    "car",
+    "christine",
+    "cnae-9",
+    "connect-4",
+    "covertype",
+    "credit-g",
+    "dionis",
+    "fabert",
+    "helena",
+    "higgs",
+    "jannis",
+    "jasmine",
+    "jungle_chess_2pcs_raw_endgame_complete",
+    "kc1",
+    "kr-vs-kp",
+    "mfeat-factors",
+    "nomao",
+    "numerai28.6",
+    "phoneme",
+    "segment",
+    "shuttle",
+    "sylvine",
+    "vehicle",
+    "volkert",
+]
+
+
+class LCBenchDataLoader(AbstractParquetDataLoader):
+    """Loader for one parquet file (`stem`) of a single LCBench dataset."""
+
+    def __init__(
+        self,
+        dataset_name: str,
+        stem: str,
+        cache_dir: Path | None = None,
+    ) -> None:
+        super().__init__(
+            benchmark_name="LCBenchLite",
+            dataset_name=dataset_name,
+            stem=stem,
+            cache_dir=cache_dir,
+        )
+
+    @property
+    def url(self) -> str:
+        """
+        URL of this loader's GZIP-compressed parquet file. The files for the 35
+        LCBench datasets were created by splitting the massive JSON dump of
+        LCBench by dataset, then into config info, learning curve metrics, and
+        final results, saving each as Parquet, and uploading them to GitHub.
+        """
+        return (
+            "https://raw.githubusercontent.com/ltiao/"
+            f"{self.benchmark_name}/main/{self.dataset_name}/{self.filename}"
+        )
+
+
+@dataclass(kw_only=True)
+class LCBenchData:
+    parameter_df: pd.DataFrame
+    metric_series: pd.Series
+    timestamp_series: pd.Series
+
+    runtime_series: pd.Series = field(init=False)
+    # pyre-ignore [16]: Pyre doesn't understand InitVars.
+    runtime_fillna: InitVar[bool] = False
+    # pyre-ignore [16]: Pyre doesn't understand InitVars.
+    log_scale_parameter_names: InitVar[Collection[str] | None] = None
+    dtype: torch.dtype = torch.double
+    device: torch.device | None = None
+
+    def __post_init__(
+        self,
+        runtime_fillna: bool,
+        log_scale_parameter_names: Collection[str] | None,
+    ) -> None:
+        """Set `runtime_series`; log-transform and subset `parameter_df` columns."""
+        self.timestamp_series.name = "timestamp"
+
+        self.runtime_series = self._get_runtime_series(fillna=runtime_fillna)
+        self.runtime_series.name = "runtimes"
+
+        parameter_names = get_lcbench_parameter_names()
+        if log_scale_parameter_names is None:
+            log_scale_parameter_names = get_lcbench_log_scale_parameter_names()
+        if unrecognized_param_set := (
+            set(log_scale_parameter_names) - set(parameter_names)
+        ):
+            raise ValueError(f"Unrecognized columns: {unrecognized_param_set}")
+        # Copy so the caller's frame is untouched; pandas needs a list indexer.
+        log_columns = list(log_scale_parameter_names)
+        parameter_df = self.parameter_df[parameter_names].copy()
+        if log_columns:
+            parameter_df[log_columns] = parameter_df[log_columns].transform("log")
+        self.parameter_df = parameter_df
+
+    @staticmethod
+    def _unstack_by_epoch(series: pd.Series) -> pd.DataFrame:
+        # unstack by epoch, then drop the first and last of the 52 epochs [0, ..., 51]
+        per_epoch = series.unstack(level="epoch")
+        return per_epoch.iloc[:, 1:-1]
+
+    def _get_runtime_series(self, fillna: bool) -> pd.Series:
+        # timestamp (in secs) at every epoch, grouped by trial
+        per_trial = self.timestamp_series.groupby(level="trial")
+
+        # runtime (in secs) of each incremental epoch; a trial's first entry is NaN
+        increments = per_trial.diff(periods=1)
+        if not fillna:
+            return increments
+        # replace each trial's leading NaN with that trial's first timestamp
+        return increments.fillna(per_trial.head(n=1))
+
+    def _to_tensor(self, x: pd.DataFrame | pd.Series) -> torch.Tensor:
+        """Convert pandas values to a tensor with the configured dtype/device."""
+        as_array = x.values
+        tensor = torch.from_numpy(as_array)
+        return tensor.to(dtype=self.dtype, device=self.device)
+
+    @property
+    def metric_df(self) -> pd.DataFrame:
+        return self._unstack_by_epoch(self.metric_series)
+
+    @property
+    def runtime_df(self) -> pd.DataFrame:
+        return self._unstack_by_epoch(self.runtime_series)
+
+    @property
+    def average_runtime_series(self) -> pd.Series:
+        # take average runtime over epochs (N6231489 shows runtime is
+        # mostly constant across epochs, as one'd expect)
+        return self.runtime_series.groupby(level="trial").mean()
+
+    @property
+    def parameters(self) -> torch.Tensor:
+        return self._to_tensor(self.parameter_df)
+
+    @property
+    def metrics(self) -> torch.Tensor:
+        return self._to_tensor(self.metric_df)
+
+    @property
+    def runtimes(self) -> torch.Tensor:
+        return self._to_tensor(self.runtime_df)
+
+    @property
+    def average_runtimes(self) -> torch.Tensor:
+        return self._to_tensor(self.average_runtime_series)
+
+
+def load_lcbench_data(
+    dataset_name: str,
+    metric_name: str = DEFAULT_METRIC_NAME,
+    log_scale_parameter_names: Collection[str] | None = None,
+    dtype: torch.dtype = torch.double,
+    device: torch.device | None = None,
+) -> LCBenchData:
+    """Load one LCBench dataset's config and metrics files as `LCBenchData`."""
+    if dataset_name not in DATASET_NAMES:
+        raise ValueError(
+            f"Invalid dataset {dataset_name}. Valid datasets: {DATASET_NAMES}"
+        )
+    config_df = LCBenchDataLoader(dataset_name, stem="config").load()
+    curves_df = LCBenchDataLoader(dataset_name, stem="metrics").load()
+
+    return LCBenchData(
+        parameter_df=config_df,
+        metric_series=curves_df[metric_name],
+        timestamp_series=curves_df["time"],
+        log_scale_parameter_names=log_scale_parameter_names,
+        dtype=dtype,
+        device=device,
+    )
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_APSFailure.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_APSFailure.pt
new file mode 100644
index 00000000000..e3566d117e2
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_APSFailure.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Amazon_employee_access.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Amazon_employee_access.pt
new file mode 100644
index 00000000000..fb67d484fd6
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Amazon_employee_access.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Australian.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Australian.pt
new file mode 100644
index 00000000000..eab40908da3
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Australian.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Fashion-MNIST.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Fashion-MNIST.pt
new file mode 100644
index 00000000000..14b02cc6212
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_Fashion-MNIST.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_KDDCup09_appetency.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_KDDCup09_appetency.pt
new file mode 100644
index 00000000000..38b1675668f
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_KDDCup09_appetency.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_albert.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_albert.pt
new file mode 100644
index 00000000000..f3c9595f785
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_albert.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_car.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_car.pt
new file mode 100644
index 00000000000..f1af8b33074
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_car.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_christine.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_christine.pt
new file mode 100644
index 00000000000..286b209b372
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_christine.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_cnae-9.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_cnae-9.pt
new file mode 100644
index 00000000000..671e77c404d
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_cnae-9.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_covertype.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_covertype.pt
new file mode 100644
index 00000000000..4b37a1860c9
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_covertype.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_dionis.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_dionis.pt
new file mode 100644
index 00000000000..0bebffc78ab
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_dionis.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_fabert.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_fabert.pt
new file mode 100644
index 00000000000..96e59d27daf
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_fabert.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_helena.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_helena.pt
new file mode 100644
index 00000000000..9d2dea47c3d
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_helena.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_higgs.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_higgs.pt
new file mode 100644
index 00000000000..c5f2dafdcf7
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_higgs.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_jannis.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_jannis.pt
new file mode 100644
index 00000000000..6b58f0368ce
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_jannis.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_jasmine.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_jasmine.pt
new file mode 100644
index 00000000000..91e0d865ea1
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_jasmine.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_kr-vs-kp.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_kr-vs-kp.pt
new file mode 100644
index 00000000000..ad67b7c41fa
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_kr-vs-kp.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_mfeat-factors.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_mfeat-factors.pt
new file mode 100644
index 00000000000..c2bb1b4c33a
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_mfeat-factors.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_nomao.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_nomao.pt
new file mode 100644
index 00000000000..40bea7cd895
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_nomao.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_shuttle.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_shuttle.pt
new file mode 100644
index 00000000000..b275d0aa5a2
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_shuttle.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_sylvine.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_sylvine.pt
new file mode 100644
index 00000000000..dbf61d2bcd8
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_sylvine.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_volkert.pt b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_volkert.pt
new file mode 100644
index 00000000000..0daa4a24b80
Binary files /dev/null and b/ax/benchmark/problems/surrogate/lcbench/data/transfer_learning/lcbench_volkert.pt differ
diff --git a/ax/benchmark/problems/surrogate/lcbench/early_stopping.py b/ax/benchmark/problems/surrogate/lcbench/early_stopping.py
new file mode 100644
index 00000000000..b33aa7106eb
--- /dev/null
+++ b/ax/benchmark/problems/surrogate/lcbench/early_stopping.py
@@ -0,0 +1,257 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from collections.abc import Iterable, Mapping, Sequence
+
+from dataclasses import dataclass, field, InitVar
+from logging import Logger
+from typing import Any, Protocol, TypeVar
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+import torch
+from ax.benchmark.benchmark_problem import BenchmarkProblem
+from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
+from ax.benchmark.problems.surrogate.lcbench.data import load_lcbench_data
+from ax.benchmark.problems.surrogate.lcbench.transfer_learning import (
+    BASELINE_VALUES,
+    DEFAULT_AND_OPTIMAL_VALUES,
+    DEFAULT_NUM_TRIALS,
+)
+from ax.benchmark.problems.surrogate.lcbench.utils import (
+    DEFAULT_METRIC_NAME,
+    get_lcbench_log_scale_parameter_names,
+    get_lcbench_optimization_config,
+    get_lcbench_parameter_names,
+    get_lcbench_search_space,
+)
+from ax.core.optimization_config import OptimizationConfig
+from ax.core.search_space import SearchSpace
+from ax.core.types import TParamValue
+from ax.exceptions.core import UserInputError
+from ax.utils.common.logger import get_logger
+
+from sklearn.compose import make_column_transformer, TransformedTargetRegressor
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import FunctionTransformer, MinMaxScaler
+
+
+logger: Logger = get_logger(__name__)
+
+TRegressorProtocol = TypeVar("TRegressorProtocol", bound="RegressorProtocol")
+
+
+class RegressorProtocol(Protocol):
+    """
+    A regressor with `fit`, `predict` and `set_params`, e.g. `RandomForestRegressor`.
+    """
+
+    def fit(
+        self: TRegressorProtocol, X: pd.DataFrame, y: pd.DataFrame | pd.Series
+    ) -> TRegressorProtocol: ...
+    def predict(self: TRegressorProtocol, X: pd.DataFrame) -> npt.NDArray: ...
+    def set_params(self: TRegressorProtocol, **kwargs: Any) -> TRegressorProtocol: ...
+
+
+def get_default_base_regressor() -> RegressorProtocol:
+    return RandomForestRegressor(max_depth=30)  # fresh, unfitted estimator per call
+
+
+def _create_surrogate_regressor(
+    base_regressor: RegressorProtocol,
+    log_numeric_columns: Iterable,
+    numeric_columns: Iterable,
+    seed: int,
+) -> RegressorProtocol:
+    """Build an unfitted surrogate pipeline on a seeded copy of `base_regressor`."""
+    from copy import deepcopy  # function-local: nothing else here needs it
+
+    unit_scaler = MinMaxScaler()
+    log_transformer = FunctionTransformer(
+        func=np.log, inverse_func=np.exp, validate=True
+    )
+    preprocessor = make_column_transformer(
+        (make_pipeline(log_transformer, unit_scaler), list(log_numeric_columns)),
+        (unit_scaler, list(numeric_columns)),
+        remainder="drop",
+    )
+    # The default base regressors are created once, at class-definition time,
+    # so calling `set_params` on the original would mutate a shared instance.
+    regressor = deepcopy(base_regressor)
+    try:
+        regressor = regressor.set_params(random_state=seed)
+    except ValueError:
+        # some models (e.g. K nearest neighbors) are deterministic by nature and do not
+        # allow you to set a random seed
+        logger.warning(
+            f"Surrogate model `{base_regressor}` does not support specification of "
+            "random seed, which *may* indicate that the model is already "
+            "deterministic by nature. However, if you're unsure, this could lead to "
+            "non-deterministic behavior in your experiments."
+        )
+    return make_pipeline(
+        preprocessor,
+        TransformedTargetRegressor(regressor=regressor, transformer=log_transformer),
+    )
+
+
+@dataclass(kw_only=True)
+class LearningCurveBenchmarkTestFunction(BenchmarkTestFunction):
+    """A benchmark test function for LCBench early-stopping problems.
+
+    This class represents a learning curve benchmark test function, which leverages a
+    surrogate model trained to predict the performance of deep learning models at
+    different stages of training. The test function takes in a set of hyperparameters
+    and returns a tensor representing the predicted performance of the model at each
+    stage (epoch) of training.
+
+    To use this class, you would typically create an instance of it and pass it to a
+    `BenchmarkProblem` along with a search space, optimization config, and other
+    relevant parameters.
+
+    Example:
+        test_function = LearningCurveBenchmarkTestFunction(
+            dataset_name="vehicle", seed=42
+        )
+        search_space = get_lcbench_search_space()
+        optimization_config = get_lcbench_optimization_config(
+            metric_name="Train/val_accuracy", observe_noise_sd=True, use_map_metric=True
+        )
+        problem = BenchmarkProblem(
+            name="vehicle_Train/val_accuracy",
+            search_space=search_space,
+            optimization_config=optimization_config,
+            test_function=test_function,
+            step_runtime_function=None,
+            ...
+        )
+    """
+
+    n_steps: int = field(init=False)
+    outcome_names: Sequence[str] = field(default_factory=lambda: [DEFAULT_METRIC_NAME])
+    dataset_name: str
+    metric_surrogate: RegressorProtocol = field(init=False)
+    runtime_surrogate: RegressorProtocol = field(init=False)
+
+    # pyre-ignore [16]: Pyre doesn't understand InitVars.
+    metric_base_surrogate: InitVar[RegressorProtocol] = get_default_base_regressor()
+    # pyre-ignore [16]: Pyre doesn't understand InitVars.
+    runtime_base_surrogate: InitVar[RegressorProtocol] = get_default_base_regressor()
+    # pyre-ignore [16]: Pyre doesn't understand InitVars.
+    seed: InitVar[int]
+
+    def __post_init__(
+        self,
+        metric_base_surrogate: RegressorProtocol,
+        runtime_base_surrogate: RegressorProtocol,
+        seed: int,
+    ) -> None:
+        """Load the LCBench data and fit the metric and runtime surrogates."""
+        if len(self.outcome_names) != 1:
+            raise ValueError("Exactly one outcome is supported currently")
+        lcbench_data = load_lcbench_data(
+            dataset_name=self.dataset_name,
+            metric_name=self.outcome_names[0],
+            log_scale_parameter_names=[],  # raw scale: the pipeline applies the log
+        )
+        self.n_steps = lcbench_data.metric_df.shape[-1]
+
+        parameter_names = get_lcbench_parameter_names()
+        log_names = get_lcbench_log_scale_parameter_names()
+        # a list, not a set difference: set order varies between processes
+        numeric_columns = [p for p in parameter_names if p not in log_names]
+
+        self.metric_surrogate = _create_surrogate_regressor(
+            base_regressor=metric_base_surrogate,
+            log_numeric_columns=log_names,
+            numeric_columns=numeric_columns,
+            seed=seed,
+        ).fit(X=lcbench_data.parameter_df, y=lcbench_data.metric_df)
+        self.runtime_surrogate = _create_surrogate_regressor(
+            base_regressor=runtime_base_surrogate,
+            log_numeric_columns=log_names,
+            numeric_columns=numeric_columns,
+            seed=seed,
+        ).fit(X=lcbench_data.parameter_df, y=lcbench_data.average_runtime_series)
+
+    def evaluate_true(self, params: Mapping[str, TParamValue]) -> torch.Tensor:
+        # single-row frame in, predicted learning curve out; shape: (1, 50)
+        features = pd.DataFrame.from_records(data=[params])
+        return torch.from_numpy(self.metric_surrogate.predict(features))
+
+    def step_runtime(self, params: Mapping[str, TParamValue]) -> float:
+        # single-row frame in, one predicted runtime out; shape: (1,)
+        features = pd.DataFrame.from_records(data=[params])
+        return self.runtime_surrogate.predict(features).item()
+
+
+def get_lcbench_early_stopping_benchmark_problem(
+    dataset_name: str,
+    metric_name: str = DEFAULT_METRIC_NAME,
+    num_trials: int = DEFAULT_NUM_TRIALS,
+    constant_step_runtime: bool = False,
+    noise_std: Mapping[str, float] | float = 0.0,
+    observe_noise_sd: bool = False,
+    seed: int = 0,
+) -> BenchmarkProblem:
+    """Construct an LCBench early-stopping benchmark problem.
+
+    Args:
+        dataset_name: An LCBench dataset name; must be one of the keys of
+            `DEFAULT_AND_OPTIMAL_VALUES`, otherwise `UserInputError` is raised.
+        metric_name: The name of the objective metric, which the surrogate is fit to.
+        num_trials: The number of optimization trials to run.
+        constant_step_runtime: Determines if the step runtime is fixed or varies
+            based on the hyperparameters.
+        noise_std: The standard deviation of the observation noise.
+        observe_noise_sd: Whether to report the standard deviation of the
+            observation noise.
+        seed: The random seed used in training the surrogate model to ensure
+            reproducibility and consistency of results.
+
+    Returns:
+        An LCBench surrogate benchmark problem.
+    """
+    if dataset_name not in DEFAULT_AND_OPTIMAL_VALUES:
+        raise UserInputError(
+            f"`dataset_name` must be one of {sorted(DEFAULT_AND_OPTIMAL_VALUES)}"
+        )
+
+    name = f"LCBench_Surrogate_{dataset_name}_{metric_name}:v1"
+
+    _, optimal_value = DEFAULT_AND_OPTIMAL_VALUES[dataset_name]
+    baseline_value = BASELINE_VALUES[dataset_name]
+
+    search_space: SearchSpace = get_lcbench_search_space()
+    optimization_config: OptimizationConfig = get_lcbench_optimization_config(
+        metric_name=metric_name,
+        observe_noise_sd=observe_noise_sd,
+        use_map_metric=True,
+    )
+
+    # Fit the surrogate on the same metric that the optimization config targets.
+    test_function = LearningCurveBenchmarkTestFunction(
+        dataset_name=dataset_name, outcome_names=[metric_name], seed=seed
+    )
+
+    step_runtime_function = (
+        None if constant_step_runtime else test_function.step_runtime
+    )
+
+    return BenchmarkProblem(
+        name=name,
+        search_space=search_space,
+        optimization_config=optimization_config,
+        num_trials=num_trials,
+        optimal_value=optimal_value,
+        baseline_value=baseline_value,
+        test_function=test_function,
+        step_runtime_function=step_runtime_function,
+        noise_std=noise_std,
+    )
diff --git a/ax/benchmark/problems/surrogate/lcbench/transfer_learning.py b/ax/benchmark/problems/surrogate/lcbench/transfer_learning.py
new file mode 100644
index 00000000000..5337be11eb8
--- /dev/null
+++ b/ax/benchmark/problems/surrogate/lcbench/transfer_learning.py
@@ -0,0 +1,209 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+import os
+from collections.abc import Mapping
+
+from typing import Any
+
+import torch
+from ax.benchmark.benchmark_problem import BenchmarkProblem
+from ax.benchmark.benchmark_test_functions.surrogate import SurrogateTestFunction
+from ax.benchmark.problems.surrogate.lcbench.utils import (
+    DEFAULT_METRIC_NAME,
+    get_lcbench_optimization_config,
+    get_lcbench_search_space,
+)
+from ax.core.experiment import Experiment
+from ax.core.optimization_config import OptimizationConfig
+from ax.core.search_space import SearchSpace
+from ax.exceptions.core import UserInputError
+from ax.modelbridge.registry import Cont_X_trans, Models, Y_trans
+from ax.modelbridge.torch import TorchModelBridge
+from ax.models.torch.botorch_modular.kernels import ScaleMaternKernel
+from ax.models.torch.botorch_modular.surrogate import Surrogate
+from ax.utils.testing.mock import skip_fit_gpytorch_mll_context_manager
+from botorch.models import SingleTaskGP
+from gpytorch.priors import LogNormalPrior
+
+from pyre_extensions import assert_is_instance
+
+
+DEFAULT_NUM_TRIALS: int = 30  # default value of the `num_trials` argument
+
+BASELINE_VALUES: dict[str, float] = {  # dataset name -> problem `baseline_value`
+    "KDDCup09_appetency": 94.84762378096477,
+    "APSFailure": 97.75754021610224,
+    "albert": 63.893807756587876,
+    "Amazon_employee_access": 93.92434556024065,
+    "Australian": 89.35657945184583,
+    "Fashion-MNIST": 84.94202558279305,
+    "car": 80.47958436427733,
+    "christine": 72.27323565977512,
+    "cnae-9": 94.15832149950144,
+    "covertype": 61.552294168420595,
+    "dionis": 54.99212355534204,
+    "fabert": 64.88207128531921,
+    "helena": 19.156010689783603,
+    "higgs": 64.84690723875762,
+    "jannis": 57.58628096200955,
+    "jasmine": 80.6321652907534,
+    "kr-vs-kp": 94.53560263952683,
+    "mfeat-factors": 95.58423367904923,
+    "nomao": 93.51402242799601,
+    "shuttle": 96.43481523407816,
+    "sylvine": 91.91719206036713,
+    "volkert": 49.50686237250762,
+}
+DEFAULT_AND_OPTIMAL_VALUES: dict[str, tuple[float, float]] = {  # (default, optimal)
+    "KDDCup09_appetency": (87.14437173839048, 100.41903197808242),
+    "APSFailure": (97.3412499690734, 98.38099041845653),
+    "albert": (64.42693765555859, 67.1082934765708),
+    "Amazon_employee_access": (80.69975381128579, 98.85943103737361),
+    "Australian": (78.15200826093329, 93.0325039665508),
+    "Fashion-MNIST": (83.10219231927393, 89.07884250211491),
+    "car": (64.26087451215653, 88.77391803474296),
+    "christine": (70.9732126619125, 73.29816335805616),
+    "cnae-9": (25.89740105397502, 119.55228152861949),
+    "covertype": (62.13132918760403, 67.15439170116016),
+    "dionis": (11.977294194995338, 101.64303302727558),
+    "fabert": (37.72627877151164, 73.30035354875776),
+    "helena": (7.455048985077637, 29.78291566900156),
+    "higgs": (64.80984463924982, 71.89770865111743),
+    "jannis": (58.17868556972097, 62.4080058894946),
+    "jasmine": (76.76806487249725, 83.56868288456046),
+    "kr-vs-kp": (79.76060013094786, 104.6216855876375),
+    "mfeat-factors": (69.85128706899793, 111.67026074027292),
+    "nomao": (92.85065022473196, 95.83471144381221),
+    "shuttle": (98.86272845879327, 100.37428346365724),
+    "sylvine": (83.1596613771663, 98.85179841137813),
+    "volkert": (45.361097364985376, 58.133196667029864),
+}
+
+
+def get_lcbench_experiment(
+    metric_name: str = DEFAULT_METRIC_NAME,
+    observe_noise_stds: bool = False,
+) -> Experiment:
+    """Build an experiment over the LCBench search space and optimization config.
+
+    The experiment is created with only these two components set. It was used in
+    N5808878 to fit the initial surrogate, and may also be useful when setting up
+    transfer learning experiments.
+
+    Args:
+        metric_name: The name of the metric to use for the objective.
+        observe_noise_stds: Whether or not the magnitude of the observation
+            noise is known.
+
+    Returns:
+        An experiment with the LCBench search space and optimization config.
+    """
+    space: SearchSpace = get_lcbench_search_space()
+    opt_config: OptimizationConfig = get_lcbench_optimization_config(
+        metric_name=metric_name,
+        observe_noise_sd=observe_noise_stds,
+        use_map_metric=False,
+    )
+    return Experiment(
+        search_space=space,
+        optimization_config=opt_config,
+    )
+
+
+def get_lcbench_surrogate() -> Surrogate:
+    """Return the Surrogate specification used to fit the LCBench data.
+
+    A `SingleTaskGP` with a `ScaleMaternKernel` and no input transform classes.
+    """
+    kernel_options: dict[str, Any] = {
+        "nu": 1.5,
+        "ard_num_dims": 7,  # equals the number of LCBench search parameters
+        "outputscale_prior": LogNormalPrior(-3, 0.0025),
+    }
+    return Surrogate(
+        botorch_model_class=SingleTaskGP,
+        covar_module_class=ScaleMaternKernel,
+        covar_module_options=kernel_options,
+        input_transform_classes=None,
+    )
+
+
+def get_lcbench_benchmark_problem(
+    dataset_name: str,
+    metric_name: str = DEFAULT_METRIC_NAME,
+    num_trials: int = DEFAULT_NUM_TRIALS,
+    noise_stds: Mapping[str, float] | float = 0.0,
+    observe_noise_stds: bool = False,
+) -> BenchmarkProblem:
+    """Construct an LCBench surrogate benchmark problem from saved surrogate data.
+
+    Args:
+        dataset_name: Must be one of the keys of `DEFAULT_AND_OPTIMAL_VALUES` (the
+            names of the LCBench datasets); otherwise a `UserInputError` is raised.
+        metric_name: The name of the metric to use for the objective.
+        num_trials: The number of optimization trials to run.
+        noise_stds: The standard deviation(s) of the observation noise.
+        observe_noise_stds: Whether to report the standard deviation of the
+            observation noise.
+
+    Returns:
+        An LCBench surrogate benchmark problem.
+    """
+
+    if dataset_name not in DEFAULT_AND_OPTIMAL_VALUES:
+        raise UserInputError(
+            f"`dataset_name` must be one of {sorted(DEFAULT_AND_OPTIMAL_VALUES)}"
+        )
+    _, optimal_value = DEFAULT_AND_OPTIMAL_VALUES[dataset_name]
+    base_path = os.path.dirname(os.path.realpath(__file__))
+    obj: dict[str, Any] = torch.load(
+        f=os.path.join(
+            base_path, "data", "transfer_learning", f"lcbench_{dataset_name}.pt"
+        ),
+        weights_only=False,  # full unpickling: only safe for trusted files
+    )
+    optimization_config: OptimizationConfig = get_lcbench_optimization_config(
+        metric_name=metric_name,
+        observe_noise_sd=observe_noise_stds,
+        use_map_metric=False,
+    )
+
+    def get_surrogate() -> TorchModelBridge:
+        """Construct a modelbridge with the LCBench surrogate and datasets.
+
+        Returns:
+            A fitted modelbridge with the LCBench Surrogate and data.
+        """
+        # Skip model fitting: hyperparameters are loaded from the saved state dict.
+        with skip_fit_gpytorch_mll_context_manager():
+            mb = Models.BOTORCH_MODULAR(
+                surrogate=get_lcbench_surrogate(),
+                experiment=obj["experiment"],
+                search_space=obj["experiment"].search_space,
+                data=obj["data"],
+                transforms=Cont_X_trans + Y_trans,
+            )
+        mb.model.surrogate.model.load_state_dict(obj["state_dict"])
+        return assert_is_instance(mb, TorchModelBridge)
+
+    name = f"LCBench_Surrogate_{dataset_name}:v1"
+
+    test_function = SurrogateTestFunction(
+        name=name, outcome_names=[metric_name], get_surrogate=get_surrogate
+    )
+
+    return BenchmarkProblem(
+        name=name,
+        search_space=obj["experiment"].search_space,
+        optimization_config=optimization_config,
+        num_trials=num_trials,
+        optimal_value=optimal_value,
+        baseline_value=BASELINE_VALUES[dataset_name],
+        test_function=test_function,
+        noise_std=0.0 if noise_stds is None else noise_stds,  # None -> 0.0
+    )
diff --git a/ax/benchmark/problems/surrogate/lcbench/utils.py b/ax/benchmark/problems/surrogate/lcbench/utils.py
new file mode 100644
index 00000000000..1f05f61fc3f
--- /dev/null
+++ b/ax/benchmark/problems/surrogate/lcbench/utils.py
@@ -0,0 +1,103 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from ax.benchmark.benchmark_problem import get_soo_opt_config
+from ax.core.optimization_config import OptimizationConfig
+from ax.core.parameter import ParameterType, RangeParameter
+from ax.core.search_space import SearchSpace
+
+
+DEFAULT_METRIC_NAME: str = "Train/val_accuracy"  # default objective metric name
+
+
+def get_lcbench_search_space() -> SearchSpace:
+    """Return the LCBench search space: seven range parameters (3 int, 4 float)."""
+    # Log-scaled parameters: batch_size, max_units, learning_rate and momentum.
+    return SearchSpace(
+        parameters=[
+            RangeParameter(
+                name="batch_size",
+                parameter_type=ParameterType.INT,
+                lower=16,
+                upper=512,
+                log_scale=True,
+            ),
+            RangeParameter(
+                name="max_dropout",
+                parameter_type=ParameterType.FLOAT,
+                lower=0.0,
+                upper=1.0,  # Intentionally 1.0; could be lowered if a more
+                # realistic dropout range is wanted.
+                log_scale=False,
+            ),
+            RangeParameter(
+                name="max_units",
+                parameter_type=ParameterType.INT,
+                lower=64,
+                upper=1024,
+                log_scale=True,
+            ),
+            RangeParameter(
+                name="num_layers",
+                parameter_type=ParameterType.INT,
+                lower=1,
+                upper=4,  # Intentional, although the LCBench repo states 1-5;
+                # see https://github.com/automl/LCBench/issues/4
+                log_scale=False,
+            ),
+            RangeParameter(
+                name="learning_rate",
+                parameter_type=ParameterType.FLOAT,
+                lower=1e-4,
+                upper=1e-1,
+                log_scale=True,
+            ),
+            RangeParameter(
+                name="momentum",
+                parameter_type=ParameterType.FLOAT,
+                lower=0.1,
+                upper=0.99,
+                log_scale=True,
+            ),
+            RangeParameter(
+                name="weight_decay",
+                parameter_type=ParameterType.FLOAT,
+                lower=1e-5,
+                upper=1e-1,
+                log_scale=False,  # Intentionally linear; see the LCBench repo.
+            ),
+        ]
+    )
+
+
+def get_lcbench_optimization_config(
+    metric_name: str = DEFAULT_METRIC_NAME,
+    observe_noise_sd: bool = False,
+    use_map_metric: bool = False,
+) -> OptimizationConfig:
+    """Return a single-outcome optimization config in which higher is better."""
+    return get_soo_opt_config(
+        outcome_names=[metric_name],
+        lower_is_better=False,
+        observe_noise_sd=observe_noise_sd,
+        use_map_metric=use_map_metric,
+    )
+
+
+def get_lcbench_parameters() -> dict[str, RangeParameter]:
+    """Return the `parameters` mapping of the LCBench search space."""
+    return get_lcbench_search_space().parameters  # pyre-ignore [7]
+
+
+def get_lcbench_parameter_names() -> list[str]:
+    """Return the names (keys) of the LCBench search-space parameters."""
+    return list(get_lcbench_parameters())
+
+
+def get_lcbench_log_scale_parameter_names() -> list[str]:
+    """Return the names of the LCBench parameters that have `log_scale` set."""
+    return [name for name, p in get_lcbench_parameters().items() if p.log_scale]
diff --git a/ax/benchmark/tests/problems/test_data.py b/ax/benchmark/tests/problems/test_data.py
new file mode 100644
index 00000000000..9e29b60acb4
--- /dev/null
+++ b/ax/benchmark/tests/problems/test_data.py
@@ -0,0 +1,81 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from io import BufferedReader
+from pathlib import Path
+from unittest.mock import patch
+
+import pandas as pd
+
+from ax.benchmark.problems.data import AbstractParquetDataLoader
+
+from ax.utils.common.testutils import TestCase
+
+
+class ConcreteParquetDataLoader(AbstractParquetDataLoader):
+    """Minimal concrete loader with a dummy URL, used by the tests below."""
+
+    @property
+    def url(self) -> str | None:
+        base = f"https://example.com/{self.benchmark_name}"
+        return f"{base}/main/{self.dataset_name}/{self.filename}"
+
+
+class TestParquetDataLoader(TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        self.test_data = ConcreteParquetDataLoader(
+            benchmark_name="test_benchmark",
+            dataset_name="test_dataset",
+            stem="test_stem",
+            cache_dir=Path("/tmp/test_cache"),
+        )
+
+    def tearDown(self) -> None:
+        # Delete the cached file if it exists, then run the base-class teardown.
+        self.test_data.cache_path.unlink(missing_ok=True)
+        super().tearDown()
+
+    def test_read_cached(self) -> None:
+        # Create a mock cached file
+        self.test_data.cache_path.parent.mkdir(parents=True, exist_ok=True)
+        self.test_data.cache_path.touch()
+
+        with patch(
+            "pandas.read_parquet", return_value=pd.DataFrame()
+        ) as mock_read_parquet:
+            result = self.test_data.load()
+
+        # Assert that the cached file was read
+        mock_read_parquet.assert_called_once()
+        self.assertIsInstance(mock_read_parquet.call_args.args[0], BufferedReader)
+        self.assertEqual(
+            mock_read_parquet.call_args.args[0].name, str(self.test_data.cache_path)
+        )
+        self.assertIsInstance(result, pd.DataFrame)
+
+    def test_read_not_cached_download_true(self) -> None:
+        # Patch `pandas.read_parquet` so that the download itself is mocked out.
+        with patch(
+            "pandas.read_parquet", return_value=pd.DataFrame()
+        ) as mock_read_parquet:
+            # Call the load method with download=True
+            result = self.test_data.load(download=True)
+
+        # Assert that the data was downloaded and cached
+        mock_read_parquet.assert_called_once_with(self.test_data.url, engine="pyarrow")
+
+        # Assert that the cached file now exists
+        self.assertTrue(self.test_data.is_cached())
+        self.assertIsInstance(result, pd.DataFrame)
+
+    def test_read_not_cached_download_false(self) -> None:
+        # Call the load method with download=False
+        with self.assertRaisesRegex(
+            ValueError, "File .* does not exist and `download` is False"
+        ):
+            self.test_data.load(download=False)
diff --git a/ax/benchmark/tests/problems/test_lcbench_benchmark.py b/ax/benchmark/tests/problems/test_lcbench_benchmark.py
new file mode 100644
index 00000000000..f2e94011bfa
--- /dev/null
+++ b/ax/benchmark/tests/problems/test_lcbench_benchmark.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from ax.benchmark.benchmark_metric import BenchmarkMetric
+from ax.benchmark.benchmark_test_functions.surrogate import SurrogateTestFunction
+from ax.benchmark.problems.surrogate.lcbench.transfer_learning import (
+    DEFAULT_AND_OPTIMAL_VALUES,
+    get_lcbench_benchmark_problem,
+)
+from ax.utils.common.testutils import TestCase
+from pyre_extensions import assert_is_instance
+
+
+class TestLCBenchBenchmark(TestCase):
+    def test_lcbench_predictions(self) -> None:
+        """Check problem construction and the surrogate prediction for one dataset."""
+        # Reference values are registered for 22 datasets.
+        self.assertEqual(len(DEFAULT_AND_OPTIMAL_VALUES), 22)
+
+        # NOTE: lots of tasks, so testing only one here o/w this is very slow
+        dataset = "car"
+        expected_default, expected_optimum = DEFAULT_AND_OPTIMAL_VALUES[dataset]
+        problem = get_lcbench_benchmark_problem(dataset_name=dataset, num_trials=32)
+
+        # The problem wraps a surrogate test function and a BenchmarkMetric objective.
+        surrogate_fn = assert_is_instance(problem.test_function, SurrogateTestFunction)
+        objective_metric = assert_is_instance(
+            problem.optimization_config.objective.metric, BenchmarkMetric
+        )
+        self.assertFalse(objective_metric.observe_noise_sd)
+        self.assertEqual(problem.num_trials, 32)
+        self.assertAlmostEqual(
+            float(problem.optimal_value), expected_optimum, places=4
+        )
+
+        # Predict for arm 0_0 (expected exactly once in the training data) and make
+        # sure the prediction matches the stored default value.
+        surrogate = surrogate_fn.surrogate
+        arm_0_0_observations = [
+            obs for obs in surrogate.get_training_data() if obs.arm_name == "0_0"
+        ]
+        self.assertEqual(len(arm_0_0_observations), 1)
+        arm_0_0_features = arm_0_0_observations[0].features
+        means, _ = surrogate.predict(observation_features=[arm_0_0_features])
+        self.assertAlmostEqual(
+            means["Train/val_accuracy"][0], expected_default, places=3
+        )
diff --git a/setup.py b/setup.py
index b9ec5263799..7aafe508e86 100644
--- a/setup.py
+++ b/setup.py
@@ -62,6 +62,9 @@
     "tensorboard",  # For tensorboard unit tests.
     "torchvision",  # For torchvision unit tests.
     "torchx",  # For torchx unit tests.
+    # Required for building RayTune tutorial notebook and
+    # deserializing data for benchmark suites.
+    "pyarrow",
 ]
 
 UNITTEST_REQUIRES = (
@@ -71,7 +74,6 @@
 TUTORIAL_REQUIRES = UNITTEST_REQUIRES + [
     "ray",  # Required for building RayTune tutorial notebook.
     "tabulate",  # Required for building RayTune tutorial notebook.
-    "pyarrow",  # Required for building RayTune tutorial notebook.
     "tensorboardX",  # Required for building RayTune tutorial notebook.
     "matplotlib",  # Required for building Multi-objective tutorial notebook.
     "pyro-ppl",  # Required for to call run_inference.
diff --git a/sphinx/source/benchmark.rst b/sphinx/source/benchmark.rst
index f8f707f79af..31c9bc9e0d7 100644
--- a/sphinx/source/benchmark.rst
+++ b/sphinx/source/benchmark.rst
@@ -106,6 +106,14 @@ Benchmark Problems Registry
     :undoc-members:
     :show-inheritance:
 
+Benchmark Problems: Data Loading Utilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: ax.benchmark.problems.data
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 Benchmark Problems: Bandit
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -154,6 +162,38 @@ Benchmark Problems PyTorchCNN TorchVision
     :undoc-members:
     :show-inheritance:
 
+Benchmark Problems: LCBench - Benchmark Data Loading Utilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: ax.benchmark.problems.surrogate.lcbench.data
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Benchmark Problems: LCBench - Transfer Learning Surrogates
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: ax.benchmark.problems.surrogate.lcbench.transfer_learning
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Benchmark Problems: LCBench - Early-Stopping and Full Learning Curve Surrogates
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: ax.benchmark.problems.surrogate.lcbench.early_stopping
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Benchmark Problems: LCBench - Problem Definition and Helper Functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: ax.benchmark.problems.surrogate.lcbench.utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
 
 Benchmark Problems Runtime Functions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~