From 563c57d1b9a32c9aca9553c2d60315f9cc07550f Mon Sep 17 00:00:00 2001 From: Sheng-Wei Chen Date: Sun, 8 Dec 2024 19:11:01 +0800 Subject: [PATCH 01/12] Fix two issues in the linear tree-based model: 1. We should not filter out any data instances at the root. (The original code excluded instances with no labels from training.) 2. The outputs of a tree-based model should be in the range [0, 1]^{# of labels}, which corresponds to probability estimates. Moreover, if we want to use a sparse matrix to store the prediction values of a tree model, the value ``-inf'' is problematic. --- libmultilabel/linear/tree.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index c33cfe33..a8f27b57 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -27,6 +27,7 @@ def __init__( """ self.label_map = label_map self.children = children + self.is_root = False def isLeaf(self) -> bool: return len(self.children) == 0 @@ -101,7 +102,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra next_level = [] num_labels = len(self.root.label_map) - scores = np.full(num_labels, -np.inf) + scores = np.full(num_labels, 0) for node, score in cur_level: slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]] pred = instance_preds[slice] @@ -134,6 +135,7 @@ def train_tree( label_representation = (y.T * x).tocsr() label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1) root = _build_tree(label_representation, np.arange(y.shape[1]), 0, K, dmax) + root.is_root = True num_nodes = 0 # Both type(x) and type(y) are sparse.csr_matrix @@ -161,7 +163,10 @@ def count(node): pbar = tqdm(total=num_nodes, disable=not verbose) def visit(node): - relevant_instances = y[:, node.label_map].getnnz(axis=1) > 0 + if node.is_root: + relevant_instances = y[:, node.label_map].getnnz(axis=1) >= 0 + 
else: + relevant_instances = y[:, node.label_map].getnnz(axis=1) > 0 _train_node(y[relevant_instances], x[relevant_instances], options, node) pbar.update() From a758e789a882b7e5056ae7229bebf0663ea1de82 Mon Sep 17 00:00:00 2001 From: Sheng-Wei Chen Date: Mon, 9 Dec 2024 18:32:20 +0800 Subject: [PATCH 02/12] The outputs of a tree-based model should be in the range [0, 1]^{# of labels}, which corresponds to probability estimates. Moreover, if we want to use a sparse matrix to store the prediction values of a tree model, the value ``-inf'' is problematic. --- libmultilabel/linear/tree.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index c33cfe33..bed74383 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -58,7 +58,7 @@ def predict_values( x: sparse.csr_matrix, beam_width: int = 10, ) -> np.ndarray: - """Calculates the decision values associated with x. + """Calculates the probability estimates associated with x. Args: x (sparse.csr_matrix): A matrix with dimension number of instances * number of features. @@ -72,10 +72,10 @@ def predict_values( return np.vstack([self._beam_search(all_preds[i], beam_width) for i in range(all_preds.shape[0])]) def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarray: - """Predict with beam search using cached decision values for a single instance. + """Predict with beam search using cached probability estimates for a single instance. Args: - instance_preds (np.ndarray): A vector of cached decision values of each node, has dimension number of labels + total number of metalabels. + instance_preds (np.ndarray): A vector of cached probability estimates of each node, has dimension number of labels + total number of metalabels. beam_width (int): Number of candidates considered. 
Returns: @@ -101,7 +101,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra next_level = [] num_labels = len(self.root.label_map) - scores = np.full(num_labels, -np.inf) + scores = np.full(num_labels, 0) for node, score in cur_level: slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]] pred = instance_preds[slice] From 3aeff7b4f5c999aa42a3816fd537c8cd44277b28 Mon Sep 17 00:00:00 2001 From: Sheng-Wei Chen Date: Mon, 9 Dec 2024 18:44:12 +0800 Subject: [PATCH 03/12] change code form --- libmultilabel/linear/tree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index a8f27b57..bc5e7c20 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -164,10 +164,10 @@ def count(node): def visit(node): if node.is_root: - relevant_instances = y[:, node.label_map].getnnz(axis=1) >= 0 + _train_node(y, x, options, node) else: relevant_instances = y[:, node.label_map].getnnz(axis=1) > 0 - _train_node(y[relevant_instances], x[relevant_instances], options, node) + _train_node(y[relevant_instances], x[relevant_instances], options, node) pbar.update() root.dfs(visit) From 220d8cede0270fa6766e4f777ce06cd735141abc Mon Sep 17 00:00:00 2001 From: ChengYehLi Date: Tue, 10 Dec 2024 19:51:05 +0800 Subject: [PATCH 04/12] Update links --- docs/cli/flags.rst | 2 +- docs/cli/ov_data_format.rst | 4 ++-- docs/tutorials/Parameter_Selection_for_Neural_Networks.rst | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/cli/flags.rst b/docs/cli/flags.rst index 11b3b501..f876bf8a 100644 --- a/docs/cli/flags.rst +++ b/docs/cli/flags.rst @@ -6,7 +6,7 @@ or directly passed as flags. If an option exists in both the config file and flags, flags take precedent and override the config file. The config file is a yaml file, examples may be found in -`example_config `_. +`example_config `_. 
In the config file, each key-value pair ``key: value`` corresponds to passing the flag ``--key value``. The following example sets the training data path in the config file diff --git a/docs/cli/ov_data_format.rst b/docs/cli/ov_data_format.rst index f417ef39..2e7ae85c 100644 --- a/docs/cli/ov_data_format.rst +++ b/docs/cli/ov_data_format.rst @@ -40,11 +40,11 @@ and then create a virtual enviroment as follows. conda create -n LibMultiLabel python=3.8 conda activate LibMultiLabel -* Clone `LibMultiLabel `_. +* Clone `LibMultiLabel `_. .. code-block:: bash - git clone https://github.com/ASUS-AICS/LibMultiLabel.git + git clone https://github.com/ntumlgroup/LibMultiLabel.git cd LibMultiLabel * Install the default dependencies with: diff --git a/docs/tutorials/Parameter_Selection_for_Neural_Networks.rst b/docs/tutorials/Parameter_Selection_for_Neural_Networks.rst index e76351e9..f761bfa5 100644 --- a/docs/tutorials/Parameter_Selection_for_Neural_Networks.rst +++ b/docs/tutorials/Parameter_Selection_for_Neural_Networks.rst @@ -10,7 +10,7 @@ Direct Trying Some Parameters ----------------------------- First, train a BiGRU model with the -`default configuration file `_ +`default configuration file `_ with a little modification on the learning rate. Some important parameters are listed as follows. @@ -92,7 +92,7 @@ To save time, LibMultiLabel has incorporated some early stopping techniques impl Here we demonstrate an example of applying an `ASHA (Asynchronous Successive Halving Algorithm) Scheduler `_. First, uncomment the following lines in the -`configuration file `_: +`configuration file `_: .. 
code-block:: bash From a179f5ec5c72cffbf561fad8d3785a427775e152 Mon Sep 17 00:00:00 2001 From: maclin726 <46101124+maclin726@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:45:10 +0400 Subject: [PATCH 05/12] Update CODEOWNERS --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index fa35bd49..43336195 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # the repo. # Reviewers list below will be requested for # review when someone opens a pull request. -* @cjlin1 @sian-chen @Eleven1Liu @henryyang42 @JamesLYC88 @Gordon119 +* @cjlin1 @libmultilabel_reviewers From 1900f5f2914be9d0d88d5e87291ae18290590794 Mon Sep 17 00:00:00 2001 From: maclin726 <46101124+maclin726@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:59:38 +0400 Subject: [PATCH 06/12] Update CODEOWNERS --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 43336195..bfbe1855 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # the repo. # Reviewers list below will be requested for # review when someone opens a pull request. -* @cjlin1 @libmultilabel_reviewers +* @cjlin1 @ntumlgroup/libmultilabel_reviewers From e3629a2d2ccb14ff4b2b9b92aef9efa9ac55f85c Mon Sep 17 00:00:00 2001 From: ChengYehLi Date: Wed, 11 Dec 2024 13:57:54 +0800 Subject: [PATCH 07/12] Remove cited paper section on the left panel --- docs/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 4d8b612a..9212c919 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,7 +19,6 @@ For practical use, please see the `Tutorials `_. For Implementati library_index tutorial Implementation Document - papers .. From ecd57b3cb731779e7fc8375a58ecb20e50d634c9 Mon Sep 17 00:00:00 2001 From: Sheng-Wei Chen Date: Thu, 26 Dec 2024 13:32:31 +0800 Subject: [PATCH 08/12] 1. 
np.full(..., 0) returns an np.array of dtype int64, but we need float64. 2. np.matrix ** 2 is evaluated as the matrix product of a ``square matrix'' with itself, not as an element-wise square, in the latest NumPy version. For the element-wise square of an np.matrix, we should use np.square( np.matrix ). --- libmultilabel/linear/tree.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index fd93b5f4..0988dc7d 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -95,18 +95,18 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra continue slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]] pred = instance_preds[slice] - children_score = score - np.maximum(0, 1 - pred) ** 2 + children_score = score - np.square( np.maximum(0, 1 - pred) ) next_level.extend(zip(node.children, children_score.tolist())) cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width] next_level = [] num_labels = len(self.root.label_map) - scores = np.full(num_labels, 0) + scores = np.full(num_labels, 0.0) for node, score in cur_level: slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]] pred = instance_preds[slice] - scores[node.label_map] = np.exp(score - np.maximum(0, 1 - pred) ** 2) + scores[node.label_map] = np.exp( score - np.square( np.maximum(0, 1 - pred) ) ) return scores From a51891f4622e221859a7a63794f40c8273dcd1f1 Mon Sep 17 00:00:00 2001 From: Sheng-Wei Chen Date: Thu, 26 Dec 2024 14:20:20 +0800 Subject: [PATCH 09/12] reformat by black formatter --- libmultilabel/linear/tree.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index 0988dc7d..64070b2a 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -95,7 +95,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra continue slice = 
np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]] pred = instance_preds[slice] - children_score = score - np.square( np.maximum(0, 1 - pred) ) + children_score = score - np.square(np.maximum(0, 1 - pred)) next_level.extend(zip(node.children, children_score.tolist())) cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width] @@ -106,7 +106,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra for node, score in cur_level: slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]] pred = instance_preds[slice] - scores[node.label_map] = np.exp( score - np.square( np.maximum(0, 1 - pred) ) ) + scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred))) return scores @@ -151,14 +151,14 @@ def count(node): root.dfs(count) model_size = get_estimated_model_size(root) - print(f'The estimated tree model size is: {model_size / (1024**3):.3f} GB') + print(f"The estimated tree model size is: {model_size / (1024**3):.3f} GB") # Calculate the total memory (excluding swap) on the local machine - total_memory = psutil.virtual_memory().total - print(f'Your system memory is: {total_memory / (1024**3):.3f} GB') + total_memory = psutil.virtual_memory().total + print(f"Your system memory is: {total_memory / (1024**3):.3f} GB") - if (total_memory <= model_size): - raise MemoryError(f'Not enough memory to train the model.') + if total_memory <= model_size: + raise MemoryError(f"Not enough memory to train the model.") pbar = tqdm(total=num_nodes, disable=not verbose) @@ -221,7 +221,7 @@ def get_estimated_model_size(root): def collect_stat(node: Node): nonlocal total_num_weights - + if node.isLeaf(): total_num_weights += len(node.label_map) * node.num_features_used else: @@ -231,7 +231,7 @@ def collect_stat(node: Node): # 16 is because when storing sparse matrices, indices (int64) require 8 bytes and floats require 8 bytes # Our study showed that among the used features of every binary 
classification problem, on average no more than 2/3 of weights obtained by the dual coordinate descent method are non-zeros. - return total_num_weights * 16 * 2/3 + return total_num_weights * 16 * 2 / 3 def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node: Node): From d20d2552a495e23636ebb9b576c8e6a3f7746f79 Mon Sep 17 00:00:00 2001 From: Eleven Liu Date: Thu, 26 Dec 2024 16:28:20 +0800 Subject: [PATCH 10/12] Fix scipy version to 1.13.1. --- requirements.txt | 2 +- setup.cfg | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index c1285e72..b1a3a030 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ numba pandas>1.3.0 PyYAML scikit-learn -scipy +scipy==1.13.1 tqdm psutil diff --git a/setup.cfg b/setup.cfg index 58c079a3..b0441ba9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = libmultilabel -version = 0.7.1 +version = 0.7.2 author = LibMultiLabel Team license = MIT License license_file = LICENSE @@ -30,7 +30,7 @@ install_requires = pandas>1.3.0 PyYAML scikit-learn - scipy + scipy==1.13.1 tqdm python_requires = >=3.8 From ad747450c4744f9bdcb83de6bcfb20aa163fdff9 Mon Sep 17 00:00:00 2001 From: Eleven Liu Date: Thu, 26 Dec 2024 17:02:56 +0800 Subject: [PATCH 11/12] Do not fix scipy version for python 3.8. 
--- requirements.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index b1a3a030..dc31526e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ numba pandas>1.3.0 PyYAML scikit-learn -scipy==1.13.1 +scipy<=1.13.1 tqdm psutil diff --git a/setup.cfg b/setup.cfg index b0441ba9..19aabee0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ install_requires = pandas>1.3.0 PyYAML scikit-learn - scipy==1.13.1 + scipy<=1.13.1 tqdm python_requires = >=3.8 From 5c2b717d5cf9b6284f5588772139b6a3103dbdfa Mon Sep 17 00:00:00 2001 From: Eleven Liu Date: Thu, 26 Dec 2024 17:31:13 +0800 Subject: [PATCH 12/12] Set scipy to <1.14 (enable patch versions of 1.13*) --- requirements.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index dc31526e..46f549d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ numba pandas>1.3.0 PyYAML scikit-learn -scipy<=1.13.1 +scipy<1.14.0 tqdm psutil diff --git a/setup.cfg b/setup.cfg index 19aabee0..11124fb1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ install_requires = pandas>1.3.0 PyYAML scikit-learn - scipy<=1.13.1 + scipy<1.14.0 tqdm python_requires = >=3.8