From 563c57d1b9a32c9aca9553c2d60315f9cc07550f Mon Sep 17 00:00:00 2001
From: Sheng-Wei Chen <will945945945@gmail.com>
Date: Sun, 8 Dec 2024 19:11:01 +0800
Subject: [PATCH 01/12] Fix two issues in linear tree-based model:

1. We should not filter out any data instances at the root.
  (The original code excluded instances that have no labels from training.)

2. The outputs of a tree-based model should be in the range [0, 1]^{# of labels}, which corresponds to the probability estimates.
  Moreover, if we want to use a sparse matrix to store the prediction values of a tree model, the value ``-inf'' is problematic.
---
 libmultilabel/linear/tree.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index c33cfe33..a8f27b57 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -27,6 +27,7 @@ def __init__(
         """
         self.label_map = label_map
         self.children = children
+        self.is_root = False
 
     def isLeaf(self) -> bool:
         return len(self.children) == 0
@@ -101,7 +102,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra
             next_level = []
 
         num_labels = len(self.root.label_map)
-        scores = np.full(num_labels, -np.inf)
+        scores = np.full(num_labels, 0)
         for node, score in cur_level:
             slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
             pred = instance_preds[slice]
@@ -134,6 +135,7 @@ def train_tree(
     label_representation = (y.T * x).tocsr()
     label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
     root = _build_tree(label_representation, np.arange(y.shape[1]), 0, K, dmax)
+    root.is_root = True
 
     num_nodes = 0
     # Both type(x) and type(y) are sparse.csr_matrix
@@ -161,7 +163,10 @@ def count(node):
     pbar = tqdm(total=num_nodes, disable=not verbose)
 
     def visit(node):
-        relevant_instances = y[:, node.label_map].getnnz(axis=1) > 0
+        if node.is_root:
+            relevant_instances = y[:, node.label_map].getnnz(axis=1) >= 0
+        else:
+            relevant_instances = y[:, node.label_map].getnnz(axis=1) > 0
         _train_node(y[relevant_instances], x[relevant_instances], options, node)
         pbar.update()
 

From a758e789a882b7e5056ae7229bebf0663ea1de82 Mon Sep 17 00:00:00 2001
From: Sheng-Wei Chen <will945945945@gmail.com>
Date: Mon, 9 Dec 2024 18:32:20 +0800
Subject: [PATCH 02/12] The outputs of a tree-based model should be in the
 range [0, 1]^{# of labels}, which corresponds to the probability
 estimates. Moreover, if we want to use a sparse matrix to store the prediction
 values of a tree model, the value ``-inf'' is problematic.

---
 libmultilabel/linear/tree.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index c33cfe33..bed74383 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -58,7 +58,7 @@ def predict_values(
         x: sparse.csr_matrix,
         beam_width: int = 10,
     ) -> np.ndarray:
-        """Calculates the decision values associated with x.
+        """Calculates the probability estimates associated with x.
 
         Args:
             x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
@@ -72,10 +72,10 @@ def predict_values(
         return np.vstack([self._beam_search(all_preds[i], beam_width) for i in range(all_preds.shape[0])])
 
     def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarray:
-        """Predict with beam search using cached decision values for a single instance.
+        """Predict with beam search using cached probability estimates for a single instance.
 
         Args:
-            instance_preds (np.ndarray): A vector of cached decision values of each node, has dimension number of labels + total number of metalabels.
+            instance_preds (np.ndarray): A vector of cached probability estimates of each node, has dimension number of labels + total number of metalabels.
             beam_width (int): Number of candidates considered.
 
         Returns:
@@ -101,7 +101,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra
             next_level = []
 
         num_labels = len(self.root.label_map)
-        scores = np.full(num_labels, -np.inf)
+        scores = np.full(num_labels, 0)
         for node, score in cur_level:
             slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
             pred = instance_preds[slice]

From 3aeff7b4f5c999aa42a3816fd537c8cd44277b28 Mon Sep 17 00:00:00 2001
From: Sheng-Wei Chen <will945945945@gmail.com>
Date: Mon, 9 Dec 2024 18:44:12 +0800
Subject: [PATCH 03/12] Train the root node on the full data directly instead of via an all-true mask

---
 libmultilabel/linear/tree.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index a8f27b57..bc5e7c20 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -164,10 +164,10 @@ def count(node):
 
     def visit(node):
         if node.is_root:
-            relevant_instances = y[:, node.label_map].getnnz(axis=1) >= 0
+            _train_node(y, x, options, node)
         else:
             relevant_instances = y[:, node.label_map].getnnz(axis=1) > 0
-        _train_node(y[relevant_instances], x[relevant_instances], options, node)
+            _train_node(y[relevant_instances], x[relevant_instances], options, node)
         pbar.update()
 
     root.dfs(visit)

From 220d8cede0270fa6766e4f777ce06cd735141abc Mon Sep 17 00:00:00 2001
From: ChengYehLi <b09201034@ntu.edu.tw>
Date: Tue, 10 Dec 2024 19:51:05 +0800
Subject: [PATCH 04/12] Update links

---
 docs/cli/flags.rst                                         | 2 +-
 docs/cli/ov_data_format.rst                                | 4 ++--
 docs/tutorials/Parameter_Selection_for_Neural_Networks.rst | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/cli/flags.rst b/docs/cli/flags.rst
index 11b3b501..f876bf8a 100644
--- a/docs/cli/flags.rst
+++ b/docs/cli/flags.rst
@@ -6,7 +6,7 @@ or directly passed as flags. If an option exists in both the config
 file and flags, flags take precedent and override the config file.
 
 The config file is a yaml file, examples may be found in
-`example_config <https://github.com/ASUS-AICS/LibMultiLabel/tree/master/example_config>`_.
+`example_config <https://github.com/ntumlgroup/LibMultiLabel/tree/master/example_config>`_.
 In the config file, each key-value pair ``key: value`` corresponds to
 passing the flag ``--key value``. The following example sets the training data path
 in the config file
diff --git a/docs/cli/ov_data_format.rst b/docs/cli/ov_data_format.rst
index f417ef39..2e7ae85c 100644
--- a/docs/cli/ov_data_format.rst
+++ b/docs/cli/ov_data_format.rst
@@ -40,11 +40,11 @@ and then create a virtual enviroment as follows.
     conda create -n LibMultiLabel python=3.8
     conda activate LibMultiLabel
 
-* Clone `LibMultiLabel <https://github.com/ASUS-AICS/LibMultiLabel>`_.
+* Clone `LibMultiLabel <https://github.com/ntumlgroup/LibMultiLabel>`_.
 
 .. code-block:: bash
 
-    git clone https://github.com/ASUS-AICS/LibMultiLabel.git
+    git clone https://github.com/ntumlgroup/LibMultiLabel.git
     cd LibMultiLabel
 
 * Install the default dependencies with:
diff --git a/docs/tutorials/Parameter_Selection_for_Neural_Networks.rst b/docs/tutorials/Parameter_Selection_for_Neural_Networks.rst
index e76351e9..f761bfa5 100644
--- a/docs/tutorials/Parameter_Selection_for_Neural_Networks.rst
+++ b/docs/tutorials/Parameter_Selection_for_Neural_Networks.rst
@@ -10,7 +10,7 @@ Direct Trying Some Parameters
 -----------------------------
 
 First, train a BiGRU model with the
-`default configuration file <https://github.com/ASUS-AICS/LibMultiLabel/blob/master/example_config/EUR-Lex/bigru_lwan.yml>`_
+`default configuration file <https://github.com/ntumlgroup/LibMultiLabel/blob/master/example_config/EUR-Lex/bigru_lwan.yml>`_
 with a little modification on the learning rate.
 Some important parameters are listed as follows.
 
@@ -92,7 +92,7 @@ To save time, LibMultiLabel has incorporated some early stopping techniques impl
 Here we demonstrate an example of applying an `ASHA (Asynchronous Successive Halving Algorithm) Scheduler <https://arxiv.org/abs/1810.05934>`_.
 
 First, uncomment the following lines in the
-`configuration file <https://github.com/ASUS-AICS/LibMultiLabel/blob/master/example_config/EUR-Lex/bigru_lwan_tune.yml>`_:
+`configuration file <https://github.com/ntumlgroup/LibMultiLabel/blob/master/example_config/EUR-Lex/bigru_lwan_tune.yml>`_:
 
 .. code-block:: bash
 

From a179f5ec5c72cffbf561fad8d3785a427775e152 Mon Sep 17 00:00:00 2001
From: maclin726 <46101124+maclin726@users.noreply.github.com>
Date: Tue, 10 Dec 2024 17:45:10 +0400
Subject: [PATCH 05/12] Update CODEOWNERS

---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index fa35bd49..43336195 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -2,4 +2,4 @@
 # the repo.
 # Reviewers list below will be requested for
 # review when someone opens a pull request.
-*       @cjlin1 @sian-chen @Eleven1Liu @henryyang42 @JamesLYC88 @Gordon119
+*       @cjlin1 @libmultilabel_reviewers

From 1900f5f2914be9d0d88d5e87291ae18290590794 Mon Sep 17 00:00:00 2001
From: maclin726 <46101124+maclin726@users.noreply.github.com>
Date: Tue, 10 Dec 2024 17:59:38 +0400
Subject: [PATCH 06/12] Update CODEOWNERS

---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 43336195..bfbe1855 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -2,4 +2,4 @@
 # the repo.
 # Reviewers list below will be requested for
 # review when someone opens a pull request.
-*       @cjlin1 @libmultilabel_reviewers
+*       @cjlin1 @ntumlgroup/libmultilabel_reviewers

From e3629a2d2ccb14ff4b2b9b92aef9efa9ac55f85c Mon Sep 17 00:00:00 2001
From: ChengYehLi <b09201034@ntu.edu.tw>
Date: Wed, 11 Dec 2024 13:57:54 +0800
Subject: [PATCH 07/12] Remove cited paper section on the left panel

---
 docs/index.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/index.rst b/docs/index.rst
index 4d8b612a..9212c919 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -19,7 +19,6 @@ For practical use, please see the `Tutorials <tutorial.html>`_. For Implementati
    library_index
    tutorial
    Implementation Document <https://www.csie.ntu.edu.tw/~cjlin/papers/libmultilabel/libmultilabel_implementation.pdf>
-   papers
    
 
 ..

From ecd57b3cb731779e7fc8375a58ecb20e50d634c9 Mon Sep 17 00:00:00 2001
From: Sheng-Wei Chen <will945945945@gmail.com>
Date: Thu, 26 Dec 2024 13:32:31 +0800
Subject: [PATCH 08/12] 1. np.full(..., 0) returns an np.array of int64, but
 we need float64. 2. np.matrix ** 2 is interpreted as the matrix square of a
 ``square matrix'' in the latest NumPy version. For the element-wise square
 of an np.matrix, we should use np.square( np.matrix ).

---
 libmultilabel/linear/tree.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index fd93b5f4..0988dc7d 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -95,18 +95,18 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra
                     continue
                 slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
                 pred = instance_preds[slice]
-                children_score = score - np.maximum(0, 1 - pred) ** 2
+                children_score = score - np.square( np.maximum(0, 1 - pred) )
                 next_level.extend(zip(node.children, children_score.tolist()))
 
             cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width]
             next_level = []
 
         num_labels = len(self.root.label_map)
-        scores = np.full(num_labels, 0)
+        scores = np.full(num_labels, 0.0)
         for node, score in cur_level:
             slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
             pred = instance_preds[slice]
-            scores[node.label_map] = np.exp(score - np.maximum(0, 1 - pred) ** 2)
+            scores[node.label_map] = np.exp( score - np.square( np.maximum(0, 1 - pred) ) )
         return scores
 
 

From a51891f4622e221859a7a63794f40c8273dcd1f1 Mon Sep 17 00:00:00 2001
From: Sheng-Wei Chen <will945945945@gmail.com>
Date: Thu, 26 Dec 2024 14:20:20 +0800
Subject: [PATCH 09/12] reformat by black formatter

---
 libmultilabel/linear/tree.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index 0988dc7d..64070b2a 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -95,7 +95,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra
                     continue
                 slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
                 pred = instance_preds[slice]
-                children_score = score - np.square( np.maximum(0, 1 - pred) )
+                children_score = score - np.square(np.maximum(0, 1 - pred))
                 next_level.extend(zip(node.children, children_score.tolist()))
 
             cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width]
@@ -106,7 +106,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra
         for node, score in cur_level:
             slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
             pred = instance_preds[slice]
-            scores[node.label_map] = np.exp( score - np.square( np.maximum(0, 1 - pred) ) )
+            scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred)))
         return scores
 
 
@@ -151,14 +151,14 @@ def count(node):
     root.dfs(count)
 
     model_size = get_estimated_model_size(root)
-    print(f'The estimated tree model size is: {model_size / (1024**3):.3f} GB')
+    print(f"The estimated tree model size is: {model_size / (1024**3):.3f} GB")
 
     # Calculate the total memory (excluding swap) on the local machine
-    total_memory = psutil.virtual_memory().total 
-    print(f'Your system memory is: {total_memory / (1024**3):.3f} GB')
+    total_memory = psutil.virtual_memory().total
+    print(f"Your system memory is: {total_memory / (1024**3):.3f} GB")
 
-    if (total_memory <= model_size):
-        raise MemoryError(f'Not enough memory to train the model.')
+    if total_memory <= model_size:
+        raise MemoryError(f"Not enough memory to train the model.")
 
     pbar = tqdm(total=num_nodes, disable=not verbose)
 
@@ -221,7 +221,7 @@ def get_estimated_model_size(root):
 
     def collect_stat(node: Node):
         nonlocal total_num_weights
-        
+
         if node.isLeaf():
             total_num_weights += len(node.label_map) * node.num_features_used
         else:
@@ -231,7 +231,7 @@ def collect_stat(node: Node):
 
     # 16 is because when storing sparse matrices, indices (int64) require 8 bytes and floats require 8 bytes
     # Our study showed that among the used features of every binary classification problem, on average no more than 2/3 of weights obtained by the dual coordinate descent method are non-zeros.
-    return total_num_weights * 16 * 2/3
+    return total_num_weights * 16 * 2 / 3
 
 
 def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node: Node):

From d20d2552a495e23636ebb9b576c8e6a3f7746f79 Mon Sep 17 00:00:00 2001
From: Eleven Liu <jiejyunliu@gmail.com>
Date: Thu, 26 Dec 2024 16:28:20 +0800
Subject: [PATCH 10/12] Fix scipy version to 1.13.1.

---
 requirements.txt | 2 +-
 setup.cfg        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index c1285e72..b1a3a030 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,6 @@ numba
 pandas>1.3.0
 PyYAML
 scikit-learn
-scipy
+scipy==1.13.1
 tqdm
 psutil
diff --git a/setup.cfg b/setup.cfg
index 58c079a3..b0441ba9 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = libmultilabel
-version = 0.7.1
+version = 0.7.2
 author = LibMultiLabel Team
 license = MIT License
 license_file = LICENSE
@@ -30,7 +30,7 @@ install_requires =
     pandas>1.3.0
     PyYAML
     scikit-learn
-    scipy
+    scipy==1.13.1
     tqdm
 
 python_requires = >=3.8

From ad747450c4744f9bdcb83de6bcfb20aa163fdff9 Mon Sep 17 00:00:00 2001
From: Eleven Liu <jiejyunliu@gmail.com>
Date: Thu, 26 Dec 2024 17:02:56 +0800
Subject: [PATCH 11/12] Do not fix scipy version for python 3.8.

---
 requirements.txt | 2 +-
 setup.cfg        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index b1a3a030..dc31526e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,6 @@ numba
 pandas>1.3.0
 PyYAML
 scikit-learn
-scipy==1.13.1
+scipy<=1.13.1
 tqdm
 psutil
diff --git a/setup.cfg b/setup.cfg
index b0441ba9..19aabee0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,7 +30,7 @@ install_requires =
     pandas>1.3.0
     PyYAML
     scikit-learn
-    scipy==1.13.1
+    scipy<=1.13.1
     tqdm
 
 python_requires = >=3.8

From 5c2b717d5cf9b6284f5588772139b6a3103dbdfa Mon Sep 17 00:00:00 2001
From: Eleven Liu <jiejyunliu@gmail.com>
Date: Thu, 26 Dec 2024 17:31:13 +0800
Subject: [PATCH 12/12] Set scipy to <1.14 (enable patch versions of 1.13*)

---
 requirements.txt | 2 +-
 setup.cfg        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index dc31526e..46f549d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,6 @@ numba
 pandas>1.3.0
 PyYAML
 scikit-learn
-scipy<=1.13.1
+scipy<1.14.0
 tqdm
 psutil
diff --git a/setup.cfg b/setup.cfg
index 19aabee0..11124fb1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,7 +30,7 @@ install_requires =
     pandas>1.3.0
     PyYAML
     scikit-learn
-    scipy<=1.13.1
+    scipy<1.14.0
     tqdm
 
 python_requires = >=3.8