From 81afc16efc299f80d279fd4e4b26f56388a51b70 Mon Sep 17 00:00:00 2001
From: bxdd <bxddream@gmail.com>
Date: Tue, 14 Nov 2023 14:56:46 +0800
Subject: [PATCH] [DOC] update docstrings to unify their style

---
 .../organizer/hetero_map/feature_extractor.py | 40 ++++++---------
 .../organizer/hetero_map/trainer.py           | 50 +++++++++----------
 2 files changed, 40 insertions(+), 50 deletions(-)

diff --git a/learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py b/learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py
index 89105c0..10d390a 100644
--- a/learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py
+++ b/learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py
@@ -10,9 +10,7 @@ from transformers import BertTokenizerFast
 
 
 class WordEmbedding(nn.Module):
-    """
-    Encode tokens drawn from column names
-    """
+    """Encode tokens drawn from column names"""
 
     def __init__(
         self,
@@ -36,9 +34,7 @@ class WordEmbedding(nn.Module):
 
 
 class NumEmbedding(nn.Module):
-    """
-    Encode tokens drawn from column names and the corresponding numerical features.
-    """
+    """Encode tokens drawn from column names and the corresponding numerical features."""
 
     def __init__(self, hidden_dim):
         super().__init__()
@@ -47,9 +43,13 @@ class NumEmbedding(nn.Module):
         nn_init.uniform_(self.num_bias, a=-1 / math.sqrt(hidden_dim), b=1 / math.sqrt(hidden_dim))
 
     def forward(self, col_emb, x_ts) -> Tensor:
-        """args:
-        col_emb: numerical column embedding, (# numerical columns, emb_dim)
-        x_ts: numerical features, (bs, emb_dim)
+        """
+        Parameters
+        ----------
+        col_emb : Tensor
+            numerical column embedding, (# numerical columns, emb_dim)
+        x_ts : Tensor
+            numerical features, (bs, # numerical columns)
         """
         col_emb = col_emb.unsqueeze(0).expand((x_ts.shape[0], -1, -1))
         feat_emb = col_emb * x_ts.unsqueeze(-1).float() + self.num_bias
@@ -57,10 +57,7 @@ class NumEmbedding(nn.Module):
 
 
 class FeatureTokenizer:
-    """
-    Process input dataframe to input indices towards encoder,
-    usually used to build dataloader for paralleling loading.
-    """
+    """Process an input dataframe into input indices for the encoder; usually used to build a dataloader for parallel loading."""
 
     def __init__(
         self,
@@ -68,8 +65,11 @@ class FeatureTokenizer:
         cache_dir=None,
         **kwargs,
     ):
-        """args:
-        disable_tokenizer_parallel: true if use extractor for collator function in torch.DataLoader
+        """
+        Parameters
+        ----------
+        disable_tokenizer_parallel : bool, optional
+            True if the extractor is used as the collator function in torch.DataLoader
         """
         self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased", cache_dir=cache_dir)
         self.tokenizer.__dict__["model_max_length"] = 512
@@ -95,10 +95,7 @@ class FeatureTokenizer:
                 'num_col_input_ids': tensor contains numerical column tokenized ids,
             }
         """
-        encoded_inputs = {
-            "x_num": None,
-            "num_col_input_ids": None
-        }
+        encoded_inputs = {"x_num": None, "num_col_input_ids": None}
         num_cols = x.columns.tolist() if not shuffle else np.random.shuffle(x.columns.tolist())
         x_num = x[num_cols].fillna(0)
 
@@ -194,11 +191,6 @@ class FeatureProcessor(nn.Module):
         num_att_mask=None,
         **kwargs,
     ) -> Tensor:
-        """args:
-        x: pd.DataFrame with column names and features.
-        shuffle: if shuffle column order during the training.
-        num_mask: indicate the NaN place of numerical features, 0: NaN 1: normal.
-        """
         x_num = x_num.to(self.device)
 
         num_col_emb = self.word_embedding(num_col_input_ids.to(self.device))
diff --git a/learnware/market/heterogeneous/organizer/hetero_map/trainer.py b/learnware/market/heterogeneous/organizer/hetero_map/trainer.py
index f192b78..c4a85e6 100644
--- a/learnware/market/heterogeneous/organizer/hetero_map/trainer.py
+++ b/learnware/market/heterogeneous/organizer/hetero_map/trainer.py
@@ -30,11 +30,6 @@ class Trainer:
         eval_batch_size=256,
         **kwargs,
     ):
-        """args:
-        train_set_list: a list of training sets [(x_1,y_1),(x_2,y_2),...]
-        patience: the max number of early stop patience
-        eval_less_is_better: if the set eval_metric is the less the better. For val_loss, it should be set True.
-        """
         self.model = model
         if isinstance(train_set_list, tuple):
             train_set_list = [train_set_list]
@@ -129,9 +124,7 @@ class Trainer:
         return trainloader
 
     def _get_parameter_names(self, model, forbidden_layer_types):
-        """
-        Returns the names of the model parameters that are not inside a forbidden layer.
-        """
+        """Returns the names of the model parameters that are not inside a forbidden layer."""
         result = []
         for name, child in model.named_children():
             result += [
@@ -174,9 +167,7 @@ class TransTabCollatorForCL:
         self.num_partition = num_partition
 
     def __call__(self, data):
-        """
-        Take a list of subsets (views) from the original tests.
-        """
+        """Take a list of subsets (views) from the original tests."""
         # 1. build positive pairs
         # 2. encode each pair using feature extractor
         df_x = pd.concat([row for row in data])
@@ -192,15 +183,19 @@ class TransTabCollatorForCL:
         return res
 
     def _build_positive_pairs(self, x, n):
-        """
-        Builds positive pairs of sub-dataframes from the input dataframe x.
-
-        Args:
-            x (pandas.DataFrame): Input dataframe.
-            n (int): Number of sub-dataframes to split x into.
-
-        Returns:
-            list: List of sub-dataframes, each containing a positive pair of columns from x.
+        """Builds positive pairs of sub-dataframes from the input dataframe x.
+
+        Parameters
+        ----------
+        x : pandas.DataFrame
+            Input dataframe.
+        n : int
+            Number of sub-dataframes to split x into.
+
+        Returns
+        -------
+        List
+            List of sub-dataframes, each containing a positive pair of columns from x.
         """
         x_cols = x.columns.tolist()
         sub_col_list = np.array_split(np.array(x_cols), n)
@@ -217,14 +212,17 @@ class TransTabCollatorForCL:
         return sub_x_list
 
     def _build_positive_pairs_single_view(self, x):
-        """
-        Builds positive pairs for a single view of data by corrupting half of the columns and shuffling the corrupted columns.
+        """Builds positive pairs for a single view of data by corrupting half of the columns and shuffling the corrupted columns.
 
-        Args:
-            x (pandas.DataFrame): The input data.
+        Parameters
+        ----------
+        x : pandas.DataFrame
+            The input data.
 
-        Returns:
-            list: A list of two pandas DataFrames, where each DataFrame contains the original data with half of the columns corrupted and shuffled.
+        Returns
+        -------
+        List
+            A list of two pandas DataFrames, where each DataFrame contains the original data with half of the columns corrupted and shuffled.
         """
         x_cols = x.columns.tolist()
         sub_x_list = [x]