Merge pull request #22 from LuposX/development

Development
LuposX · Sep 26, 2019 · 4815e8e · 4815e8e
2 parents ce26737 + 488dccf
commit 4815e8e
Show file tree

Hide file tree

Showing 3 changed files with 52 additions and 24 deletions.
diff --git a/code/linear_regression_libary.py b/code/linear_regression_libary.py
@@ -138,13 +138,16 @@ def predic(self, visualize_process, args_normalization) -> None:
                     break
                 else:
                     rm_input = round(float(rm_input), 20)
-                    self.pred_target = self.w1 * rm_input + self.bias  # predicting
+                    rm_input_norm = (rm_input - df_mean[0]) / df_range[0]  # normalizing input
 
-                    output_pred = (self.pred_target * df_range) + df_mean
+                    self.pred_target = self.w1 * rm_input_norm + self.bias  # predicting
+
+                    # denormalization of output
+                    denorm_pred_target = (self.pred_target * df_range[1]) + df_mean[1]
 
                     print(" ")
                     print("The model predicted that a house with a RM value of: " + str(rm_input) + ".")
-                    print("Is worth about: " + str(round(output_pred, 4)) + " in 100,000$(GER 100.000$).")
+                    print("Is worth about: " + str(round(denorm_pred_target, 6)) + " in 10,000$(GER 10.000$).")
                     print(" ")
             except ValueError:
                 print("Invalid Input!")

diff --git a/code/misc_libary.py b/code/misc_libary.py
@@ -114,42 +114,50 @@ def download_dataset() -> None:
 
 
 def preproc_data(df: object, args) -> list:
-    # money in 100,000
-    df[["MEDV"]] = df[["MEDV"]] / 100000
+    # money in 10,000
+    df[["MEDV"]] = df[["MEDV"]] / 10000
 
     if args.model == "linear_regression":
-        df_new = df[["RM", "MEDV"]]
+        # keep the full frame (data and target together); the column split is currently disabled
+        df_new = df #df[["RM"]]
 
         # normalization variables for linear regression
-        df_new_range = (df_new.max() - df_new.min())
-        df_new_mean = df_new.mean()
+        df_new_range = df_new.max() - df_new.min()
+        df_new_mean = df_new.std(ddof=1)
 
         df_new = (df_new - df_new_mean) / df_new_range
 
         # shuffling data
         df_new = df_new.sample(frac=1).reset_index(drop=True)
 
+        #df_new["MEDV"] = df["MEDV"]  # we don't want to normalize our target, so here we would put data and target back together in one dataset.
+
         # split in training and test data
         df_new_train = df_new[:380]
         df_new_test = df_new[381:]
 
         return df_new_train, df_new_test, df_new_range, df_new_mean
 
     elif args.model == "polynomial_regression":
+        # keep the full frame (data and target together); the column split is currently disabled
+        df_new = df # df[["RM", "LSTAT", "PTRATIO"]]
+
         # normalization variables for polynomial regression
-        df_range = df.max() - df.min()
-        df_mean = df.mean()
+        df_new_range = df_new.max() - df_new.min()
+        df_new_mean = df_new.mean()
 
-        df = (df - df.mean()) / (df.max() - df.min())
+        df_new = (df_new - df_new_mean) / df_new_range
 
         # shuffling data
-        df = df.sample(frac=1).reset_index(drop=True)
+        df_new = df_new.sample(frac=1).reset_index(drop=True)
+
+        # df_new["MEDV"] = df["MEDV"]  # we don't want to normalize our target, so here we would put data and target back together in one dataset.
 
         # split in training and test data
-        df_train = df[:380]
-        df_test = df[381:]
+        df_new_train = df_new[:380]
+        df_new_test = df_new[381:]
 
-        return df_train, df_test, df_range, df_mean
+        return df_new_train, df_new_test, df_new_range, df_new_mean
 
     else:
         print("something went wrong in data preprocessing.")
@@ -278,9 +286,9 @@ def v_model_poly(x_axis, y_axis, weights_bias, data_train, target_train):
     fig = plt.figure(figsize=(10, 7))
     ax = fig.gca(projection='3d')
 
-    f1 = np.arange(-1, 1, 0.1)
-    f2 = np.arange(-1, 1, 0.1)
-    f3 = np.arange(-1, 1, 0.1)
+    f1 = np.arange(-0.7, 1.2, 0.1)
+    f2 = np.arange(-0.7, 1.2, 0.1)
+    f3 = np.arange(-0.7, 1.2, 0.1)
 
     f1, f2 = np.meshgrid(f1, f2)
     # z corosponds to medv

diff --git a/code/polynomial_regression_libary.py b/code/polynomial_regression_libary.py
@@ -10,17 +10,19 @@ def __init__(self, df, args):
 
         # how man epoch we train
         self.epochs = 30
-        self.alpha = 0.008
+        self.alpha = 0.005
         self.train_loss_history = []
         self.test_loss_history = []
         self.x_train_loose = []
 
         # split in target and data
+        # print(df[0].info())
+        # print("-----------------------")
         self.data_train = df[0].iloc[:,  df[0].columns != "MEDV"].reset_index(drop=True)    # the ":" stands for every element in there
         self.data_test = df[1].iloc[:,  df[1].columns != "MEDV"].reset_index(drop=True)
+        # print(self.data_train.info())
         self.target_train = df[0]["MEDV"].tolist()
         self.target_test = df[1]["MEDV"].tolist()
-
         # misc
         self.evaluation_time = 0
         self.args = args
@@ -32,8 +34,8 @@ def hypothesis(self, weights, f1, f2, f3, bias):
         #print(weights[0])
         # pred = round(weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f2 + weights[3] * f2 ** 2 + \
         #              weights[4] * f3 + weights[5] * f3 ** 2 + weights[6] * bias, 10)
-        pred = round(weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f1 ** 3 + weights[3] * f2 + weights[4] * f2 ** 2 + \
-                          weights[5] * f2 ** 3 + weights[6] * f3 + weights[7] * f3 ** 2 + weights[8] * f3 ** 3 + weights[9] * bias, 10)
+        pred = weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f1 ** 3 + weights[3] * f2 + weights[4] * f2 ** 2 + \
+                          weights[5] * f2 ** 3 + weights[6] * f3 + weights[7] * f3 ** 2 + weights[8] * f3 ** 3 + weights[9] * bias
         return pred
 
     # training our model
@@ -213,15 +215,30 @@ def predic(self, visualize_process, args_normalization) -> None:
                 lstat_input = round(float(lstat_input), 4)
                 ptratio_input = round(float(ptratio_input), 4)
 
-                self.pred_target = self.hypothesis(self.weights, rm_input, lstat_input, ptratio_input, 1)
+                # normalizing input
+                rm_input_norm = (rm_input - df_mean[0]) / df_range[0]
+                lstat_input_norm = (lstat_input - df_mean[1]) / df_range[1]
+                ptratio_input_norm = (ptratio_input - df_mean[2]) / df_range[2]
+
+                self.pred_target = self.hypothesis(self.weights, rm_input_norm, lstat_input_norm, ptratio_input_norm, 1)
+                print(self.pred_target)
+
+                # denormalization of output
+                denorm_pred_target = (self.pred_target * df_range[3]) + df_mean[3]
+
+                # print(self.pred_target)
+                # print("---------------")
+                # print(df_range)
+                # print("---------------")
+                # print(df_mean)
 
                 print(" ")
                 print("The model predicted that a house with the values: ")
                 print("RM :" + str(rm_input))
                 print("LSTAT :" + str(lstat_input))
                 print("PTRATIO :" + str(ptratio_input))
                 print(" ")
-                print("Is worth about: " + str(round(self.pred_target, 4)) + " in 10,000$(GER 10.000$).")
+                print("Is worth about: " + str(round(denorm_pred_target, 6)) + " in 10,000$(GER 10.000$).")
                 print(" ")
             except ValueError:
                 print("Invalid Input!")