Skip to content
This repository has been archived by the owner on May 5, 2024. It is now read-only.

Commit

Permalink
Merge pull request #22 from LuposX/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
LuposX authored Sep 26, 2019
2 parents ce26737 + 488dccf commit 4815e8e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 24 deletions.
9 changes: 6 additions & 3 deletions code/linear_regression_libary.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,16 @@ def predic(self, visualize_process, args_normalization) -> None:
break
else:
rm_input = round(float(rm_input), 20)
self.pred_target = self.w1 * rm_input + self.bias # predicting
rm_input_norm = (rm_input - df_mean[0]) / df_range[0] # normalizing input

output_pred = (self.pred_target * df_range) + df_mean
self.pred_target = self.w1 * rm_input_norm + self.bias # predicting

# denormalization of output
denorm_pred_target = (self.pred_target * df_range[1]) + df_mean[1]

print(" ")
print("The model predicted that a house with a RM value of: " + str(rm_input) + ".")
print("Is worth about: " + str(round(output_pred, 4)) + " in 100,000$(GER 100.000$).")
print("Is worth about: " + str(round(denorm_pred_target, 6)) + " in 10,000$(GER 10.000$).")
print(" ")
except ValueError:
print("Invalid Input!")
Expand Down
38 changes: 23 additions & 15 deletions code/misc_libary.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,42 +114,50 @@ def download_dataset() -> None:


def preproc_data(df: object, args) -> list:
# money in 100,000
df[["MEDV"]] = df[["MEDV"]] / 100000
# money in 10,000
df[["MEDV"]] = df[["MEDV"]] / 10000

if args.model == "linear_regression":
df_new = df[["RM", "MEDV"]]
# split data and target
df_new = df #df[["RM"]]

# normalization variables for linear regression
df_new_range = (df_new.max() - df_new.min())
df_new_mean = df_new.mean()
df_new_range = df_new.max() - df_new.min()
df_new_mean = df_new.std(ddof=1)

df_new = (df_new - df_new_mean) / df_new_range

# shuffling data
df_new = df_new.sample(frac=1).reset_index(drop=True)

#df_new["MEDV"] = df["MEDV"] # we don't want to normalize our target, so here we add data and target back together into one dataset.

# split in training and test data
df_new_train = df_new[:380]
df_new_test = df_new[381:]

return df_new_train, df_new_test, df_new_range, df_new_mean

elif args.model == "polynomial_regression":
# split data and target
df_new = df # df[["RM", "LSTAT", "PTRATIO"]]

# normalization variables for polynomial regression
df_range = df.max() - df.min()
df_mean = df.mean()
df_new_range = df_new.max() - df_new.min()
df_new_mean = df_new.mean()

df = (df - df.mean()) / (df.max() - df.min())
df_new = (df_new - df_new_mean) / df_new_range

# shuffling data
df = df.sample(frac=1).reset_index(drop=True)
df_new = df_new.sample(frac=1).reset_index(drop=True)

# df_new["MEDV"] = df["MEDV"] # we don't want to normalize our target, so here we add data and target back together into one dataset.

# split in training and test data
df_train = df[:380]
df_test = df[381:]
df_new_train = df_new[:380]
df_new_test = df_new[381:]

return df_train, df_test, df_range, df_mean
return df_new_train, df_new_test, df_new_range, df_new_mean

else:
print("something went wrong in data preprocessing.")
Expand Down Expand Up @@ -278,9 +286,9 @@ def v_model_poly(x_axis, y_axis, weights_bias, data_train, target_train):
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')

f1 = np.arange(-1, 1, 0.1)
f2 = np.arange(-1, 1, 0.1)
f3 = np.arange(-1, 1, 0.1)
f1 = np.arange(-0.7, 1.2, 0.1)
f2 = np.arange(-0.7, 1.2, 0.1)
f3 = np.arange(-0.7, 1.2, 0.1)

f1, f2 = np.meshgrid(f1, f2)
# z corresponds to medv
Expand Down
29 changes: 23 additions & 6 deletions code/polynomial_regression_libary.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@ def __init__(self, df, args):

# how many epochs we train
self.epochs = 30
self.alpha = 0.008
self.alpha = 0.005
self.train_loss_history = []
self.test_loss_history = []
self.x_train_loose = []

# split in target and data
# print(df[0].info())
# print("-----------------------")
self.data_train = df[0].iloc[:, df[0].columns != "MEDV"].reset_index(drop=True) # the ":" stands for every element in there
self.data_test = df[1].iloc[:, df[1].columns != "MEDV"].reset_index(drop=True)
# print(self.data_train.info())
self.target_train = df[0]["MEDV"].tolist()
self.target_test = df[1]["MEDV"].tolist()

# misc
self.evaluation_time = 0
self.args = args
Expand All @@ -32,8 +34,8 @@ def hypothesis(self, weights, f1, f2, f3, bias):
#print(weights[0])
# pred = round(weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f2 + weights[3] * f2 ** 2 + \
# weights[4] * f3 + weights[5] * f3 ** 2 + weights[6] * bias, 10)
pred = round(weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f1 ** 3 + weights[3] * f2 + weights[4] * f2 ** 2 + \
weights[5] * f2 ** 3 + weights[6] * f3 + weights[7] * f3 ** 2 + weights[8] * f3 ** 3 + weights[9] * bias, 10)
pred = weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f1 ** 3 + weights[3] * f2 + weights[4] * f2 ** 2 + \
weights[5] * f2 ** 3 + weights[6] * f3 + weights[7] * f3 ** 2 + weights[8] * f3 ** 3 + weights[9] * bias
return pred

# training our model
Expand Down Expand Up @@ -213,15 +215,30 @@ def predic(self, visualize_process, args_normalization) -> None:
lstat_input = round(float(lstat_input), 4)
ptratio_input = round(float(ptratio_input), 4)

self.pred_target = self.hypothesis(self.weights, rm_input, lstat_input, ptratio_input, 1)
# normalizing input
rm_input_norm = (rm_input - df_mean[0]) / df_range[0]
lstat_input_norm = (lstat_input - df_mean[1]) / df_range[1]
ptratio_input_norm = (ptratio_input - df_mean[2]) / df_range[2]

self.pred_target = self.hypothesis(self.weights, rm_input_norm, lstat_input_norm, ptratio_input_norm, 1)
print(self.pred_target)

# denormalization of output
denorm_pred_target = (self.pred_target * df_range[3]) + df_mean[3]

# print(self.pred_target)
# print("---------------")
# print(df_range)
# print("---------------")
# print(df_mean)

print(" ")
print("The model predicted that a house with the values: ")
print("RM :" + str(rm_input))
print("LSTAT :" + str(lstat_input))
print("PTRATIO :" + str(ptratio_input))
print(" ")
print("Is worth about: " + str(round(self.pred_target, 4)) + " in 10,000$(GER 10.000$).")
print("Is worth about: " + str(round(denorm_pred_target, 6)) + " in 10,000$(GER 10.000$).")
print(" ")
except ValueError:
print("Invalid Input!")

0 comments on commit 4815e8e

Please sign in to comment.