diff --git a/README.md b/README.md index 270ff0d..05482a7 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,11 @@ Boston Housing Prediction is a python script that can predict the housing prices ## Installation You need to have `python >= 3.5` installed. -To install the the script do: +To install the script, run ([see the PyPI page](https://pypi.org/project/boston-housing-prediction/)): ```sh $ pip install boston_housing_prediction ``` - For older versions: Alternatively download the latest release of `boston-housing`. Open the zip and in the `code` or `boston_housing_prediction` folder(folders have different names in different versions) you can see the script and its libaries. @@ -35,6 +34,13 @@ $ python -m boston_housing_prediction -h ## Release History +* 0.2.2 + * fix negative input for training of model #24 + * fix negative output of predictions #24 + * optimized folder structure for PYPI #13 + * optimized some code (comments, input) + * documentation: changes in PYPI, comments in code + * 0.2.1 * add normalization for data diff --git a/boston_housing_prediction/__main__.py b/boston_housing_prediction/__main__.py index 92d04e9..5dffe30 100644 --- a/boston_housing_prediction/__main__.py +++ b/boston_housing_prediction/__main__.py @@ -3,7 +3,7 @@ Started: 08.09.2019 Lang: Phyton Description: Prediction of boston housing market prices. -version: 0.2.1 +version: 0.2.2 Dataset: Housing Values in Suburbs of Boston @@ -15,12 +15,14 @@ Latest change: - fixed Normalization for data +- organisational stuff and bugfixes: #24 +- negative input/output ''' # My Files that get imported -import boston_housing_prediction.boston_main +import boston_main # TODO: fix train and test loss if __name__ == "__main__": - boston_housing_prediction.boston_main.main() \ No newline at end of file + boston_main.main() diff --git a/boston_housing_prediction/__pycache__/boston_main.cpython-36.pyc b/boston_housing_prediction/__pycache__/boston_main.cpython-36.pyc new file mode 100644 index 0000000..575e177 Binary files /dev/null and b/boston_housing_prediction/__pycache__/boston_main.cpython-36.pyc differ diff --git a/boston_housing_prediction/__pycache__/linear_regression_libary.cpython-36.pyc b/boston_housing_prediction/__pycache__/linear_regression_libary.cpython-36.pyc new file mode 100644 index 0000000..b761cfb Binary files /dev/null and b/boston_housing_prediction/__pycache__/linear_regression_libary.cpython-36.pyc differ diff --git a/boston_housing_prediction/__pycache__/misc_libary.cpython-36.pyc b/boston_housing_prediction/__pycache__/misc_libary.cpython-36.pyc new file mode 100644 index 0000000..c82cf53 Binary files /dev/null and b/boston_housing_prediction/__pycache__/misc_libary.cpython-36.pyc differ diff --git a/boston_housing_prediction/__pycache__/polynomial_regression_libary.cpython-36.pyc b/boston_housing_prediction/__pycache__/polynomial_regression_libary.cpython-36.pyc new file mode 100644 index 0000000..f1d557c Binary files /dev/null and b/boston_housing_prediction/__pycache__/polynomial_regression_libary.cpython-36.pyc differ diff --git a/boston_housing_prediction/boston_housing.csv b/boston_housing_prediction/boston_housing.csv new file mode 100644 index 0000000..9b9116f --- /dev/null +++ b/boston_housing_prediction/boston_housing.csv @@ -0,0 +1,490 @@ +RM,LSTAT,PTRATIO,MEDV +6.575,4.98,15.3,504000.0 +6.421,9.14,17.8,453600.0 +7.185,4.03,17.8,728700.0 +6.998,2.94,18.7,701400.0 +7.147,5.33,18.7,760200.0 +6.43,5.21,18.7,602700.0 +6.012,12.43,15.2,480900.0 +6.172,19.15,15.2,569100.0
+5.631,29.93,15.2,346500.0 +6.004,17.1,15.2,396900.0 +6.377,20.45,15.2,315000.0 +6.009,13.27,15.2,396900.0 +5.889,15.71,15.2,455700.0 +5.949,8.26,21.0,428400.0 +6.096,10.26,21.0,382200.0 +5.834,8.47,21.0,417900.0 +5.935,6.58,21.0,485100.0 +5.99,14.67,21.0,367500.0 +5.456,11.69,21.0,424200.0 +5.727,11.28,21.0,382200.0 +5.57,21.02,21.0,285600.0 +5.965,13.83,21.0,411600.0 +6.142,18.72,21.0,319200.0 +5.813,19.88,21.0,304500.0 +5.924,16.3,21.0,327600.0 +5.599,16.51,21.0,291900.0 +5.813,14.81,21.0,348600.0 +6.047,17.28,21.0,310800.0 +6.495,12.8,21.0,386400.0 +6.674,11.98,21.0,441000.0 +5.713,22.6,21.0,266700.0 +6.072,13.04,21.0,304500.0 +5.95,27.71,21.0,277200.0 +5.701,18.35,21.0,275100.0 +6.096,20.34,21.0,283500.0 +5.933,9.68,19.2,396900.0 +5.841,11.41,19.2,420000.0 +5.85,8.77,19.2,441000.0 +5.966,10.13,19.2,518700.0 +6.595,4.32,18.3,646800.0 +7.024,1.98,18.3,732900.0 +6.77,4.84,17.9,558600.0 +6.169,5.81,17.9,531300.0 +6.211,7.44,17.9,518700.0 +6.069,9.55,17.9,445200.0 +5.682,10.21,17.9,405300.0 +5.786,14.15,17.9,420000.0 +6.03,18.8,17.9,348600.0 +5.399,30.81,17.9,302400.0 +5.602,16.2,17.9,407400.0 +5.963,13.45,16.8,413700.0 +6.115,9.43,16.8,430500.0 +6.511,5.28,16.8,525000.0 +5.998,8.43,16.8,491400.0 +5.888,14.8,21.1,396900.0 +7.249,4.81,17.9,743400.0 +6.383,5.77,17.3,518700.0 +6.816,3.95,15.1,663600.0 +6.145,6.86,19.7,489300.0 +5.927,9.22,19.7,411600.0 +5.741,13.15,19.7,392700.0 +5.966,14.44,19.7,336000.0 +6.456,6.73,19.7,466200.0 +6.762,9.5,19.7,525000.0 +7.104,8.05,18.6,693000.0 +6.29,4.67,16.1,493500.0 +5.787,10.24,16.1,407400.0 +5.878,8.1,18.9,462000.0 +5.594,13.09,18.9,365400.0 +5.885,8.79,18.9,438900.0 +6.417,6.72,19.2,508200.0 +5.961,9.88,19.2,455700.0 +6.065,5.52,19.2,478800.0 +6.245,7.54,19.2,491400.0 +6.273,6.78,18.7,506100.0 +6.286,8.94,18.7,449400.0 +6.279,11.97,18.7,420000.0 +6.14,10.27,18.7,436800.0 +6.232,12.34,18.7,445200.0 +5.874,9.1,18.7,426300.0 +6.727,5.29,19.0,588000.0 +6.619,7.22,19.0,501900.0 +6.302,6.72,19.0,520800.0 +6.167,7.51,19.0,480900.0 +6.389,9.62,18.5,501900.0 +6.63,6.53,18.5,558600.0 +6.015,12.86,18.5,472500.0 +6.121,8.44,18.5,466200.0 +7.007,5.5,17.8,495600.0 +7.079,5.7,17.8,602700.0 +6.417,8.81,17.8,474600.0 +6.405,8.2,17.8,462000.0 +6.442,8.16,18.2,480900.0 +6.211,6.21,18.2,525000.0 +6.249,10.59,18.2,432600.0 +6.625,6.65,18.0,596400.0 +6.163,11.34,18.0,449400.0 +8.069,4.21,18.0,812700.0 +7.82,3.57,18.0,919800.0 +7.416,6.19,18.0,697200.0 +6.727,9.42,20.9,577500.0 +6.781,7.67,20.9,556500.0 +6.405,10.63,20.9,390600.0 +6.137,13.44,20.9,405300.0 +6.167,12.33,20.9,422100.0 +5.851,16.47,20.9,409500.0 +5.836,18.66,20.9,409500.0 +6.127,14.09,20.9,428400.0 +6.474,12.27,20.9,415800.0 +6.229,15.55,20.9,407400.0 +6.195,13.0,20.9,455700.0 +6.715,10.16,17.8,478800.0 +5.913,16.21,17.8,394800.0 +6.092,17.09,17.8,392700.0 +6.254,10.45,17.8,388500.0 +5.928,15.76,17.8,384300.0 +6.176,12.04,17.8,445200.0 +6.021,10.3,17.8,403200.0 +5.872,15.37,17.8,428400.0 +5.731,13.61,17.8,405300.0 +5.87,14.37,19.1,462000.0 +6.004,14.27,19.1,426300.0 +5.961,17.93,19.1,430500.0 +5.856,25.41,19.1,363300.0 +5.879,17.58,19.1,394800.0 +5.986,14.81,19.1,449400.0 +5.613,27.26,19.1,329700.0 +5.693,17.19,21.2,340200.0 +6.431,15.39,21.2,378000.0 +5.637,18.34,21.2,300300.0 +6.458,12.6,21.2,403200.0 +6.326,12.26,21.2,411600.0 +6.372,11.12,21.2,483000.0 +5.822,15.03,21.2,386400.0 +5.757,17.31,21.2,327600.0 +6.335,16.96,21.2,380100.0 +5.942,16.9,21.2,365400.0 +6.454,14.59,21.2,359100.0 +5.857,21.32,21.2,279300.0 +6.151,18.46,21.2,373800.0 +6.174,24.16,21.2,294000.0 +5.019,34.41,21.2,302400.0 
+5.403,26.82,14.7,281400.0 +5.468,26.42,14.7,327600.0 +4.903,29.29,14.7,247800.0 +6.13,27.8,14.7,289800.0 +5.628,16.65,14.7,327600.0 +4.926,29.53,14.7,306600.0 +5.186,28.32,14.7,373800.0 +5.597,21.45,14.7,323400.0 +6.122,14.1,14.7,451500.0 +5.404,13.28,14.7,411600.0 +5.012,12.12,14.7,321300.0 +5.709,15.79,14.7,407400.0 +6.129,15.12,14.7,357000.0 +6.152,15.02,14.7,327600.0 +5.272,16.14,14.7,275100.0 +6.943,4.59,14.7,867300.0 +6.066,6.43,14.7,510300.0 +6.51,7.39,14.7,489300.0 +6.25,5.5,14.7,567000.0 +5.854,11.64,14.7,476700.0 +6.101,9.81,14.7,525000.0 +5.877,12.14,14.7,499800.0 +6.319,11.1,14.7,499800.0 +6.402,11.32,14.7,468300.0 +5.875,14.43,14.7,365400.0 +5.88,12.03,14.7,401100.0 +5.572,14.69,16.6,485100.0 +6.416,9.04,16.6,495600.0 +5.859,9.64,16.6,474600.0 +6.546,5.33,16.6,617400.0 +6.02,10.11,16.6,487200.0 +6.315,6.29,16.6,516600.0 +6.86,6.92,16.6,627900.0 +6.98,5.04,17.8,781200.0 +7.765,7.56,17.8,835800.0 +6.144,9.45,17.8,760200.0 +7.155,4.82,17.8,795900.0 +6.563,5.68,17.8,682500.0 +5.604,13.98,17.8,554400.0 +6.153,13.15,17.8,621600.0 +6.782,6.68,15.2,672000.0 +6.556,4.56,15.2,625800.0 +7.185,5.39,15.2,732900.0 +6.951,5.1,15.2,777000.0 +6.739,4.69,15.2,640500.0 +7.178,2.87,15.2,764400.0 +6.8,5.03,15.6,653100.0 +6.604,4.38,15.6,611100.0 +7.287,4.08,12.6,699300.0 +7.107,8.61,12.6,636300.0 +7.274,6.62,12.6,726600.0 +6.975,4.56,17.0,732900.0 +7.135,4.45,17.0,690900.0 +6.162,7.43,14.7,506100.0 +7.61,3.11,14.7,888300.0 +7.853,3.81,14.7,1018500.0 +5.891,10.87,18.6,474600.0 +6.326,10.97,18.6,512400.0 +5.783,18.06,18.6,472500.0 +6.064,14.66,18.6,512400.0 +5.344,23.09,18.6,420000.0 +5.96,17.27,18.6,455700.0 +5.404,23.98,18.6,405300.0 +5.807,16.03,18.6,470400.0 +6.375,9.38,18.6,590100.0 +5.412,29.55,18.6,497700.0 +6.182,9.47,18.6,525000.0 +5.888,13.51,16.4,489300.0 +6.642,9.69,16.4,602700.0 +5.951,17.92,16.4,451500.0 +6.373,10.5,16.4,483000.0 +6.951,9.71,17.4,560700.0 +6.164,21.46,17.4,455700.0 +6.879,9.93,17.4,577500.0 +6.618,7.6,17.4,632100.0 +8.266,4.14,17.4,940800.0 +8.04,3.13,17.4,789600.0 +7.163,6.36,17.4,663600.0 +7.686,3.92,17.4,980700.0 +6.552,3.76,17.4,661500.0 +5.981,11.65,17.4,510300.0 +7.412,5.25,17.4,665700.0 +8.337,2.47,17.4,875700.0 +8.247,3.95,17.4,1014300.0 +6.726,8.05,17.4,609000.0 +6.086,10.88,17.4,504000.0 +6.631,9.54,17.4,527100.0 +7.358,4.73,17.4,661500.0 +6.481,6.36,16.6,497700.0 +6.606,7.37,16.6,489300.0 +6.897,11.38,16.6,462000.0 +6.095,12.4,16.6,422100.0 +6.358,11.22,16.6,466200.0 +6.393,5.19,16.6,497700.0 +5.593,12.5,19.1,369600.0 +5.605,18.46,19.1,388500.0 +6.108,9.16,19.1,510300.0 +6.226,10.15,19.1,430500.0 +6.433,9.52,19.1,514500.0 +6.718,6.56,19.1,550200.0 +6.487,5.9,19.1,512400.0 +6.438,3.59,19.1,520800.0 +6.957,3.53,19.1,621600.0 +8.259,3.54,19.1,898800.0 +6.108,6.57,16.4,459900.0 +5.876,9.25,16.4,438900.0 +7.454,3.11,15.9,924000.0 +7.333,7.79,13.0,756000.0 +6.842,6.9,13.0,632100.0 +7.203,9.59,13.0,709800.0 +7.52,7.26,13.0,905100.0 +8.398,5.91,13.0,1024800.0 +7.327,11.25,13.0,651000.0 +7.206,8.1,13.0,766500.0 +5.56,10.45,13.0,478800.0 +7.014,14.79,13.0,644700.0 +7.47,3.16,13.0,913500.0 +5.92,13.65,18.6,434700.0 +5.856,13.0,18.6,443100.0 +6.24,6.59,18.6,529200.0 +6.538,7.73,18.6,512400.0 +7.691,6.58,18.6,739200.0 +6.758,3.53,17.6,680400.0 +6.854,2.98,17.6,672000.0 +7.267,6.05,17.6,697200.0 +6.826,4.16,17.6,695100.0 +6.482,7.19,17.6,611100.0 +6.812,4.85,14.9,737100.0 +7.82,3.76,14.9,953400.0 +6.968,4.59,14.9,743400.0 +7.645,3.01,14.9,966000.0 +7.088,7.85,15.3,676200.0 +6.453,8.23,15.3,462000.0 +6.23,12.93,18.2,422100.0 +6.209,7.14,16.6,487200.0 
+6.315,7.6,16.6,468300.0 +6.565,9.51,16.6,520800.0 +6.861,3.33,19.2,598500.0 +7.148,3.56,19.2,783300.0 +6.63,4.7,19.2,585900.0 +6.127,8.58,16.0,501900.0 +6.009,10.4,16.0,455700.0 +6.678,6.27,16.0,600600.0 +6.549,7.39,16.0,569100.0 +5.79,15.84,16.0,426300.0 +6.345,4.97,14.8,472500.0 +7.041,4.74,14.8,609000.0 +6.871,6.07,14.8,520800.0 +6.59,9.5,16.1,462000.0 +6.495,8.67,16.1,554400.0 +6.982,4.86,16.1,695100.0 +7.236,6.93,18.4,758100.0 +6.616,8.93,18.4,596400.0 +7.42,6.47,18.4,701400.0 +6.849,7.53,18.4,592200.0 +6.635,4.54,18.4,478800.0 +5.972,9.97,18.4,426300.0 +4.973,12.64,18.4,338100.0 +6.122,5.98,18.4,464100.0 +6.023,11.72,18.4,407400.0 +6.266,7.9,18.4,453600.0 +6.567,9.28,18.4,499800.0 +5.705,11.5,18.4,340200.0 +5.914,18.33,18.4,373800.0 +5.782,15.94,18.4,415800.0 +6.382,10.36,18.4,485100.0 +6.113,12.73,18.4,441000.0 +6.426,7.2,19.6,499800.0 +6.376,6.87,19.6,485100.0 +6.041,7.7,19.6,428400.0 +5.708,11.74,19.6,388500.0 +6.415,6.12,19.6,525000.0 +6.431,5.08,19.6,516600.0 +6.312,6.15,19.6,483000.0 +6.083,12.79,19.6,466200.0 +5.868,9.97,16.9,405300.0 +6.333,7.34,16.9,474600.0 +6.144,9.09,16.9,415800.0 +5.706,12.43,16.9,359100.0 +6.031,7.83,16.9,407400.0 +6.316,5.68,20.2,466200.0 +6.31,6.75,20.2,434700.0 +6.037,8.01,20.2,443100.0 +5.869,9.8,20.2,409500.0 +5.895,10.56,20.2,388500.0 +6.059,8.51,20.2,432600.0 +5.985,9.74,20.2,399000.0 +5.968,9.29,20.2,392700.0 +7.241,5.49,15.5,686700.0 +6.54,8.65,15.9,346500.0 +6.696,7.18,17.6,501900.0 +6.874,4.61,17.6,655200.0 +6.014,10.53,18.8,367500.0 +5.898,12.67,18.8,361200.0 +6.516,6.36,17.9,485100.0 +6.635,5.99,17.0,514500.0 +6.939,5.89,19.7,558600.0 +6.49,5.98,19.7,480900.0 +6.579,5.49,18.3,506100.0 +5.884,7.79,18.3,390600.0 +6.728,4.5,17.0,632100.0 +5.663,8.05,22.0,382200.0 +5.936,5.57,22.0,432600.0 +6.212,17.6,20.2,373800.0 +6.395,13.27,20.2,455700.0 +6.127,11.48,20.2,476700.0 +6.112,12.67,20.2,474600.0 +6.398,7.79,20.2,525000.0 +6.251,14.19,20.2,417900.0 +5.362,10.19,20.2,436800.0 +5.803,14.64,20.2,352800.0 +3.561,7.12,20.2,577500.0 +4.963,14.0,20.2,459900.0 +3.863,13.33,20.2,485100.0 +4.906,34.77,20.2,289800.0 +4.138,37.97,20.2,289800.0 +7.313,13.44,20.2,315000.0 +6.649,23.24,20.2,291900.0 +6.794,21.24,20.2,279300.0 +6.38,23.69,20.2,275100.0 +6.223,21.78,20.2,214200.0 +6.968,17.21,20.2,218400.0 +6.545,21.08,20.2,228900.0 +5.536,23.6,20.2,237300.0 +5.52,24.56,20.2,258300.0 +4.368,30.63,20.2,184800.0 +5.277,30.81,20.2,151200.0 +4.652,28.28,20.2,220500.0 +5.0,31.99,20.2,155400.0 +4.88,30.62,20.2,214200.0 +5.39,20.85,20.2,241500.0 +5.713,17.11,20.2,317100.0 +6.051,18.76,20.2,487200.0 +5.036,25.68,20.2,203700.0 +6.193,15.17,20.2,289800.0 +5.887,16.35,20.2,266700.0 +6.471,17.12,20.2,275100.0 +6.405,19.37,20.2,262500.0 +5.747,19.92,20.2,178500.0 +5.453,30.59,20.2,105000.0 +5.852,29.97,20.2,132300.0 +5.987,26.77,20.2,117600.0 +6.343,20.32,20.2,151200.0 +6.404,20.31,20.2,254100.0 +5.349,19.77,20.2,174300.0 +5.531,27.38,20.2,178500.0 +5.683,22.98,20.2,105000.0 +4.138,23.34,20.2,249900.0 +5.608,12.13,20.2,585900.0 +5.617,26.4,20.2,361200.0 +6.852,19.78,20.2,577500.0 +5.757,10.11,20.2,315000.0 +6.657,21.22,20.2,361200.0 +4.628,34.37,20.2,375900.0 +5.155,20.08,20.2,342300.0 +4.519,36.98,20.2,147000.0 +6.434,29.05,20.2,151200.0 +6.782,25.79,20.2,157500.0 +5.304,26.64,20.2,218400.0 +5.957,20.62,20.2,184800.0 +6.824,22.74,20.2,176400.0 +6.411,15.02,20.2,350700.0 +6.006,15.7,20.2,298200.0 +5.648,14.1,20.2,436800.0 +6.103,23.29,20.2,281400.0 +5.565,17.16,20.2,245700.0 +5.896,24.39,20.2,174300.0 +5.837,15.69,20.2,214200.0 +6.202,14.52,20.2,228900.0 
+6.193,21.52,20.2,231000.0 +6.38,24.08,20.2,199500.0 +6.348,17.64,20.2,304500.0 +6.833,19.69,20.2,296100.0 +6.425,12.03,20.2,338100.0 +6.436,16.22,20.2,300300.0 +6.208,15.17,20.2,245700.0 +6.629,23.27,20.2,281400.0 +6.461,18.05,20.2,201600.0 +6.152,26.45,20.2,182700.0 +5.935,34.02,20.2,176400.0 +5.627,22.88,20.2,268800.0 +5.818,22.11,20.2,220500.0 +6.406,19.52,20.2,359100.0 +6.219,16.59,20.2,386400.0 +6.485,18.85,20.2,323400.0 +5.854,23.79,20.2,226800.0 +6.459,23.98,20.2,247800.0 +6.341,17.79,20.2,312900.0 +6.251,16.44,20.2,264600.0 +6.185,18.13,20.2,296100.0 +6.417,19.31,20.2,273000.0 +6.749,17.44,20.2,281400.0 +6.655,17.73,20.2,319200.0 +6.297,17.27,20.2,338100.0 +7.393,16.74,20.2,373800.0 +6.728,18.71,20.2,312900.0 +6.525,18.13,20.2,296100.0 +5.976,19.01,20.2,266700.0 +5.936,16.94,20.2,283500.0 +6.301,16.23,20.2,312900.0 +6.081,14.7,20.2,420000.0 +6.701,16.42,20.2,344400.0 +6.376,14.65,20.2,371700.0 +6.317,13.99,20.2,409500.0 +6.513,10.29,20.2,424200.0 +6.209,13.22,20.2,449400.0 +5.759,14.13,20.2,417900.0 +5.952,17.15,20.2,399000.0 +6.003,21.32,20.2,401100.0 +5.926,18.13,20.2,401100.0 +5.713,14.76,20.2,422100.0 +6.167,16.29,20.2,417900.0 +6.229,12.87,20.2,411600.0 +6.437,14.36,20.2,487200.0 +6.98,11.66,20.2,625800.0 +5.427,18.14,20.2,289800.0 +6.162,24.1,20.2,279300.0 +6.484,18.68,20.2,350700.0 +5.304,24.91,20.2,252000.0 +6.185,18.03,20.2,306600.0 +6.229,13.11,20.2,449400.0 +6.242,10.74,20.2,483000.0 +6.75,7.74,20.2,497700.0 +7.061,7.01,20.2,525000.0 +5.762,10.42,20.2,457800.0 +5.871,13.34,20.2,432600.0 +6.312,10.58,20.2,445200.0 +6.114,14.98,20.2,401100.0 +5.905,11.45,20.2,432600.0 +5.454,18.06,20.1,319200.0 +5.414,23.97,20.1,147000.0 +5.093,29.68,20.1,170100.0 +5.983,18.07,20.1,285600.0 +5.983,13.35,20.1,422100.0 +5.707,12.01,19.2,457800.0 +5.926,13.59,19.2,514500.0 +5.67,17.6,19.2,485100.0 +5.39,21.14,19.2,413700.0 +5.794,14.1,19.2,384300.0 +6.019,12.92,19.2,445200.0 +5.569,15.1,19.2,367500.0 +6.027,14.33,19.2,352800.0 +6.593,9.67,21.0,470400.0 +6.12,9.08,21.0,432600.0 +6.976,5.64,21.0,501900.0 +6.794,6.48,21.0,462000.0 +6.03,7.88,21.0,249900.0 diff --git a/boston_housing_prediction/boston_main.py b/boston_housing_prediction/boston_main.py index e1ecc15..7f07e5a 100644 --- a/boston_housing_prediction/boston_main.py +++ b/boston_housing_prediction/boston_main.py @@ -1,35 +1,19 @@ -''' -Author: Lupos -Started: 08.09.2019 -Lang: Phyton -Description: Prediction of boston housing market prices. 
-version: 0.2.1 - -Dataset: -Housing Values in Suburbs of Boston - -RM: average number of rooms per dwelling(Wohnung) -LSTAT: percentage of population considered lower status -PTRATIO: pupil-teacher ratio by town -MEDV: median value of owner-occupied homes in 10.000$ - -Latest change: -- added Normalization for data -''' import argparse import multiprocessing as mp import random import sys # My Files that get imported -from boston_housing_prediction.linear_regression_libary import LinearRegression -from boston_housing_prediction.polynomial_regression_libary import PolynomialRegression -from boston_housing_prediction.misc_libary import * +# from boston_housing_prediction.linear_regression_libary import LinearRegression +# from boston_housing_prediction.polynomial_regression_libary import PolynomialRegression +# from boston_housing_prediction.misc_libary import * +from linear_regression_libary import LinearRegression +from polynomial_regression_libary import PolynomialRegression +from misc_libary import * # TODO: fix train and test loss # GLOBAL VARIABLES -# pool = multiprocessing.Pool(3) # set the pool(how many kernels) are used for multiprocessing visualize_process = None # gets later used from multiprocessing @@ -122,10 +106,11 @@ def main(): random.seed(123) # needed to fix some issued with multiprocessing list_process_arg = model.getter_viszualtion() + + # visualizing is in a new process visualize_process = mp.Process(target=visualize, args=(args, df_data, list_process_arg)) # use "args" if arguments are needed visualize_process.start() - # model.visualize(args, df_data) # visualize our model if args.predict_on: model.predic(visualize_process, args_normalization) # make preictions with the model @@ -162,12 +147,15 @@ def main(): random.seed(123) # needed to fix some issued with multiprocessing list_process_arg = model_poly.getter_viszualtion() + + # visualizing is in a new process visualize_process = mp.Process(target=visualize, args=(args, df_data, list_process_arg)) # use "args" if arguments are needed visualize_process.start() if args.predict_on: model_poly.predic(visualize_process, args_normalization) # make preictions with the model + # print what the feature shortcuts means elif args.h_features: print(" ") print("Features and their meaning") diff --git a/boston_housing_prediction/linear_regression_libary.py b/boston_housing_prediction/linear_regression_libary.py index 52df731..b543ea1 100644 --- a/boston_housing_prediction/linear_regression_libary.py +++ b/boston_housing_prediction/linear_regression_libary.py @@ -1,5 +1,5 @@ import time -from boston_housing_prediction.misc_libary import loss +from misc_libary import loss import sys import csv @@ -19,7 +19,6 @@ def __init__(self, df, args): # split in target and data self.data_train = df[0]["RM"].tolist() self.target_train = df[0]["MEDV"].tolist() - self.data_test = df[1]["RM"].tolist() self.target_test = df[1]["MEDV"].tolist() @@ -36,8 +35,10 @@ def train(self) -> None: epochs = input("Please type the numbers of epoch you want to train: ") print(" ") epochs = int(epochs) - self.epochs = epochs - break + if epochs > 0: + self.epochs = epochs + break + print("Please don't input negative numbers :)") except ValueError: print("Invalid Input!") @@ -52,10 +53,25 @@ def train(self) -> None: # our hypothesis/ what our model predicts pred_target = self.w1 * f1 + self.bias - # update our weights + # update our weights/bias self.bias = self.bias - (self.alpha * (pred_target - self.target_train[i])) self.w1 = self.w1 - (self.alpha * 
(pred_target - self.target_train[i]) * f1) + # outputs for debug mode + if self.args.fd == "debug": + print(" ") + print("example: ", str(i)) + print("----------------------") + print("Weight 1: ", str(self.w1)) + print("Weight 1 change: ", str(self.alpha * (pred_target - self.target_train[i]) * f1)) + print("Weight 1 feature: ", str(f1)) + print("Error: ", str(pred_target - self.target_train[i])) + print("----------------------") + print("Bias: ", str(self.bias)) + print("Bias change: ", str(self.alpha * (pred_target - self.target_train[i]))) + print("Error: ", str(pred_target - self.target_train[i])) + print(" ") + # sums train loss train_loss = loss(pred_target, self.target_train[i]) train_loss_sum += train_loss @@ -108,7 +124,6 @@ def predic(self, visualize_process, args_normalization) -> None: df_range = args_normalization[0] df_mean = args_normalization[1] - time.sleep(1) # sleeps so that the function visualize()(which is seperate process through multiprocessing) has enough time to print the output correctly self.pred_target = 0 print(" ") @@ -138,19 +153,34 @@ def predic(self, visualize_process, args_normalization) -> None: break else: rm_input = round(float(rm_input), 20) + + # checks that no negative numbers get entered + if rm_input < 0: + print(" ") + print("Please don't enter negative numbers :)") + raise ValueError + rm_input_norm = (rm_input - df_mean[0]) / df_range[0] # normalizing input self.pred_target = self.w1 * rm_input_norm + self.bias # predicting # denormalization of output - denorm_pred_target = (self.pred_target * df_range[1]) + df_mean[1] - - print(" ") - print("The model predicted that a house with a RM value of: " + str(rm_input) + ".") - print("Is worth about: " + str(round(denorm_pred_target, 6)) + " in 10,000$(GER 10.000$).") - print(" ") + denorm_pred_target = round((self.pred_target * df_range[1]) + df_mean[1], 6) + + # check if predicted output is negative + if denorm_pred_target < 0: + print("-----------------------------------------------------------------------------") + print("Warning: the input values doesn't correspond to a real house.") + print("-----------------------------------------------------------------------------") + print(" ") + else: + print("-----------------------------------------------------------------------------") + print("Is worth about: " + str(denorm_pred_target) + " in 10,000$(GER 10.000$).") + print("-----------------------------------------------------------------------------") + print(" ") except ValueError: print("Invalid Input!") + print(" ") # a getter for the viszulation function def getter_viszualtion(self) -> list: diff --git a/boston_housing_prediction/misc_libary.py b/boston_housing_prediction/misc_libary.py index bc85637..07653bc 100644 --- a/boston_housing_prediction/misc_libary.py +++ b/boston_housing_prediction/misc_libary.py @@ -18,7 +18,6 @@ def sigmoid(x: float) -> float: def loss(pred_target: float, real_traget: float) -> float: return round(float(np.sqrt((pred_target - real_traget) ** 2)), 4) - #return float((pred_target - real_traget) ** 2) def get_Data() -> object: @@ -118,20 +117,18 @@ def preproc_data(df: object, args) -> list: df[["MEDV"]] = df[["MEDV"]] / 10000 if args.model == "linear_regression": - # split data and target - df_new = df #df[["RM"]] + df_new = df # normalization variables for linear regression df_new_range = df_new.max() - df_new.min() df_new_mean = df_new.std(ddof=1) + # normalization df_new = (df_new - df_new_mean) / df_new_range # shuffling data df_new = 
df_new.sample(frac=1).reset_index(drop=True) - #df_new["MEDV"] = df["MEDV"] # we dont want to normalize our target so here we add data and target together in one dataset again. - # split in training and test data df_new_train = df_new[:380] df_new_test = df_new[381:] @@ -139,20 +136,18 @@ def preproc_data(df: object, args) -> list: return df_new_train, df_new_test, df_new_range, df_new_mean elif args.model == "polynomial_regression": - # split data and target - df_new = df # df[["RM", "LSTAT", "PTRATIO"]] + df_new = df # normalization variables for polynomial regression df_new_range = df_new.max() - df_new.min() df_new_mean = df_new.mean() + # normalization df_new = (df_new - df_new_mean) / df_new_range # shuffling data df_new = df_new.sample(frac=1).reset_index(drop=True) - # df_new["MEDV"] = df["MEDV"] # we dont want to normalize our target so here we add data and target together in one dataset again. - # split in training and test data df_new_train = df_new[:380] df_new_test = df_new[381:] @@ -164,13 +159,11 @@ def preproc_data(df: object, args) -> list: def hypothesis_pol(weights, f1, f2, f3, bias): - #print(weights[0]) pred = round(weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f2 + weights[3] * f2 ** 2 + \ weights[4] * f3 + weights[5] * f3 ** 2 + weights[6] * bias, 4) return pred # visualize our model. the function visualize() is not in the class model so that we can use multiprocessing. -# args, df_data, self.w1, self.bias, self.train_loss_history, self.test_loss_history, self.evaluation_time, self.data_train, self.target_train def visualize(args, df_data, parameter_list: list) -> None: # unzip the argument list gotten from model.getter_viszulation() weights_bias = parameter_list[0] @@ -206,6 +199,7 @@ def visualize(args, df_data, parameter_list: list) -> None: print(" ") + # visualize data if argument says so if args.v_data: visualize_Data(df_data) @@ -228,43 +222,11 @@ def visualize(args, df_data, parameter_list: list) -> None: sns.lineplot(x=X, y=Y) sns.scatterplot(x=data_train, y=target_train, color="green") elif args.model == "polynomial_regression": - # x_poly = [] - # y_poly = [] - # for i in range(2, 15, 1): - # x_poly.append(i) - # y_poly.append(hypothesis_pol(weights_bias[:], i, i, i, 1)) - # # weights, f1, f2, f3, bias - # #plt.plot(x_poly, y_poly) - # - # fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(9, 6)) - # axes = axes.flatten() # axes[1] because axes is a tulpl and figure is in it - # - # fig.suptitle("Decision Border and Data-points") - # - # sns.set_style("darkgrid") - # axes[0].set(xlabel='RM', ylabel='MDV') - # sns.lineplot(x=x_poly, y=y_poly, ax=axes[0]) - # sns.scatterplot(x=data_train["RM"], y=target_train, ax=axes[0], color="green") - # - # axes[1].set(xlabel='LSTAT', ylabel='MDV') - # sns.lineplot(x=x_poly, y=y_poly, ax=axes[1]) - # sns.scatterplot(x=data_train["LSTAT"], y=target_train, ax=axes[1], color="green") - # - # axes[2].set(xlabel='PTRATIO', ylabel='MDV') - # sns.lineplot(x=x_poly, y=y_poly, ax=axes[2]) - # sns.scatterplot(x=data_train["PTRATIO"], y=target_train, ax=axes[2], color="green") - # - # axes[3].remove() - - # plot model for polynomial_model v_model_poly("RM", "LSTAT", weights_bias, data_train, target_train) v_model_poly("RM", "PTRATIO", weights_bias, data_train, target_train) # convert our loss arrays into a dataframe from pandas - # print("x_train: ", str(len(x_train_loose))) - # print("train_loss: ", str(len(train_loss_history))) - # print("test_loss_history: ", str(len(test_loss_history))) data = {"x": x_train_loose, 
"train": train_loss_history, "test": test_loss_history} data = pd.DataFrame(data, columns=["x", "train", "test"]) @@ -278,14 +240,17 @@ def visualize(args, df_data, parameter_list: list) -> None: plt.ylabel("Loss") plt.title("Loss over Time") + # plt.show() when we have a diagram if args.v_loss or args.v_model or args.v_data: plt.show() def v_model_poly(x_axis, y_axis, weights_bias, data_train, target_train): + # create our figure. With size of the figure and specifying the art of diagrams we use "3d" fig = plt.figure(figsize=(10, 7)) ax = fig.gca(projection='3d') + # data gets created for visualizing our model f1 = np.arange(-0.7, 1.2, 0.1) f2 = np.arange(-0.7, 1.2, 0.1) f3 = np.arange(-0.7, 1.2, 0.1) @@ -296,8 +261,10 @@ def v_model_poly(x_axis, y_axis, weights_bias, data_train, target_train): weights_bias[4] * f3 + \ weights_bias[5] * f3 ** 2 + weights_bias[6] * 1 + # ploting our model ax.plot_surface(f1, f2, Z, alpha=0.3, edgecolors='grey') + # ploting our data points from our dataframe X = data_train[x_axis] Y = data_train[y_axis] Z = target_train @@ -315,9 +282,9 @@ def v_model_poly(x_axis, y_axis, weights_bias, data_train, target_train): ax.set_zlabel("MEDV") # hide the ticks of the label - #ax.axes.xaxis.set_ticklabels([]) - #ax.axes.yaxis.set_ticklabels([]) - #ax.axes.zaxis.set_ticklabels([]) + ax.axes.xaxis.set_ticklabels([]) + ax.axes.yaxis.set_ticklabels([]) + ax.axes.zaxis.set_ticklabels([]) # hide the grid ax.grid(False) diff --git a/boston_housing_prediction/polynomial_regression_libary.py b/boston_housing_prediction/polynomial_regression_libary.py index 863cfab..5d6cff2 100644 --- a/boston_housing_prediction/polynomial_regression_libary.py +++ b/boston_housing_prediction/polynomial_regression_libary.py @@ -1,5 +1,5 @@ import time -from boston_housing_prediction.misc_libary import loss +from misc_libary import loss import sys class PolynomialRegression: @@ -8,51 +8,47 @@ def __init__(self, df, args): self.weights = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] self.bias = 1 - # how man epoch we train - self.epochs = 30 - self.alpha = 0.005 + self.epochs = 30 # how man epoch we train + self.alpha = 0.003 # learning rate + + # initiate variables to visualize loss history self.train_loss_history = [] self.test_loss_history = [] self.x_train_loose = [] # split in target and data - # print(df[0].info()) - # print("-----------------------") self.data_train = df[0].iloc[:, df[0].columns != "MEDV"].reset_index(drop=True) # the ":" stands for every element in there self.data_test = df[1].iloc[:, df[1].columns != "MEDV"].reset_index(drop=True) - # print(self.data_train.info()) self.target_train = df[0]["MEDV"].tolist() self.target_test = df[1]["MEDV"].tolist() + # misc self.evaluation_time = 0 self.args = args - self.w1 = 0 # self w1 is a dummy value. 
can be removed later - # our hypothesis/ what our model predicts def hypothesis(self, weights, f1, f2, f3, bias): - #print(weights[0]) - # pred = round(weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f2 + weights[3] * f2 ** 2 + \ - # weights[4] * f3 + weights[5] * f3 ** 2 + weights[6] * bias, 10) pred = weights[0] * f1 + weights[1] * f1 ** 2 + weights[2] * f1 ** 3 + weights[3] * f2 + weights[4] * f2 ** 2 + \ weights[5] * f2 ** 3 + weights[6] * f3 + weights[7] * f3 ** 2 + weights[8] * f3 ** 3 + weights[9] * bias return pred # training our model def train(self) -> None: - + # exits while loop when right inputs got inserted while True: try: # get input for our model epochs = input("Please type the numbers of epoch you want to train: ") print(" ") epochs = int(epochs) - self.epochs = epochs - break + if epochs > 0: + self.epochs = epochs + break + print("Please don't input negative numbers :)") except ValueError: print("Invalid Input!") - start_time = time.time() + start_time = time.time() # start timer. To later calculate time needed to train the model for _ in range(self.epochs): train_loss_sum = 0 test_loss_sum = 0 @@ -66,22 +62,10 @@ def train(self) -> None: # our hypothesis/ what our model predicts pred_target = self.hypothesis(self.weights, f1, f2, f3, self.bias) - # update our weights - #print((pred_target - self.target_train[i])) - - # try: - # error = (pred_target - self.target_train[i]) - # if error > 1000000000000: - # error = 1000000000000 - # elif error < -1000000000000: - # error = -1000000000000 - # except RuntimeWarning or RuntimeError(): - # if error > 1000000000000: - # error = 1000000000000 - # elif error < -1000000000000: - # error = -1000000000000 + # calculate the error(How far away was our prediction from the real value) error = (pred_target - self.target_train[i]) + # training our weights self.weights[0] = self.weights[0] - self.alpha * (error * f1) self.weights[1] = self.weights[1] - self.alpha * (2 * error * f1) self.weights[2] = self.weights[2] - self.alpha * (3 * error * f2 ** 2) @@ -92,11 +76,12 @@ def train(self) -> None: self.weights[7] = self.weights[7] - self.alpha * (2 * error * f3) self.weights[8] = self.weights[8] - self.alpha * (3 * error * f3 ** 2) self.weights[9] = self.weights[9] - self.alpha * (error * self.bias) - #print((pred_target - self.target_train[i]) * self.bias) + # sums train loss train_loss = loss(pred_target, self.target_train[i]) train_loss_sum += train_loss + # outputs for debug mode if self.args.fd == "debug": print(" ") print("example: ", str(i)) @@ -122,7 +107,7 @@ def train(self) -> None: # predict test house prices pred_target_test = self.hypothesis(self.weights, f1, f2, f3, self.bias) - # evalutae with loss + # evaluate with loss test_loss = loss(pred_target_test, self.target_test[i]) test_loss_sum += test_loss @@ -145,18 +130,18 @@ def train(self) -> None: if self.args.fd == "full": print(" ") print("Epoch" + str(_) + " Mean-train loss: " + str( - round(mean_loss_one_epoch_test, 6))) # prints mean-loss of every Epoch + round(mean_loss_one_epoch_train, 6))) # prints mean-loss of every Epoch print(" ") else: print("Epoch" + str(_) + " Mean-train loss: " + - str(mean_loss_one_epoch_test)) # prints mean-loss of every Epoch + str(mean_loss_one_epoch_train)) # prints mean-loss of every Epoch end_time = time.time() self.evaluation_time = end_time - start_time # a getter for the viszulation function - def getter_viszualtion(self) -> list: + def getter_viszualtion(self): return self.weights, self.train_loss_history, 
self.test_loss_history, self.evaluation_time, self.data_train, self.target_train, self.x_train_loose # saves weight and bias @@ -179,58 +164,66 @@ def predic(self, visualize_process, args_normalization) -> None: print("Prediction") print("------------------------------------") print("With this model you can predict how much a house is worth.") + print(" ") # while true until valid input while True: try: - # get input for our model - print("If you want to quit type: 'quit'.") - print("Please enter the RM vaule. Values with the type of Int or float are only allowed.") - rm_input = input() + print('If you want to quit type: "quit".') + print('Only Values with the type of "int" or "float" are allowed.') + print("Type the Values in the following order: ") + print("1.RM 2.LSTAT 3.PTRATIO") + input_list = [] + for i in range(0,3,1): + # exits while loop when right inputs got inserted + while True: + input_var = input() + + if input_var == "quit" or input_var == "Quit": + if visualize_process.is_alive(): + try: + visualize_process.terminate() + except Exception as e: + print("Error: ", str(e)) + print(" ") + print("Please be noted that this value is a estimate. I am not liable responsibly.") + print("For more information about the copyright of this programm look at my Github repository: ") + print("github.com/LuposX/BostonHousingPrediction") + sys.exit(0) # exit the script sucessful + break - if rm_input == "quit" or rm_input == "Quit": - if visualize_process.is_alive(): try: - visualize_process.terminate() - except Exception as e: - print("Error: ", str(e)) - print(" ") - print("Please be noted that this value is a estimate. I am not liable responsibly.") - print( - "For more information about the copyright of this programm look at my Github repository: ") - print("github.com/LuposX/BostonHousingPrediction") - sys.exit(0) # exit the script sucessful - break + input_var = float(input_var) + if input_var < 0: + print("Please don't enter negative numbers :)") + else: + break - print(" ") + except ValueError: + print("Invalid Input :/") - print("Please enter the LSTAT vaule. Values with the type of Int or float are only allowed.") - lstat_input = input() - print(" ") + input_list.append(input_var) - print("Please enter the PTRATIO vaule. 
Values with the type of Int or float are only allowed.") - ptratio_input = input() + except Exception as e: + print(str(e)) + + try: print(" ") - rm_input = round(float(rm_input), 4) - lstat_input = round(float(lstat_input), 4) - ptratio_input = round(float(ptratio_input), 4) + # typecasting our inputs and rounding them + rm_input = round(float(input_list[0]), 4) + lstat_input = round(float(input_list[1]), 4) + ptratio_input = round(float(input_list[2]), 4) # normalizing input rm_input_norm = (rm_input - df_mean[0]) / df_range[0] lstat_input_norm = (lstat_input - df_mean[1]) / df_range[1] ptratio_input_norm = (ptratio_input - df_mean[2]) / df_range[2] + # predicting self.pred_target = self.hypothesis(self.weights, rm_input_norm, lstat_input_norm, ptratio_input_norm, 1) - print(self.pred_target) # denormalization of output - denorm_pred_target = (self.pred_target * df_range[3]) + df_mean[3] - - # print(self.pred_target) - # print("---------------") - # print(df_range) - # print("---------------") - # print(df_mean) + denorm_pred_target = round((self.pred_target * df_range[3]) + df_mean[3], 6) print(" ") print("The model predicted that a house with the values: ") @@ -238,7 +231,18 @@ def predic(self, visualize_process, args_normalization) -> None: print("LSTAT :" + str(lstat_input)) print("PTRATIO :" + str(ptratio_input)) print(" ") - print("Is worth about: " + str(round(denorm_pred_target, 6)) + " in 10,000$(GER 10.000$).") - print(" ") - except ValueError: - print("Invalid Input!") + + # check if predicted output is negative + if denorm_pred_target < 0: + print("-----------------------------------------------------------------------------") + print("Warning: the input values doesn't correspond to a real house.") + print("-----------------------------------------------------------------------------") + print(" ") + else: + print("-----------------------------------------------------------------------------") + print("Is worth about: " + str(denorm_pred_target) + " in 10,000$(GER 10.000$).") + print("-----------------------------------------------------------------------------") + print(" ") + + except Exception as e: + print("Something went wrong: ", str(e))
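The core behavioral change in this patch (#24) guards both ends of the prediction path: user input must be a non-negative number before it is normalized, and a denormalized prediction that comes out negative is reported as a warning instead of a house price. Below is a minimal, standalone sketch of that pattern; the normalization statistics, the trained parameters, and the helper names (`read_non_negative`, `predict_price`) are made-up placeholders for illustration, not values or functions taken from the repository.

```python
# Illustrative sketch of the negative input/output handling added in 0.2.2 (#24).
# The normalization statistics and model parameters below are made up for the example.

RM_MEAN, RM_RANGE = 6.28, 5.22        # assumed mean/range used to normalize RM
MEDV_MEAN, MEDV_RANGE = 45.4, 91.4    # assumed mean/range of the target (in 10,000$)
W1, BIAS = 0.7, 0.05                  # assumed trained weight and bias


def read_non_negative(prompt: str) -> float:
    """Ask again until the user enters a number that is not negative."""
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print("Invalid input!")
            continue
        if value < 0:
            print("Please don't enter negative numbers :)")
            continue
        return value


def predict_price(rm: float) -> float:
    """Normalize the input, predict, then denormalize the prediction."""
    rm_norm = (rm - RM_MEAN) / RM_RANGE
    pred_norm = W1 * rm_norm + BIAS
    return round(pred_norm * MEDV_RANGE + MEDV_MEAN, 6)


if __name__ == "__main__":
    price = predict_price(read_non_negative("Please enter the RM value: "))
    if price < 0:
        # a negative denormalized price means the inputs don't describe a real house
        print("Warning: the input values don't correspond to a real house.")
    else:
        print("Is worth about: " + str(price) + " in 10,000$.")
```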
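For context on the training loop that the new debug output instruments: the linear model is fitted by per-example gradient descent, where each prediction error moves the weight and the bias in proportion to the learning rate, as in the `self.w1` and `self.bias` updates above. The following is a compact sketch of that update rule under the assumption of a single, already-normalized feature; the toy data, learning rate, and epoch count are illustrative defaults rather than the project's.

```python
import random


def train_linear(data, targets, alpha=0.005, epochs=30):
    """Per-example (stochastic) gradient descent for y ~ w1 * x + bias."""
    w1, bias = 0.0, 0.0
    for _ in range(epochs):
        for x, y in zip(data, targets):
            pred = w1 * x + bias      # hypothesis
            error = pred - y          # prediction error for this example
            w1 -= alpha * error * x   # step the weight against the gradient
            bias -= alpha * error     # step the bias against the gradient
    return w1, bias


# toy usage with made-up, roughly normalized data
random.seed(123)
xs = [random.uniform(-1.0, 1.0) for _ in range(200)]
ys = [0.8 * x + 0.1 + random.gauss(0.0, 0.05) for x in xs]
print(train_linear(xs, ys))
```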