From 1cb79bc72a0a3c9979ddd467fcb6988c6b5003ab Mon Sep 17 00:00:00 2001 From: jbsch Date: Wed, 23 Oct 2024 19:27:43 +0530 Subject: [PATCH 01/24] added ridge regression --- .../ridge_regression/ADRvsRating.csv | 1001 +++++++++++++++++ machine_learning/ridge_regression/model.py | 130 +++ 2 files changed, 1131 insertions(+) create mode 100644 machine_learning/ridge_regression/ADRvsRating.csv create mode 100644 machine_learning/ridge_regression/model.py diff --git a/machine_learning/ridge_regression/ADRvsRating.csv b/machine_learning/ridge_regression/ADRvsRating.csv new file mode 100644 index 000000000000..dc1bdde8e095 --- /dev/null +++ b/machine_learning/ridge_regression/ADRvsRating.csv @@ -0,0 +1,1001 @@ +ADR,Rating +85.0,117.0 +83.5,110.00000000000001 +78.2,102.0 +68.1,97.0 +66.3,86.0 +99.8,135.0 +78.2,120.0 +85.6,103.0 +66.6,100.0 +57.5,87.0 +118.6,182.0 +114.8,167.0 +90.0,155.0 +59.2,122.0 +68.9,107.0 +85.0,101.0 +65.8,81.0 +44.7,55.00000000000001 +56.2,52.0 +54.2,50.0 +113.7,142.0 +86.1,128.0 +77.9,114.99999999999999 +62.5,81.0 +43.7,69.0 +100.4,123.0 +73.1,110.00000000000001 +73.9,108.0 +59.6,99.0 +55.0,72.0 +86.6,112.00000000000001 +92.6,100.0 +66.6,90.0 +46.7,65.0 +53.1,62.0 +117.0,170.0 +88.3,133.0 +86.2,132.0 +56.7,109.00000000000001 +56.6,72.0 +77.4,133.0 +85.7,121.0 +78.1,93.0 +69.8,92.0 +62.3,87.0 +94.9,132.0 +88.1,118.0 +66.9,105.0 +76.6,102.0 +48.8,65.0 +93.7,117.0 +72.1,107.0 +77.9,106.0 +78.5,105.0 +58.7,77.0 +108.3,159.0 +94.5,115.99999999999999 +76.8,112.00000000000001 +64.9,104.0 +53.6,69.0 +75.0,99.0 +61.2,98.0 +80.8,97.0 +67.2,90.0 +67.0,79.0 +126.1,204.99999999999997 +100.0,146.0 +78.3,123.0 +56.6,76.0 +57.8,73.0 +91.1,109.00000000000001 +64.3,90.0 +85.5,87.0 +54.3,72.0 +57.1,72.0 +123.7,186.0 +84.2,123.0 +82.2,120.0 +73.3,120.0 +64.8,105.0 +87.9,119.0 +84.6,111.00000000000001 +80.5,99.0 +66.9,85.0 +54.8,65.0 +113.5,148.0 +93.6,131.0 +81.5,117.0 +69.5,111.00000000000001 +41.1,65.0 +95.7,150.0 +81.4,149.0 +94.7,137.0 +69.1,112.99999999999999 +49.0,67.0 +97.7,143.0 +69.7,99.0 +69.8,91.0 +62.2,75.0 +40.3,50.0 +136.5,220.00000000000003 +110.2,166.0 +75.4,154.0 +87.7,145.0 +80.0,134.0 +53.2,61.0 +63.1,60.0 +70.1,57.99999999999999 +52.0,48.0 +36.4,27.0 +71.6,78.0 +60.7,67.0 +59.5,60.0 +72.4,50.0 +45.1,41.0 +126.1,204.99999999999997 +100.2,158.0 +88.3,155.0 +92.4,134.0 +79.3,122.0 +104.3,146.0 +89.7,130.0 +79.0,127.0 +66.3,96.0 +53.2,85.0 +85.5,107.0 +72.7,95.0 +65.5,94.0 +66.9,87.0 +57.0,84.0 +100.7,138.0 +89.2,137.0 +79.8,137.0 +62.5,110.00000000000001 +73.4,105.0 +101.5,112.99999999999999 +83.0,106.0 +62.1,93.0 +41.7,57.99999999999999 +45.6,56.00000000000001 +110.0,165.0 +93.6,142.0 +83.8,125.0 +70.2,107.0 +42.6,82.0 +74.9,101.0 +73.0,97.0 +65.8,88.0 +69.9,77.0 +68.9,73.0 +89.7,139.0 +77.4,124.0 +77.5,120.0 +82.3,110.00000000000001 +55.5,93.0 +82.1,111.00000000000001 +72.7,92.0 +67.1,90.0 +70.8,88.0 +56.8,68.0 +84.2,112.99999999999999 +78.5,100.0 +62.3,86.0 +73.2,86.0 +68.3,72.0 +108.9,153.0 +98.6,144.0 +81.2,130.0 +73.1,120.0 +66.8,97.0 +108.4,150.0 +92.3,140.0 +75.0,111.00000000000001 +74.8,100.0 +35.5,49.0 +88.7,122.0 +72.5,100.0 +68.8,96.0 +67.8,89.0 +69.8,85.0 +135.3,202.99999999999997 +91.8,137.0 +79.3,131.0 +68.7,112.99999999999999 +44.3,75.0 +102.0,140.0 +67.1,67.0 +50.1,63.0 +37.7,47.0 +44.3,47.0 +105.3,137.0 +72.6,130.0 +97.7,127.0 +55.6,89.0 +62.1,82.0 +95.5,131.0 +72.4,112.00000000000001 +86.7,107.0 +63.4,94.0 +65.1,76.0 +55.4,85.0 +71.7,83.0 +70.8,77.0 +52.0,69.0 +59.6,68.0 +114.1,185.0 +95.6,143.0 +92.0,131.0 +71.2,121.0 +62.9,110.00000000000001 +119.1,211.0 
+96.5,163.0 +98.1,154.0 +92.2,148.0 +85.2,145.0 +84.9,101.0 +58.9,61.0 +38.2,41.0 +38.8,35.0 +19.7,5.0 +116.4,168.0 +78.5,148.0 +98.5,136.0 +69.4,131.0 +64.1,126.0 +76.5,101.0 +69.3,77.0 +53.9,69.0 +51.0,61.0 +64.5,60.0 +96.4,141.0 +77.4,134.0 +74.4,121.0 +75.1,107.0 +82.0,99.0 +76.3,109.00000000000001 +84.6,107.0 +67.8,77.0 +68.4,76.0 +57.2,68.0 +115.3,196.0 +89.9,127.0 +81.3,118.0 +60.7,102.0 +66.8,96.0 +64.5,92.0 +77.9,91.0 +72.6,85.0 +69.7,84.0 +63.7,82.0 +113.0,176.0 +78.8,127.0 +67.1,118.0 +81.5,115.99999999999999 +80.2,112.99999999999999 +76.5,103.0 +73.9,103.0 +81.0,92.0 +65.1,80.0 +60.8,74.0 +76.1,90.0 +60.1,76.0 +66.3,71.0 +45.0,60.0 +63.7,56.99999999999999 +127.8,219.0 +129.7,176.0 +99.7,168.0 +76.2,100.0 +33.4,78.0 +96.0,112.99999999999999 +71.5,105.0 +74.1,89.0 +60.7,69.0 +49.0,69.0 +125.2,171.0 +80.4,124.0 +87.3,114.99999999999999 +71.4,110.00000000000001 +64.5,107.0 +100.0,141.0 +94.5,127.0 +88.5,120.0 +62.9,107.0 +71.7,97.0 +95.1,129.0 +72.2,99.0 +75.5,96.0 +58.8,73.0 +55.3,68.0 +100.1,169.0 +95.3,145.0 +82.7,127.0 +75.0,119.0 +77.7,104.0 +81.2,101.0 +71.6,90.0 +82.3,83.0 +54.0,81.0 +44.5,35.0 +89.0,91.0 +75.0,72.0 +55.0,59.0 +45.1,54.0 +48.8,51.0 +124.9,179.0 +74.1,139.0 +86.6,139.0 +63.5,119.0 +69.6,118.0 +82.8,112.99999999999999 +83.3,111.00000000000001 +76.0,83.0 +57.6,78.0 +57.1,73.0 +99.3,155.0 +95.8,136.0 +91.4,135.0 +60.3,106.0 +83.1,106.0 +96.4,157.0 +103.2,151.0 +91.1,130.0 +87.1,126.0 +65.1,113.99999999999999 +76.0,93.0 +73.6,90.0 +67.7,83.0 +73.3,78.0 +36.6,40.0 +70.2,82.0 +55.6,59.0 +52.2,53.0 +58.9,51.0 +50.0,51.0 +123.7,204.0 +127.1,186.0 +78.6,141.0 +67.9,113.99999999999999 +60.2,110.00000000000001 +75.2,98.0 +69.9,90.0 +72.8,84.0 +58.2,73.0 +57.5,69.0 +105.3,191.0 +89.8,152.0 +91.1,125.0 +74.4,106.0 +66.0,94.0 +109.2,119.0 +83.9,105.0 +69.1,91.0 +47.9,62.0 +50.3,56.99999999999999 +101.8,147.0 +82.7,134.0 +90.8,133.0 +85.0,127.0 +56.6,81.0 +78.6,96.0 +79.5,93.0 +69.1,69.0 +62.8,68.0 +47.9,59.0 +110.2,183.0 +102.0,151.0 +88.9,150.0 +71.0,114.99999999999999 +46.3,77.0 +92.0,135.0 +100.2,126.0 +76.4,112.99999999999999 +71.1,108.0 +65.8,89.0 +79.3,118.0 +85.9,118.0 +59.9,83.0 +72.3,82.0 +39.7,57.99999999999999 +76.3,114.99999999999999 +84.9,100.0 +77.3,96.0 +67.7,92.0 +73.1,91.0 +96.5,135.0 +99.6,134.0 +74.5,107.0 +72.1,102.0 +46.0,71.0 +86.8,141.0 +98.6,141.0 +101.7,132.0 +59.9,98.0 +59.0,78.0 +96.2,124.0 +72.6,112.00000000000001 +75.7,111.00000000000001 +68.3,93.0 +65.3,75.0 +97.5,137.0 +82.1,124.0 +89.8,113.99999999999999 +72.8,112.00000000000001 +61.6,75.0 +119.3,166.0 +75.7,102.0 +65.2,93.0 +56.4,88.0 +58.7,81.0 +93.2,120.0 +93.4,113.99999999999999 +74.7,112.00000000000001 +75.6,108.0 +58.4,79.0 +98.6,140.0 +85.8,119.0 +67.9,102.0 +67.3,94.0 +61.5,78.0 +92.4,118.0 +75.6,92.0 +68.8,91.0 +75.1,86.0 +44.3,64.0 +110.1,178.0 +98.0,138.0 +76.2,112.99999999999999 +56.2,93.0 +61.0,92.0 +54.5,100.0 +68.7,87.0 +78.2,87.0 +59.8,74.0 +59.5,68.0 +127.0,190.0 +98.6,132.0 +85.5,127.0 +55.9,106.0 +36.1,86.0 +94.1,171.0 +84.5,152.0 +76.0,110.00000000000001 +69.6,108.0 +46.9,78.0 +69.1,91.0 +49.3,91.0 +71.9,90.0 +66.7,77.0 +62.0,68.0 +105.0,160.0 +102.7,142.0 +51.9,106.0 +73.9,89.0 +63.1,82.0 +105.8,158.0 +80.2,93.0 +56.5,91.0 +62.4,89.0 +62.3,83.0 +73.0,88.0 +70.0,80.0 +68.2,74.0 +55.8,64.0 +27.9,22.0 +95.0,178.0 +97.8,144.0 +82.7,139.0 +73.9,136.0 +117.6,135.0 +93.3,142.0 +76.7,119.0 +78.0,117.0 +67.6,109.00000000000001 +68.3,81.0 +69.3,118.0 +80.5,105.0 +81.0,93.0 +54.9,73.0 +61.4,72.0 +101.9,114.99999999999999 +79.9,97.0 +50.3,60.0 +65.1,56.99999999999999 
+54.5,55.00000000000001 +104.2,180.0 +111.9,178.0 +89.6,163.0 +72.5,134.0 +62.7,85.0 +89.5,106.0 +85.9,97.0 +65.5,87.0 +61.0,74.0 +50.7,54.0 +135.4,197.0 +97.0,161.0 +78.1,123.0 +70.0,101.0 +55.8,68.0 +101.9,117.0 +82.5,115.99999999999999 +73.6,101.0 +68.9,100.0 +44.8,76.0 +92.1,138.0 +83.6,126.0 +67.3,98.0 +75.4,96.0 +54.2,74.0 +103.7,182.0 +101.4,165.0 +82.9,141.0 +58.4,115.99999999999999 +85.1,108.0 +83.2,98.0 +70.2,78.0 +72.6,78.0 +47.7,74.0 +55.9,56.00000000000001 +88.5,112.00000000000001 +76.7,100.0 +66.6,87.0 +53.2,82.0 +60.3,81.0 +95.8,148.0 +93.3,139.0 +77.2,124.0 +76.0,107.0 +45.1,80.0 +123.3,206.99999999999997 +93.9,163.0 +86.4,152.0 +78.3,152.0 +69.1,92.0 +80.5,88.0 +80.7,86.0 +65.5,50.0 +51.3,40.0 +33.7,32.0 +115.5,162.0 +62.6,70.0 +46.0,66.0 +53.6,61.0 +56.9,50.0 +105.7,177.0 +90.2,136.0 +85.8,133.0 +84.2,119.0 +47.5,96.0 +115.2,182.0 +94.2,134.0 +83.0,112.00000000000001 +80.1,107.0 +61.6,83.0 +105.7,146.0 +75.2,106.0 +56.7,83.0 +50.3,63.0 +54.1,59.0 +104.1,156.0 +97.8,151.0 +98.4,143.0 +47.9,93.0 +42.1,74.0 +89.6,138.0 +87.6,130.0 +85.7,101.0 +55.8,88.0 +30.6,39.0 +96.1,144.0 +83.1,119.0 +75.9,114.99999999999999 +78.1,112.99999999999999 +69.9,110.00000000000001 +85.5,109.00000000000001 +71.0,99.0 +71.0,87.0 +67.2,86.0 +59.1,71.0 +120.3,188.0 +64.6,104.0 +78.9,104.0 +75.0,101.0 +56.4,68.0 +100.2,135.0 +75.3,104.0 +71.1,102.0 +68.9,94.0 +63.5,84.0 +66.3,89.0 +55.3,88.0 +80.9,84.0 +71.9,72.0 +42.1,41.0 +114.7,173.0 +88.0,160.0 +74.5,151.0 +74.8,127.0 +65.2,99.0 +115.3,163.0 +92.4,122.0 +58.3,89.0 +61.6,85.0 +42.6,69.0 +94.1,144.0 +87.6,129.0 +59.7,106.0 +79.0,99.0 +46.1,65.0 +75.5,89.0 +80.8,72.0 +60.2,67.0 +51.9,54.0 +33.9,27.0 +134.1,235.0 +98.2,160.0 +86.3,146.0 +72.5,140.0 +58.0,100.0 +78.8,112.99999999999999 +77.0,105.0 +75.2,81.0 +59.0,60.0 +51.0,56.99999999999999 +104.2,164.0 +94.8,157.0 +86.6,137.0 +81.7,115.99999999999999 +69.7,102.0 +92.7,154.0 +79.4,151.0 +84.7,145.0 +70.0,125.0 +92.4,112.00000000000001 +83.1,95.0 +67.8,78.0 +63.6,77.0 +51.9,50.0 +33.2,30.0 +113.9,162.0 +83.9,126.0 +75.7,118.0 +69.4,107.0 +78.1,96.0 +88.6,117.0 +95.3,111.00000000000001 +71.8,106.0 +72.0,92.0 +67.4,67.0 +87.2,127.0 +85.2,111.00000000000001 +58.7,86.0 +51.2,81.0 +53.6,73.0 +106.0,165.0 +80.5,125.0 +76.8,119.0 +80.3,107.0 +46.9,99.0 +117.8,204.0 +92.4,151.0 +89.2,136.0 +89.6,128.0 +69.2,119.0 +100.7,93.0 +54.5,67.0 +59.5,61.0 +59.4,60.0 +49.1,50.0 +109.8,147.0 +92.4,143.0 +86.0,124.0 +61.3,97.0 +46.6,85.0 +84.1,102.0 +70.8,89.0 +61.7,85.0 +62.7,80.0 +62.8,77.0 +115.2,211.0 +92.3,124.0 +71.0,112.00000000000001 +58.5,101.0 +70.3,82.0 +83.4,103.0 +99.1,99.0 +69.8,75.0 +55.1,74.0 +45.9,55.00000000000001 +113.6,172.0 +98.4,170.0 +84.8,146.0 +51.3,104.0 +68.6,90.0 +82.5,102.0 +66.8,89.0 +63.3,68.0 +50.8,56.99999999999999 +44.6,48.0 +113.9,152.0 +85.5,137.0 +78.9,127.0 +80.6,127.0 +92.1,122.0 +88.9,110.00000000000001 +78.4,103.0 +69.4,77.0 +59.3,69.0 +34.2,40.0 +77.1,95.0 +56.8,83.0 +68.7,82.0 +53.2,73.0 +52.9,55.00000000000001 +104.8,173.0 +124.5,170.0 +76.2,137.0 +51.4,98.0 +64.8,91.0 +91.2,137.0 +84.8,108.0 +70.3,98.0 +58.4,74.0 +55.2,67.0 +109.7,163.0 +98.7,141.0 +65.9,99.0 +67.4,74.0 +48.1,57.99999999999999 +91.7,137.0 +82.3,112.00000000000001 +91.1,109.00000000000001 +57.4,87.0 +75.7,82.0 +104.5,125.0 +70.6,113.99999999999999 +68.8,108.0 +73.6,103.0 +79.2,93.0 +79.4,91.0 +74.5,82.0 +65.1,78.0 +68.6,67.0 +34.1,48.0 +134.0,198.0 +104.6,172.0 +83.9,150.0 +66.0,91.0 +60.3,84.0 +95.6,112.99999999999999 +83.3,101.0 +71.7,98.0 +74.0,92.0 +44.9,70.0 +88.0,133.0 +95.7,132.0 +69.3,121.0 
+77.2,118.0 +58.6,81.0 +63.0,78.0 +67.6,72.0 +68.0,56.99999999999999 +51.5,56.99999999999999 +52.4,54.0 +99.5,184.0 +93.5,172.0 +108.0,159.0 +88.1,150.0 +75.1,100.0 +87.1,126.0 +85.4,115.99999999999999 +73.7,107.0 +77.7,97.0 +41.5,73.0 +102.1,150.0 +71.2,102.0 +61.4,96.0 +65.5,88.0 +65.6,85.0 +90.7,142.0 +85.6,129.0 +81.0,113.99999999999999 +73.5,98.0 +62.7,81.0 +72.3,115.99999999999999 +75.8,99.0 +81.8,98.0 +67.8,87.0 +69.5,81.0 +114.5,133.0 +76.9,97.0 +67.0,93.0 +60.4,84.0 +48.9,56.00000000000001 +104.0,148.0 +92.3,138.0 +95.5,132.0 +74.3,108.0 +47.7,76.0 +68.1,77.0 +56.0,74.0 +69.6,63.0 +65.4,43.0 +46.0,42.0 +151.8,239.0 +75.8,147.0 +88.4,143.0 +79.6,131.0 +83.1,125.0 +83.3,107.0 +83.0,96.0 +74.0,90.0 +57.9,61.0 +54.9,53.0 +107.7,167.0 +100.7,148.0 +101.4,148.0 +77.5,132.0 +76.0,111.00000000000001 +78.2,109.00000000000001 +78.9,100.0 +83.7,84.0 +46.4,60.0 +58.1,54.0 +114.9,163.0 +103.5,157.0 +74.6,134.0 +73.3,129.0 +74.0,104.0 +66.0,96.0 +61.8,78.0 +68.8,69.0 +62.6,62.0 +59.3,60.0 +100.6,157.0 +82.3,138.0 +96.2,137.0 +94.6,125.0 +70.1,115.99999999999999 +100.8,120.0 +70.2,74.0 +49.6,69.0 +44.0,56.99999999999999 +30.9,34.0 +97.5,159.0 +98.6,156.0 +107.1,148.0 +87.7,112.00000000000001 +59.4,101.0 +86.8,129.0 +89.2,122.0 +81.5,114.99999999999999 +68.3,101.0 +46.6,69.0 +73.6,101.0 +65.6,99.0 +81.0,99.0 +64.1,99.0 +64.0,94.0 +77.5,126.0 +79.2,119.0 +86.6,119.0 +66.0,100.0 +78.1,95.0 +88.3,120.0 +67.2,104.0 +78.6,99.0 +66.0,85.0 +59.3,79.0 +80.7,105.0 +78.2,105.0 +89.2,93.0 +74.0,86.0 +65.1,78.0 +111.0,190.0 +99.2,154.0 +75.8,93.0 +60.2,84.0 +60.5,77.0 +93.2,112.00000000000001 +81.3,89.0 +46.7,65.0 +47.3,55.00000000000001 +44.3,54.0 +89.4,137.0 +66.2,135.0 +91.6,135.0 +90.0,129.0 +65.2,112.00000000000001 +76.1,112.99999999999999 +77.7,107.0 +78.2,107.0 +78.2,103.0 +73.3,81.0 +100.7,151.0 +84.3,133.0 +75.1,103.0 +60.5,84.0 +72.7,82.0 +121.4,196.0 +101.3,141.0 +74.4,121.0 +73.4,117.0 +62.8,100.0 +86.1,123.0 +67.5,82.0 +69.2,79.0 +53.8,65.0 +43.7,65.0 +103.4,134.0 +83.9,126.0 +68.0,101.0 +64.4,89.0 +62.8,86.0 +75.6,108.0 +78.4,107.0 +80.7,107.0 +73.3,102.0 +69.7,91.0 +74.5,103.0 +75.5,102.0 +64.8,98.0 +79.4,92.0 +71.6,83.0 +92.8,134.0 +83.6,125.0 +80.6,121.0 +88.2,114.99999999999999 +57.6,79.0 +102.4,133.0 +76.3,105.0 +59.7,93.0 +61.3,80.0 +55.4,67.0 +89.4,134.0 +94.5,127.0 +83.6,122.0 +69.8,88.0 +65.3,86.0 +82.8,122.0 +74.7,107.0 +80.9,102.0 +67.2,86.0 +48.5,55.00000000000001 +91.9,150.0 +82.8,130.0 +71.1,122.0 +97.2,112.99999999999999 +62.5,99.0 +75.2,109.00000000000001 +77.3,101.0 +71.1,99.0 +74.6,99.0 +61.9,87.0 +91.9,140.0 +83.5,119.0 +74.8,107.0 +60.0,96.0 +69.3,89.0 +76.2,96.0 +77.2,89.0 +67.4,71.0 +44.8,65.0 +55.6,62.0 +122.1,189.0 +117.6,185.0 +90.6,131.0 +71.8,123.0 +52.9,85.0 +80.5,112.00000000000001 +77.0,109.00000000000001 +64.8,107.0 +66.4,93.0 +57.2,77.0 +106.7,140.0 +78.4,114.99999999999999 +80.9,110.00000000000001 +62.1,105.0 +72.5,103.0 +97.4,140.0 +88.7,131.0 +89.8,112.00000000000001 +56.4,95.0 +57.0,89.0 +81.1,129.0 +89.3,120.0 +64.0,87.0 +59.7,73.0 +52.9,63.0 +101.4,179.0 +113.7,178.0 +97.7,153.0 +81.4,125.0 +59.4,95.0 +81.6,105.0 +57.6,80.0 +58.6,63.0 +61.8,62.0 +44.7,51.0 diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py new file mode 100644 index 000000000000..2261554fabfc --- /dev/null +++ b/machine_learning/ridge_regression/model.py @@ -0,0 +1,130 @@ +import numpy as np + +"""# Ridge Regression Class +class RidgeRegression: + def __init__(self, learning_rate=0.01, num_iterations=1000, regularization_param=0.1): + 
self.learning_rate = learning_rate + self.num_iterations = num_iterations + self.regularization_param = regularization_param + self.weights = None + self.bias = None + + def fit(self, X, y): + n_samples, n_features = X.shape + + # initializing weights and bias + self.weights = np.zeros(n_features) + self.bias = 0 + + # gradient descent + for _ in range(self.num_iterations): + y_predicted = np.dot(X, self.weights) + self.bias + + # gradients for weights and bias + dw = (1/n_samples) * np.dot(X.T, (y_predicted - y)) + (self.regularization_param / n_samples) * self.weights + db = (1/n_samples) * np.sum(y_predicted - y) + + # updating weights and bias + self.weights -= self.learning_rate * dw + self.bias -= self.learning_rate * db + + def predict(self, X): + return np.dot(X, self.weights) + self.bias + + def mean_absolute_error(self, y_true, y_pred): + return np.mean(np.abs(y_true - y_pred)) + +# Load Data Function +def load_data(file_path): + data = [] + with open(file_path, 'r') as file: + for line in file.readlines()[1:]: + features = line.strip().split(',') + data.append([float(f) for f in features]) + return np.array(data) + +# Example usage +if __name__ == "__main__": + + data = load_data('ADRvsRating.csv') + X = data[:, 0].reshape(-1, 1) # independent features + y = data[:, 1] # dependent variable + + # initializing and training Ridge Regression model + model = RidgeRegression(learning_rate=0.001, num_iterations=1000, regularization_param=0.1) + model.fit(X, y) + + # predictions + predictions = model.predict(X) + + # mean absolute error + mae = model.mean_absolute_error(y, predictions) + print(f"Mean Absolute Error: {mae}") + + # final output weights and bias + print(f"Optimized Weights: {model.weights}") + print(f"Bias: {model.bias}")""" + +import pandas as pd +class RidgeRegression: + def __init__(self, alpha=0.001, lambda_=0.1, iterations=1000): + self.alpha = alpha + self.lambda_ = lambda_ + self.iterations = iterations + self.theta = None + + def feature_scaling(self, X): + mean = np.mean(X, axis=0) + std = np.std(X, axis=0) + # avoid division by zero for constant features (std = 0) + std[std == 0] = 1 # set std=1 for constant features to avoid NaN + X_scaled = (X - mean) / std + return X_scaled, mean, std + + def fit(self, X, y): + X_scaled, mean, std = self.feature_scaling(X) + m, n = X_scaled.shape + self.theta = np.zeros(n) # initializing weights to zeros + for i in range(self.iterations): + predictions = X_scaled.dot(self.theta) + error = predictions - y + # computing gradient with L2 regularization + gradient = (X_scaled.T.dot(error) + self.lambda_ * self.theta) / m + self.theta -= self.alpha * gradient # updating weights + + def predict(self, X): + X_scaled, _, _ = self.feature_scaling(X) + return X_scaled.dot(self.theta) + + def compute_cost(self, X, y): + X_scaled, _, _ = self.feature_scaling(X) + m = len(y) + predictions = X_scaled.dot(self.theta) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( + self.lambda_ / (2 * m) + ) * np.sum(self.theta**2) + return cost + + def mean_absolute_error(self, y_true, y_pred): + return np.mean(np.abs(y_true - y_pred)) +# Example usage +if __name__ == "__main__": + df = pd.read_csv("ADRvsRating.csv") + X = df[["Rating"]].values + y = df["ADR"].values + y = (y - np.mean(y)) / np.std(y) + + # Add bias term (intercept) to the feature matrix + X = np.c_[np.ones(X.shape[0]), X] + + # initialize and train the Ridge Regression model + model = RidgeRegression(alpha=0.01, lambda_=0.1, iterations=1000) + model.fit(X, y) + + # predictions 
+ predictions = model.predict(X) + + # results + print("Optimized Weights:", model.theta) + print("Cost:", model.compute_cost(X, y)) + print("Mean Absolute Error:", model.mean_absolute_error(y, predictions)) \ No newline at end of file From b72320b402ed135d9354a23daa93289665bbbc4c Mon Sep 17 00:00:00 2001 From: jbsch Date: Wed, 23 Oct 2024 19:37:10 +0530 Subject: [PATCH 02/24] added ridge regression --- machine_learning/ridge_regression/model.py | 95 +++++----------------- 1 file changed, 20 insertions(+), 75 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 2261554fabfc..de487e32ee23 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -1,112 +1,57 @@ import numpy as np - -"""# Ridge Regression Class -class RidgeRegression: - def __init__(self, learning_rate=0.01, num_iterations=1000, regularization_param=0.1): - self.learning_rate = learning_rate - self.num_iterations = num_iterations - self.regularization_param = regularization_param - self.weights = None - self.bias = None - - def fit(self, X, y): - n_samples, n_features = X.shape - - # initializing weights and bias - self.weights = np.zeros(n_features) - self.bias = 0 - - # gradient descent - for _ in range(self.num_iterations): - y_predicted = np.dot(X, self.weights) + self.bias - - # gradients for weights and bias - dw = (1/n_samples) * np.dot(X.T, (y_predicted - y)) + (self.regularization_param / n_samples) * self.weights - db = (1/n_samples) * np.sum(y_predicted - y) - - # updating weights and bias - self.weights -= self.learning_rate * dw - self.bias -= self.learning_rate * db - - def predict(self, X): - return np.dot(X, self.weights) + self.bias - - def mean_absolute_error(self, y_true, y_pred): - return np.mean(np.abs(y_true - y_pred)) - -# Load Data Function -def load_data(file_path): - data = [] - with open(file_path, 'r') as file: - for line in file.readlines()[1:]: - features = line.strip().split(',') - data.append([float(f) for f in features]) - return np.array(data) - -# Example usage -if __name__ == "__main__": - - data = load_data('ADRvsRating.csv') - X = data[:, 0].reshape(-1, 1) # independent features - y = data[:, 1] # dependent variable - - # initializing and training Ridge Regression model - model = RidgeRegression(learning_rate=0.001, num_iterations=1000, regularization_param=0.1) - model.fit(X, y) - - # predictions - predictions = model.predict(X) - - # mean absolute error - mae = model.mean_absolute_error(y, predictions) - print(f"Mean Absolute Error: {mae}") - - # final output weights and bias - print(f"Optimized Weights: {model.weights}") - print(f"Bias: {model.bias}")""" - import pandas as pd + class RidgeRegression: - def __init__(self, alpha=0.001, lambda_=0.1, iterations=1000): + def __init__(self, alpha=0.001, regularization_param=0.1, num_iterations=1000): self.alpha = alpha - self.lambda_ = lambda_ - self.iterations = iterations + self.regularization_param = regularization_param + self.num_iterations = num_iterations self.theta = None + def feature_scaling(self, X): mean = np.mean(X, axis=0) std = np.std(X, axis=0) + # avoid division by zero for constant features (std = 0) std[std == 0] = 1 # set std=1 for constant features to avoid NaN + X_scaled = (X - mean) / std return X_scaled, mean, std + def fit(self, X, y): X_scaled, mean, std = self.feature_scaling(X) m, n = X_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros - for i in range(self.iterations): + + for 
i in range(self.num_iterations): predictions = X_scaled.dot(self.theta) error = predictions - y + # computing gradient with L2 regularization - gradient = (X_scaled.T.dot(error) + self.lambda_ * self.theta) / m + gradient = (X_scaled.T.dot(error) + self.regularization_param * self.theta) / m self.theta -= self.alpha * gradient # updating weights + def predict(self, X): X_scaled, _, _ = self.feature_scaling(X) return X_scaled.dot(self.theta) + def compute_cost(self, X, y): X_scaled, _, _ = self.feature_scaling(X) m = len(y) + predictions = X_scaled.dot(self.theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( - self.lambda_ / (2 * m) - ) * np.sum(self.theta**2) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (self.regularization_param / (2 * m)) * np.sum(self.theta**2) return cost + def mean_absolute_error(self, y_true, y_pred): return np.mean(np.abs(y_true - y_pred)) + + # Example usage if __name__ == "__main__": df = pd.read_csv("ADRvsRating.csv") @@ -118,7 +63,7 @@ def mean_absolute_error(self, y_true, y_pred): X = np.c_[np.ones(X.shape[0]), X] # initialize and train the Ridge Regression model - model = RidgeRegression(alpha=0.01, lambda_=0.1, iterations=1000) + model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) model.fit(X, y) # predictions From d4fc2bf852ec4a023380f4ef367edefa88fd6881 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:10:46 +0000 Subject: [PATCH 03/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression/model.py | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index de487e32ee23..ecb160671caa 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd + class RidgeRegression: def __init__(self, alpha=0.001, regularization_param=0.1, num_iterations=1000): self.alpha = alpha @@ -8,49 +9,48 @@ def __init__(self, alpha=0.001, regularization_param=0.1, num_iterations=1000): self.num_iterations = num_iterations self.theta = None - def feature_scaling(self, X): mean = np.mean(X, axis=0) std = np.std(X, axis=0) - + # avoid division by zero for constant features (std = 0) std[std == 0] = 1 # set std=1 for constant features to avoid NaN - + X_scaled = (X - mean) / std return X_scaled, mean, std - def fit(self, X, y): X_scaled, mean, std = self.feature_scaling(X) m, n = X_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros - + for i in range(self.num_iterations): predictions = X_scaled.dot(self.theta) error = predictions - y - + # computing gradient with L2 regularization - gradient = (X_scaled.T.dot(error) + self.regularization_param * self.theta) / m + gradient = ( + X_scaled.T.dot(error) + self.regularization_param * self.theta + ) / m self.theta -= self.alpha * gradient # updating weights - def predict(self, X): X_scaled, _, _ = self.feature_scaling(X) return X_scaled.dot(self.theta) - def compute_cost(self, X, y): - X_scaled, _, _ = self.feature_scaling(X) + X_scaled, _, _ = self.feature_scaling(X) m = len(y) - + predictions = X_scaled.dot(self.theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (self.regularization_param / (2 * m)) * np.sum(self.theta**2) + cost = (1 / (2 * m)) * 
np.sum((predictions - y) ** 2) + ( + self.regularization_param / (2 * m) + ) * np.sum(self.theta**2) return cost - def mean_absolute_error(self, y_true, y_pred): return np.mean(np.abs(y_true - y_pred)) - + # Example usage if __name__ == "__main__": @@ -60,7 +60,7 @@ def mean_absolute_error(self, y_true, y_pred): y = (y - np.mean(y)) / np.std(y) # Add bias term (intercept) to the feature matrix - X = np.c_[np.ones(X.shape[0]), X] + X = np.c_[np.ones(X.shape[0]), X] # initialize and train the Ridge Regression model model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) @@ -72,4 +72,4 @@ def mean_absolute_error(self, y_true, y_pred): # results print("Optimized Weights:", model.theta) print("Cost:", model.compute_cost(X, y)) - print("Mean Absolute Error:", model.mean_absolute_error(y, predictions)) \ No newline at end of file + print("Mean Absolute Error:", model.mean_absolute_error(y, predictions)) From a84d209c083cfafa0124fd0b7cc21c83fac28116 Mon Sep 17 00:00:00 2001 From: jbsch Date: Wed, 23 Oct 2024 19:47:34 +0530 Subject: [PATCH 04/24] added ridge regression --- machine_learning/ridge_regression/model.py | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index de487e32ee23..486fe5a332f5 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -2,14 +2,14 @@ import pandas as pd class RidgeRegression: - def __init__(self, alpha=0.001, regularization_param=0.1, num_iterations=1000): - self.alpha = alpha - self.regularization_param = regularization_param - self.num_iterations = num_iterations - self.theta = None + def __init__(self, alpha:float=0.001, regularization_param:float=0.1, num_iterations:int=1000) -> None: + self.alpha:float = alpha + self.regularization_param:float = regularization_param + self.num_iterations:int = num_iterations + self.theta:np.ndarray = None - def feature_scaling(self, X): + def feature_scaling(self, X:np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]: mean = np.mean(X, axis=0) std = np.std(X, axis=0) @@ -20,7 +20,7 @@ def feature_scaling(self, X): return X_scaled, mean, std - def fit(self, X, y): + def fit(self, X:np.ndarray, y:np.ndarray) -> None: X_scaled, mean, std = self.feature_scaling(X) m, n = X_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros @@ -34,12 +34,12 @@ def fit(self, X, y): self.theta -= self.alpha * gradient # updating weights - def predict(self, X): + def predict(self, X:np.ndarray) -> np.ndarray: X_scaled, _, _ = self.feature_scaling(X) return X_scaled.dot(self.theta) - def compute_cost(self, X, y): + def compute_cost(self, X:np.ndarray, y:np.ndarray) -> float: X_scaled, _, _ = self.feature_scaling(X) m = len(y) @@ -48,7 +48,7 @@ def compute_cost(self, X, y): return cost - def mean_absolute_error(self, y_true, y_pred): + def mean_absolute_error(self, y_true:np.ndarray, y_pred:np.ndarray) -> float: return np.mean(np.abs(y_true - y_pred)) @@ -59,10 +59,10 @@ def mean_absolute_error(self, y_true, y_pred): y = df["ADR"].values y = (y - np.mean(y)) / np.std(y) - # Add bias term (intercept) to the feature matrix + # added bias term to the feature matrix X = np.c_[np.ones(X.shape[0]), X] - # initialize and train the Ridge Regression model + # initialize and train the ridge regression model model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) model.fit(X, y) From 
21fe32fcbeebfe979511f7fb3fd0591ec05dd4ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:21:40 +0000 Subject: [PATCH 05/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 4f94e569e0ec..1d7324f8808d 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -45,7 +45,7 @@ def predict(self, X:np.ndarray) -> np.ndarray: <<<<<<< HEAD def compute_cost(self, X:np.ndarray, y:np.ndarray) -> float: - X_scaled, _, _ = self.feature_scaling(X) + X_scaled, _, _ = self.feature_scaling(X) ======= def compute_cost(self, X, y): X_scaled, _, _ = self.feature_scaling(X) @@ -71,7 +71,7 @@ def mean_absolute_error(self, y_true:np.ndarray, y_pred:np.ndarray) -> float: <<<<<<< HEAD # added bias term to the feature matrix - X = np.c_[np.ones(X.shape[0]), X] + X = np.c_[np.ones(X.shape[0]), X] ======= # Add bias term (intercept) to the feature matrix X = np.c_[np.ones(X.shape[0]), X] From 7484cda51603ca8ec16f6319a3fef3308419a802 Mon Sep 17 00:00:00 2001 From: jbsch Date: Wed, 23 Oct 2024 20:40:28 +0530 Subject: [PATCH 06/24] ridge regression --- machine_learning/ridge_regression/model.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 4f94e569e0ec..03f51ce1c2bf 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -9,12 +9,8 @@ def __init__(self, alpha:float=0.001, regularization_param:float=0.1, num_iterat self.num_iterations:int = num_iterations self.theta:np.ndarray = None -<<<<<<< HEAD def feature_scaling(self, X:np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]: -======= - def feature_scaling(self, X): ->>>>>>> d4fc2bf852ec4a023380f4ef367edefa88fd6881 mean = np.mean(X, axis=0) std = np.std(X, axis=0) @@ -43,13 +39,8 @@ def predict(self, X:np.ndarray) -> np.ndarray: X_scaled, _, _ = self.feature_scaling(X) return X_scaled.dot(self.theta) -<<<<<<< HEAD def compute_cost(self, X:np.ndarray, y:np.ndarray) -> float: X_scaled, _, _ = self.feature_scaling(X) -======= - def compute_cost(self, X, y): - X_scaled, _, _ = self.feature_scaling(X) ->>>>>>> d4fc2bf852ec4a023380f4ef367edefa88fd6881 m = len(y) predictions = X_scaled.dot(self.theta) @@ -69,13 +60,8 @@ def mean_absolute_error(self, y_true:np.ndarray, y_pred:np.ndarray) -> float: y = df["ADR"].values y = (y - np.mean(y)) / np.std(y) -<<<<<<< HEAD # added bias term to the feature matrix X = np.c_[np.ones(X.shape[0]), X] -======= - # Add bias term (intercept) to the feature matrix - X = np.c_[np.ones(X.shape[0]), X] ->>>>>>> d4fc2bf852ec4a023380f4ef367edefa88fd6881 # initialize and train the ridge regression model model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) From 2eeb450e2d4c2e1f0ffb811626db32077055f3da Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:15:26 +0000 Subject: [PATCH 07/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression/model.py | 37 +++++++++++----------- 1 file changed, 19 
insertions(+), 18 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 3a41ff60af1f..25f6dd13ff49 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -3,14 +3,20 @@ class RidgeRegression: - def __init__(self, alpha:float=0.001, regularization_param:float=0.1, num_iterations:int=1000) -> None: - self.alpha:float = alpha - self.regularization_param:float = regularization_param - self.num_iterations:int = num_iterations - self.theta:np.ndarray = None - - - def feature_scaling(self, X:np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + def __init__( + self, + alpha: float = 0.001, + regularization_param: float = 0.1, + num_iterations: int = 1000, + ) -> None: + self.alpha: float = alpha + self.regularization_param: float = regularization_param + self.num_iterations: int = num_iterations + self.theta: np.ndarray = None + + def feature_scaling( + self, X: np.ndarray + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: mean = np.mean(X, axis=0) std = np.std(X, axis=0) @@ -20,13 +26,11 @@ def feature_scaling(self, X:np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndar X_scaled = (X - mean) / std return X_scaled, mean, std - - def fit(self, X:np.ndarray, y:np.ndarray) -> None: + def fit(self, X: np.ndarray, y: np.ndarray) -> None: X_scaled, mean, std = self.feature_scaling(X) m, n = X_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros - for i in range(self.num_iterations): predictions = X_scaled.dot(self.theta) error = predictions - y @@ -37,13 +41,11 @@ def fit(self, X:np.ndarray, y:np.ndarray) -> None: ) / m self.theta -= self.alpha * gradient # updating weights - - def predict(self, X:np.ndarray) -> np.ndarray: + def predict(self, X: np.ndarray) -> np.ndarray: X_scaled, _, _ = self.feature_scaling(X) return X_scaled.dot(self.theta) - - def compute_cost(self, X:np.ndarray, y:np.ndarray) -> float: + def compute_cost(self, X: np.ndarray, y: np.ndarray) -> float: X_scaled, _, _ = self.feature_scaling(X) m = len(y) @@ -53,8 +55,7 @@ def compute_cost(self, X:np.ndarray, y:np.ndarray) -> float: ) * np.sum(self.theta**2) return cost - - def mean_absolute_error(self, y_true:np.ndarray, y_pred:np.ndarray) -> float: + def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float: return np.mean(np.abs(y_true - y_pred)) @@ -66,7 +67,7 @@ def mean_absolute_error(self, y_true:np.ndarray, y_pred:np.ndarray) -> float: y = (y - np.mean(y)) / np.std(y) # added bias term to the feature matrix - X = np.c_[np.ones(X.shape[0]), X] + X = np.c_[np.ones(X.shape[0]), X] # initialize and train the ridge regression model model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) From 1713cbe7c20864f6d3eaa2d1b521ef1a1da4828d Mon Sep 17 00:00:00 2001 From: jbsch Date: Wed, 23 Oct 2024 20:51:58 +0530 Subject: [PATCH 08/24] resolved errors --- machine_learning/ridge_regression/__init__.py | 0 machine_learning/ridge_regression/model.py | 55 ++++++++++--------- 2 files changed, 29 insertions(+), 26 deletions(-) create mode 100644 machine_learning/ridge_regression/__init__.py diff --git a/machine_learning/ridge_regression/__init__.py b/machine_learning/ridge_regression/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 3a41ff60af1f..33e281543ab8 100644 --- a/machine_learning/ridge_regression/model.py +++ 
b/machine_learning/ridge_regression/model.py @@ -3,54 +3,57 @@ class RidgeRegression: - def __init__(self, alpha:float=0.001, regularization_param:float=0.1, num_iterations:int=1000) -> None: + def __init__(self, + alpha:float=0.001, + regularization_param:float=0.1, + num_iterations:int=1000) -> None: self.alpha:float = alpha self.regularization_param:float = regularization_param self.num_iterations:int = num_iterations self.theta:np.ndarray = None - def feature_scaling(self, X:np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - mean = np.mean(X, axis=0) - std = np.std(X, axis=0) + def feature_scaling(self, x:np.ndarray)-> tuple[np.ndarray, np.ndarray, np.ndarray]: + mean = np.mean(x, axis=0) + std = np.std(x, axis=0) # avoid division by zero for constant features (std = 0) std[std == 0] = 1 # set std=1 for constant features to avoid NaN - X_scaled = (X - mean) / std - return X_scaled, mean, std + x_scaled = (x - mean) / std + return x_scaled, mean, std - def fit(self, X:np.ndarray, y:np.ndarray) -> None: - X_scaled, mean, std = self.feature_scaling(X) - m, n = X_scaled.shape + def fit(self, x:np.ndarray, y:np.ndarray) -> None: + x_scaled, mean, std = self.feature_scaling(x) + m, n = x_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros - for i in range(self.num_iterations): - predictions = X_scaled.dot(self.theta) + predictions = x_scaled.dot(self.theta) error = predictions - y # computing gradient with L2 regularization gradient = ( - X_scaled.T.dot(error) + self.regularization_param * self.theta + x_scaled.T.dot(error) + self.regularization_param * self.theta ) / m self.theta -= self.alpha * gradient # updating weights - def predict(self, X:np.ndarray) -> np.ndarray: - X_scaled, _, _ = self.feature_scaling(X) - return X_scaled.dot(self.theta) + def predict(self, x:np.ndarray) -> np.ndarray: + x_scaled, _, _ = self.feature_scaling(x) + return x_scaled.dot(self.theta) - def compute_cost(self, X:np.ndarray, y:np.ndarray) -> float: - X_scaled, _, _ = self.feature_scaling(X) + def compute_cost(self, x:np.ndarray, y:np.ndarray) -> float: + x_scaled, _, _ = self.feature_scaling(x) m = len(y) - predictions = X_scaled.dot(self.theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( - self.regularization_param / (2 * m) - ) * np.sum(self.theta**2) + predictions = x_scaled.dot(self.theta) + cost = ( + 1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( + self.regularization_param / (2 * m) + ) * np.sum(self.theta**2) return cost @@ -61,21 +64,21 @@ def mean_absolute_error(self, y_true:np.ndarray, y_pred:np.ndarray) -> float: # Example usage if __name__ == "__main__": df = pd.read_csv("ADRvsRating.csv") - X = df[["Rating"]].values + x = df[["Rating"]].values y = df["ADR"].values y = (y - np.mean(y)) / np.std(y) # added bias term to the feature matrix - X = np.c_[np.ones(X.shape[0]), X] + x = np.c_[np.ones(x.shape[0]), x] # initialize and train the ridge regression model model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) - model.fit(X, y) + model.fit(x, y) # predictions - predictions = model.predict(X) + predictions = model.predict(x) # results print("Optimized Weights:", model.theta) - print("Cost:", model.compute_cost(X, y)) + print("Cost:", model.compute_cost(x, y)) print("Mean Absolute Error:", model.mean_absolute_error(y, predictions)) From c76784e7084f514dd7cd44698f57bbb720c7ebdc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:25:53 +0000 
Subject: [PATCH 09/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression/model.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 2142f09ddc18..281a6276968d 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -3,11 +3,12 @@ class RidgeRegression: - def __init__(self, - alpha: float = 0.001, - regularization_param: float = 0.1, - num_iterations: int = 1000, - ) -> None: + def __init__( + self, + alpha: float = 0.001, + regularization_param: float = 0.1, + num_iterations: int = 1000, + ) -> None: self.alpha: float = alpha self.regularization_param: float = regularization_param self.num_iterations: int = num_iterations @@ -49,10 +50,9 @@ def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float: m = len(y) predictions = x_scaled.dot(self.theta) - cost = ( - 1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( - self.regularization_param / (2 * m) - ) * np.sum(self.theta**2) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( + self.regularization_param / (2 * m) + ) * np.sum(self.theta**2) return cost def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float: From 544a38b016d2d596b66294c7268623822d58e17c Mon Sep 17 00:00:00 2001 From: jbsch Date: Wed, 23 Oct 2024 21:01:03 +0530 Subject: [PATCH 10/24] resolved conflicts --- machine_learning/ridge_regression/model.py | 38 +++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 281a6276968d..6a1470c5c86d 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -3,22 +3,21 @@ class RidgeRegression: - def __init__( - self, - alpha: float = 0.001, - regularization_param: float = 0.1, - num_iterations: int = 1000, - ) -> None: + def __init__(self, + alpha: float = 0.001, + regularization_param: float = 0.1, + num_iterations: int = 1000, + ) -> None: self.alpha: float = alpha self.regularization_param: float = regularization_param self.num_iterations: int = num_iterations self.theta: np.ndarray = None def feature_scaling( - self, X: np.ndarray + self, x: np.ndarray ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - mean = np.mean(X, axis=0) - std = np.std(X, axis=0) + mean = np.mean(x, axis=0) + std = np.std(x, axis=0) # avoid division by zero for constant features (std = 0) std[std == 0] = 1 # set std=1 for constant features to avoid NaN @@ -31,7 +30,7 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> None: m, n = x_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros - for i in range(self.num_iterations): + for _ in range(self.num_iterations): predictions = x_scaled.dot(self.theta) error = predictions - y @@ -41,18 +40,19 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> None: ) / m self.theta -= self.alpha * gradient # updating weights - def predict(self, X: np.ndarray) -> np.ndarray: - X_scaled, _, _ = self.feature_scaling(X) - return X_scaled.dot(self.theta) + def predict(self, x: np.ndarray) -> np.ndarray: + x_scaled, _, _ = self.feature_scaling(x) + return x_scaled.dot(self.theta) def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float: x_scaled, _, _ = self.feature_scaling(x) m = len(y) predictions = x_scaled.dot(self.theta) - cost = (1 / (2 * m)) * 
np.sum((predictions - y) ** 2) + ( - self.regularization_param / (2 * m) - ) * np.sum(self.theta**2) + cost = ( + 1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( + self.regularization_param / (2 * m) + ) * np.sum(self.theta**2) return cost def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float: @@ -61,9 +61,9 @@ def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float: # Example usage if __name__ == "__main__": - df = pd.read_csv("ADRvsRating.csv") - x = df[["Rating"]].values - y = df["ADR"].values + data = pd.read_csv("ADRvsRating.csv") + x = data[["Rating"]].to_numpy() + y = data["ADR"].to_numpy() y = (y - np.mean(y)) / np.std(y) # added bias term to the feature matrix From d5963b2da7fff2b1883d8868d61127b62bac165e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:34:22 +0000 Subject: [PATCH 11/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression/model.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 6a1470c5c86d..b0908f9ef514 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -3,11 +3,12 @@ class RidgeRegression: - def __init__(self, - alpha: float = 0.001, - regularization_param: float = 0.1, - num_iterations: int = 1000, - ) -> None: + def __init__( + self, + alpha: float = 0.001, + regularization_param: float = 0.1, + num_iterations: int = 1000, + ) -> None: self.alpha: float = alpha self.regularization_param: float = regularization_param self.num_iterations: int = num_iterations @@ -49,10 +50,9 @@ def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float: m = len(y) predictions = x_scaled.dot(self.theta) - cost = ( - 1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( - self.regularization_param / (2 * m) - ) * np.sum(self.theta**2) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( + self.regularization_param / (2 * m) + ) * np.sum(self.theta**2) return cost def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float: From b0255a87feea221e0c986dd0c3af14f5131f4b5d Mon Sep 17 00:00:00 2001 From: jbsch Date: Thu, 24 Oct 2024 10:51:59 +0530 Subject: [PATCH 12/24] added doctests --- .../{model.py => ridge_regression.py} | 0 .../ridge_regression/test_ridge_regression.py | 91 +++++++++++++++++++ 2 files changed, 91 insertions(+) rename machine_learning/ridge_regression/{model.py => ridge_regression.py} (100%) create mode 100644 machine_learning/ridge_regression/test_ridge_regression.py diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/ridge_regression.py similarity index 100% rename from machine_learning/ridge_regression/model.py rename to machine_learning/ridge_regression/ridge_regression.py diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py new file mode 100644 index 000000000000..03c4218a5158 --- /dev/null +++ b/machine_learning/ridge_regression/test_ridge_regression.py @@ -0,0 +1,91 @@ +""" +Doctest for RidgeRegression class + +Tests include: +- feature_scaling +- fit +- predict +- mean_absolute_error + +To run these tests, use the following command: + python -m doctest test_ridge_regression.py -v +""" + +import numpy as np +from 
ridge_regression import RidgeRegression + +def test_feature_scaling(): + """ + Tests the feature_scaling function of RidgeRegression. + -------- + >>> model = RidgeRegression() + >>> X = np.array([[1, 2], [2, 3], [3, 4]]) + >>> X_scaled, mean, std = model.feature_scaling(X) + >>> np.round(X_scaled, 2) + array([[-1.22, -1.22], + [ 0. , 0. ], + [ 1.22, 1.22]]) + >>> np.round(mean, 2) + array([2., 3.]) + >>> np.round(std, 2) + array([0.82, 0.82]) + """ + pass + +def test_fit(): + """ + Tests the fit function of RidgeRegression + -------- + >>> model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) + >>> X = np.array([[1], [2], [3]]) + >>> y = np.array([2, 3, 4]) + + # Adding a bias term + >>> X = np.c_[np.ones(X.shape[0]), X] + + # Fit the model + >>> model.fit(X, y) + + # Check if the weights have been updated + >>> np.round(model.theta, decimals=2) + array([0. , 0.79]) + """ + pass + +def test_predict(): + """ + Tests the predict function of RidgeRegression + -------- + >>> model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) + >>> X = np.array([[1], [2], [3]]) + >>> y = np.array([2, 3, 4]) + + # Adding a bias term + >>> X = np.c_[np.ones(X.shape[0]), X] + + # Fit the model + >>> model.fit(X, y) + + # Predict with the model + >>> predictions = model.predict(X) + >>> np.round(predictions, decimals=2) + array([-0.97, 0. , 0.97]) + """ + pass + +def test_mean_absolute_error(): + """ + Tests the mean_absolute_error function of RidgeRegression + -------- + >>> model = RidgeRegression() + >>> y_true = np.array([2, 3, 4]) + >>> y_pred = np.array([2.1, 3.0, 3.9]) + >>> mae = model.mean_absolute_error(y_true, y_pred) + >>> float(np.round(mae, 2)) + 0.07 + """ + pass + +if __name__ == "__main__": + import doctest + doctest.testmod() \ No newline at end of file From 59d3ceba272d97616e0f10fbeac69b07b8610777 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Oct 2024 05:24:47 +0000 Subject: [PATCH 13/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../ridge_regression/test_ridge_regression.py | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py index 03c4218a5158..a698c5445bbe 100644 --- a/machine_learning/ridge_regression/test_ridge_regression.py +++ b/machine_learning/ridge_regression/test_ridge_regression.py @@ -14,24 +14,26 @@ import numpy as np from ridge_regression import RidgeRegression + def test_feature_scaling(): """ - Tests the feature_scaling function of RidgeRegression. - -------- - >>> model = RidgeRegression() - >>> X = np.array([[1, 2], [2, 3], [3, 4]]) - >>> X_scaled, mean, std = model.feature_scaling(X) - >>> np.round(X_scaled, 2) - array([[-1.22, -1.22], - [ 0. , 0. ], - [ 1.22, 1.22]]) - >>> np.round(mean, 2) - array([2., 3.]) - >>> np.round(std, 2) - array([0.82, 0.82]) + Tests the feature_scaling function of RidgeRegression. + -------- + >>> model = RidgeRegression() + >>> X = np.array([[1, 2], [2, 3], [3, 4]]) + >>> X_scaled, mean, std = model.feature_scaling(X) + >>> np.round(X_scaled, 2) + array([[-1.22, -1.22], + [ 0. , 0. 
], + [ 1.22, 1.22]]) + >>> np.round(mean, 2) + array([2., 3.]) + >>> np.round(std, 2) + array([0.82, 0.82]) """ pass + def test_fit(): """ Tests the fit function of RidgeRegression @@ -39,19 +41,20 @@ def test_fit(): >>> model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) >>> X = np.array([[1], [2], [3]]) >>> y = np.array([2, 3, 4]) - + # Adding a bias term >>> X = np.c_[np.ones(X.shape[0]), X] - + # Fit the model >>> model.fit(X, y) - + # Check if the weights have been updated >>> np.round(model.theta, decimals=2) array([0. , 0.79]) """ pass + def test_predict(): """ Tests the predict function of RidgeRegression @@ -59,13 +62,13 @@ def test_predict(): >>> model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) >>> X = np.array([[1], [2], [3]]) >>> y = np.array([2, 3, 4]) - + # Adding a bias term >>> X = np.c_[np.ones(X.shape[0]), X] - + # Fit the model >>> model.fit(X, y) - + # Predict with the model >>> predictions = model.predict(X) >>> np.round(predictions, decimals=2) @@ -73,6 +76,7 @@ def test_predict(): """ pass + def test_mean_absolute_error(): """ Tests the mean_absolute_error function of RidgeRegression @@ -86,6 +90,8 @@ def test_mean_absolute_error(): """ pass + if __name__ == "__main__": import doctest - doctest.testmod() \ No newline at end of file + + doctest.testmod() From 83d7252b3a9f33cd5b9b73972c9db021f320077e Mon Sep 17 00:00:00 2001 From: jbsch Date: Thu, 24 Oct 2024 12:03:41 +0530 Subject: [PATCH 14/24] ruff and minor checks --- .../ridge_regression/test_ridge_regression.py | 61 ++++++++++--------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py index 03c4218a5158..810d0e05d593 100644 --- a/machine_learning/ridge_regression/test_ridge_regression.py +++ b/machine_learning/ridge_regression/test_ridge_regression.py @@ -11,67 +11,71 @@ python -m doctest test_ridge_regression.py -v """ -import numpy as np -from ridge_regression import RidgeRegression +# from ridge_regression import RidgeRegression + def test_feature_scaling(): """ - Tests the feature_scaling function of RidgeRegression. - -------- - >>> model = RidgeRegression() - >>> X = np.array([[1, 2], [2, 3], [3, 4]]) - >>> X_scaled, mean, std = model.feature_scaling(X) - >>> np.round(X_scaled, 2) - array([[-1.22, -1.22], - [ 0. , 0. ], - [ 1.22, 1.22]]) - >>> np.round(mean, 2) - array([2., 3.]) - >>> np.round(std, 2) - array([0.82, 0.82]) + Tests the feature_scaling function of RidgeRegression. + -------- + >>> model = RidgeRegression() + >>> X = np.array([[1, 2], [2, 3], [3, 4]]) + >>> X_scaled, mean, std = model.feature_scaling(X) + >>> np.round(X_scaled, 2) + array([[-1.22, -1.22], + [ 0. , 0. ], + [ 1.22, 1.22]]) + >>> np.round(mean, 2) + array([2., 3.]) + >>> np.round(std, 2) + array([0.82, 0.82]) """ - pass + def test_fit(): """ Tests the fit function of RidgeRegression -------- - >>> model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) + >>> model = RidgeRegression(alpha=0.01, + regularization_param=0.1, + num_iterations=1000) >>> X = np.array([[1], [2], [3]]) >>> y = np.array([2, 3, 4]) - + # Adding a bias term >>> X = np.c_[np.ones(X.shape[0]), X] - + # Fit the model >>> model.fit(X, y) - + # Check if the weights have been updated >>> np.round(model.theta, decimals=2) array([0. 
, 0.79]) """ - pass + def test_predict(): """ Tests the predict function of RidgeRegression -------- - >>> model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) + >>> model = RidgeRegression(alpha=0.01, + regularization_param=0.1, + num_iterations=1000) >>> X = np.array([[1], [2], [3]]) >>> y = np.array([2, 3, 4]) - + # Adding a bias term >>> X = np.c_[np.ones(X.shape[0]), X] - + # Fit the model >>> model.fit(X, y) - + # Predict with the model >>> predictions = model.predict(X) >>> np.round(predictions, decimals=2) array([-0.97, 0. , 0.97]) """ - pass + def test_mean_absolute_error(): """ @@ -84,8 +88,9 @@ def test_mean_absolute_error(): >>> float(np.round(mae, 2)) 0.07 """ - pass + if __name__ == "__main__": import doctest - doctest.testmod() \ No newline at end of file + + doctest.testmod() From f614b2efeee05fba2571cf14839207d1163c002d Mon Sep 17 00:00:00 2001 From: jbsch Date: Thu, 24 Oct 2024 12:06:04 +0530 Subject: [PATCH 15/24] minor chenges --- machine_learning/ridge_regression/test_ridge_regression.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py index 7920baca6acc..810d0e05d593 100644 --- a/machine_learning/ridge_regression/test_ridge_regression.py +++ b/machine_learning/ridge_regression/test_ridge_regression.py @@ -14,7 +14,6 @@ # from ridge_regression import RidgeRegression - def test_feature_scaling(): """ Tests the feature_scaling function of RidgeRegression. @@ -33,7 +32,6 @@ def test_feature_scaling(): """ - def test_fit(): """ Tests the fit function of RidgeRegression @@ -56,7 +54,6 @@ def test_fit(): """ - def test_predict(): """ Tests the predict function of RidgeRegression @@ -80,7 +77,6 @@ def test_predict(): """ - def test_mean_absolute_error(): """ Tests the mean_absolute_error function of RidgeRegression @@ -94,7 +90,6 @@ def test_mean_absolute_error(): """ - if __name__ == "__main__": import doctest From 254b9bf87efd7a63f7c76ac7c349ddbbb8358cc3 Mon Sep 17 00:00:00 2001 From: jbsch Date: Thu, 24 Oct 2024 12:18:13 +0530 Subject: [PATCH 16/24] minor checks --- machine_learning/ridge_regression/test_ridge_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py index 810d0e05d593..1153a1d85c34 100644 --- a/machine_learning/ridge_regression/test_ridge_regression.py +++ b/machine_learning/ridge_regression/test_ridge_regression.py @@ -11,7 +11,7 @@ python -m doctest test_ridge_regression.py -v """ -# from ridge_regression import RidgeRegression +# from machine_learning.ridge_regression import RidgeRegression def test_feature_scaling(): From 97eb85384295dcadd71ea255dcfc4db9382471f4 Mon Sep 17 00:00:00 2001 From: jbsch Date: Thu, 24 Oct 2024 12:37:54 +0530 Subject: [PATCH 17/24] minor checks --- .../ridge_regression/test_ridge_regression.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py index 1153a1d85c34..71d485bfade5 100644 --- a/machine_learning/ridge_regression/test_ridge_regression.py +++ b/machine_learning/ridge_regression/test_ridge_regression.py @@ -11,7 +11,8 @@ python -m doctest test_ridge_regression.py -v """ -# from machine_learning.ridge_regression import RidgeRegression +import numpy as np # noqa: F401 +from 
ridge_regression import RidgeRegression # noqa: F401 def test_feature_scaling(): @@ -37,8 +38,8 @@ def test_fit(): Tests the fit function of RidgeRegression -------- >>> model = RidgeRegression(alpha=0.01, - regularization_param=0.1, - num_iterations=1000) + ... regularization_param=0.1, + ... num_iterations=1000) >>> X = np.array([[1], [2], [3]]) >>> y = np.array([2, 3, 4]) @@ -59,8 +60,8 @@ def test_predict(): Tests the predict function of RidgeRegression -------- >>> model = RidgeRegression(alpha=0.01, - regularization_param=0.1, - num_iterations=1000) + ... regularization_param=0.1, + ... num_iterations=1000) >>> X = np.array([[1], [2], [3]]) >>> y = np.array([2, 3, 4]) From dcf47d4821b8c8168445a21dcca059a174ead695 Mon Sep 17 00:00:00 2001 From: jbsch Date: Thu, 24 Oct 2024 14:58:00 +0530 Subject: [PATCH 18/24] minor changes --- machine_learning/ridge_regression/test_ridge_regression.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py index 71d485bfade5..a113fc8655c8 100644 --- a/machine_learning/ridge_regression/test_ridge_regression.py +++ b/machine_learning/ridge_regression/test_ridge_regression.py @@ -12,7 +12,10 @@ """ import numpy as np # noqa: F401 -from ridge_regression import RidgeRegression # noqa: F401 + +from machine_learning.ridge_regression.ridge_regression import ( + RidgeRegression, # noqa: F401 +) def test_feature_scaling(): From 0ea341a18be3cdbd9482c16f9a333b83620795aa Mon Sep 17 00:00:00 2001 From: jbsch Date: Thu, 24 Oct 2024 15:28:12 +0530 Subject: [PATCH 19/24] descriptive names --- .../ridge_regression/ridge_regression.py | 58 +++++++++---------- .../ridge_regression/test_ridge_regression.py | 30 +++++----- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/machine_learning/ridge_regression/ridge_regression.py b/machine_learning/ridge_regression/ridge_regression.py index b0908f9ef514..1c2c13fa99bc 100644 --- a/machine_learning/ridge_regression/ridge_regression.py +++ b/machine_learning/ridge_regression/ridge_regression.py @@ -15,68 +15,68 @@ def __init__( self.theta: np.ndarray = None def feature_scaling( - self, x: np.ndarray + self, features: np.ndarray ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - mean = np.mean(x, axis=0) - std = np.std(x, axis=0) + mean = np.mean(features, axis=0) + std = np.std(features, axis=0) # avoid division by zero for constant features (std = 0) std[std == 0] = 1 # set std=1 for constant features to avoid NaN - x_scaled = (x - mean) / std - return x_scaled, mean, std + features_scaled = (features - mean) / std + return features_scaled, mean, std - def fit(self, x: np.ndarray, y: np.ndarray) -> None: - x_scaled, mean, std = self.feature_scaling(x) - m, n = x_scaled.shape + def fit(self, features: np.ndarray, target: np.ndarray) -> None: + features_scaled, mean, std = self.feature_scaling(features) + m, n = features_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros for _ in range(self.num_iterations): - predictions = x_scaled.dot(self.theta) - error = predictions - y + predictions = features_scaled.dot(self.theta) + error = predictions - target # computing gradient with L2 regularization gradient = ( - x_scaled.T.dot(error) + self.regularization_param * self.theta + features_scaled.T.dot(error) + self.regularization_param * self.theta ) / m self.theta -= self.alpha * gradient # updating weights - def predict(self, x: np.ndarray) -> np.ndarray: - x_scaled, _, _ = 
-        x_scaled, _, _ = self.feature_scaling(x)
-        return x_scaled.dot(self.theta)
+    def predict(self, features: np.ndarray) -> np.ndarray:
+        features_scaled, _, _ = self.feature_scaling(features)
+        return features_scaled.dot(self.theta)
 
-    def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float:
-        x_scaled, _, _ = self.feature_scaling(x)
-        m = len(y)
+    def compute_cost(self, features: np.ndarray, target: np.ndarray) -> float:
+        features_scaled, _, _ = self.feature_scaling(features)
+        m = len(target)
 
-        predictions = x_scaled.dot(self.theta)
-        cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (
+        predictions = features_scaled.dot(self.theta)
+        cost = (1 / (2 * m)) * np.sum((predictions - target) ** 2) + (
             self.regularization_param / (2 * m)
         ) * np.sum(self.theta**2)
         return cost
 
-    def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
-        return np.mean(np.abs(y_true - y_pred))
+    def mean_absolute_error(self, target: np.ndarray, predictions: np.ndarray) -> float:
+        return np.mean(np.abs(target - predictions))
 
 
 # Example usage
 if __name__ == "__main__":
     data = pd.read_csv("ADRvsRating.csv")
-    x = data[["Rating"]].to_numpy()
-    y = data["ADR"].to_numpy()
-    y = (y - np.mean(y)) / np.std(y)
+    features_matrix = data[["Rating"]].to_numpy()
+    target = data["ADR"].to_numpy()
+    target = (target - np.mean(target)) / np.std(target)
 
     # added bias term to the feature matrix
-    x = np.c_[np.ones(x.shape[0]), x]
+    features_matrix = np.c_[np.ones(features_matrix.shape[0]), features_matrix]
 
     # initialize and train the ridge regression model
     model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000)
-    model.fit(x, y)
+    model.fit(features_matrix, target)
 
     # predictions
-    predictions = model.predict(x)
+    predictions = model.predict(features_matrix)
 
     # results
     print("Optimized Weights:", model.theta)
-    print("Cost:", model.compute_cost(x, y))
-    print("Mean Absolute Error:", model.mean_absolute_error(y, predictions))
+    print("Cost:", model.compute_cost(features_matrix, target))
+    print("Mean Absolute Error:", model.mean_absolute_error(target, predictions))
diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py
index a113fc8655c8..6bf6d6024c88 100644
--- a/machine_learning/ridge_regression/test_ridge_regression.py
+++ b/machine_learning/ridge_regression/test_ridge_regression.py
@@ -23,9 +23,9 @@ def test_feature_scaling():
     Tests the feature_scaling function of RidgeRegression.
     --------
     >>> model = RidgeRegression()
-    >>> X = np.array([[1, 2], [2, 3], [3, 4]])
-    >>> X_scaled, mean, std = model.feature_scaling(X)
-    >>> np.round(X_scaled, 2)
+    >>> features = np.array([[1, 2], [2, 3], [3, 4]])
+    >>> features_scaled, mean, std = model.feature_scaling(features)
+    >>> np.round(features_scaled, 2)
     array([[-1.22, -1.22],
            [ 0.  ,  0.  ],
            [ 1.22,  1.22]])
@@ -43,14 +43,14 @@ def test_fit():
     >>> model = RidgeRegression(alpha=0.01,
     ...                         regularization_param=0.1,
     ...                         num_iterations=1000)
-    >>> X = np.array([[1], [2], [3]])
-    >>> y = np.array([2, 3, 4])
+    >>> features = np.array([[1], [2], [3]])
+    >>> target = np.array([2, 3, 4])
 
     # Adding a bias term
-    >>> X = np.c_[np.ones(X.shape[0]), X]
+    >>> features = np.c_[np.ones(features.shape[0]), features]
 
     # Fit the model
-    >>> model.fit(X, y)
+    >>> model.fit(features, target)
 
     # Check if the weights have been updated
     >>> np.round(model.theta, decimals=2)
@@ -65,17 +65,17 @@ def test_predict():
     Tests the predict function of RidgeRegression
     --------
     >>> model = RidgeRegression(alpha=0.01,
     ...                         regularization_param=0.1,
     ...                         num_iterations=1000)
-    >>> X = np.array([[1], [2], [3]])
-    >>> y = np.array([2, 3, 4])
+    >>> features = np.array([[1], [2], [3]])
+    >>> target = np.array([2, 3, 4])
 
     # Adding a bias term
-    >>> X = np.c_[np.ones(X.shape[0]), X]
+    >>> features = np.c_[np.ones(features.shape[0]), features]
 
     # Fit the model
-    >>> model.fit(X, y)
+    >>> model.fit(features, target)
 
     # Predict with the model
-    >>> predictions = model.predict(X)
+    >>> predictions = model.predict(features)
     >>> np.round(predictions, decimals=2)
     array([-0.97,  0.  ,  0.97])
     """
@@ -86,9 +86,9 @@ def test_mean_absolute_error():
     Tests the mean_absolute_error function of RidgeRegression
     --------
     >>> model = RidgeRegression()
-    >>> y_true = np.array([2, 3, 4])
-    >>> y_pred = np.array([2.1, 3.0, 3.9])
-    >>> mae = model.mean_absolute_error(y_true, y_pred)
+    >>> target = np.array([2, 3, 4])
+    >>> predictions = np.array([2.1, 3.0, 3.9])
+    >>> mae = model.mean_absolute_error(target, predictions)
     >>> float(np.round(mae, 2))
     0.07
     """
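Note on PATCH 19: the example usage must hand the same bias-augmented matrix to fit, predict, and compute_cost, which is why the augmented matrix is assigned back to features_matrix above. The renamed API can be smoke-tested without the CSV; this is a minimal sketch, assuming the absolute import path introduced in PATCH 18, with a synthetic dataset and seed that are illustrative only:

    import numpy as np

    from machine_learning.ridge_regression.ridge_regression import RidgeRegression

    rng = np.random.default_rng(seed=0)
    features = rng.normal(size=(100, 1))  # one synthetic feature
    target = 3.0 * features[:, 0] + rng.normal(scale=0.1, size=100)

    # prepend the bias column, mirroring the example usage above
    features = np.c_[np.ones(features.shape[0]), features]

    model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000)
    model.fit(features, target)
    print(np.round(model.theta, 2))  # learned weights for [bias, feature]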
From 1ff79750a86eeb90891f310e0f606ab408c4bc3c Mon Sep 17 00:00:00 2001
From: jbsch
Date: Thu, 24 Oct 2024 16:31:38 +0530
Subject: [PATCH 20/24] Fix ruff check in loss_functions.py

---
 machine_learning/frequent_pattern_growth.py |  2 +-
 machine_learning/loss_functions.py          | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/machine_learning/frequent_pattern_growth.py b/machine_learning/frequent_pattern_growth.py
index 947f8692f298..21a502bd9b0f 100644
--- a/machine_learning/frequent_pattern_growth.py
+++ b/machine_learning/frequent_pattern_growth.py
@@ -240,7 +240,7 @@ def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
         ascend_tree(leaf_node.parent, prefix_path)
 
 
-def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict:  # noqa: ARG001
+def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict:
     """
     Find the conditional pattern base for a given base pattern.
 
diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py
index 0bd9aa8b5401..8308d3684328 100644
--- a/machine_learning/loss_functions.py
+++ b/machine_learning/loss_functions.py
@@ -629,13 +629,15 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) -
     return np.mean(loss)
 
 
-def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+def kullback_leibler_divergence(
+    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-10
+) -> float:
     """
     Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
     and predicted probabilities.
 
-    KL divergence loss quantifies dissimilarity between true labels and predicted
-    probabilities. It's often used in training generative models.
+    KL divergence loss quantifies the dissimilarity between true labels and predicted
+    probabilities. It is often used in training generative models.
 
     KL = Σ(y_true * ln(y_true / y_pred))
@@ -649,6 +651,7 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> floa
     >>> predicted_probs = np.array([0.3, 0.3, 0.4])
     >>> float(kullback_leibler_divergence(true_labels, predicted_probs))
     0.030478754035472025
+
     >>> true_labels = np.array([0.2, 0.3, 0.5])
     >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
     >>> kullback_leibler_divergence(true_labels, predicted_probs)
@@ -659,7 +662,12 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> floa
     if len(y_true) != len(y_pred):
         raise ValueError("Input arrays must have the same length.")
 
-    kl_loss = y_true * np.log(y_true / y_pred)
+    # clip y_pred with the epsilon parameter to avoid log(0) / division by zero
+    y_pred = np.clip(y_pred, epsilon, None)
+
+    # calculate KL divergence only where y_true is not zero
+    kl_loss = np.where(y_true != 0, y_true * np.log(y_true / y_pred), 0.0)
+
     return np.sum(kl_loss)
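The clipping behaviour added in PATCH 20 is easy to check numerically. A sketch, assuming kullback_leibler_divergence is importable from machine_learning.loss_functions; the probability vectors are illustrative:

    import numpy as np

    from machine_learning.loss_functions import kullback_leibler_divergence

    y_true = np.array([0.2, 0.3, 0.5])
    y_pred = np.array([0.0, 0.3, 0.7])  # zero probability gets clipped up to epsilon

    # a large but finite penalty (about 4.1 with the default epsilon), where the
    # unclipped version produced a divide-by-zero warning and an infinite result
    print(kullback_leibler_divergence(y_true, y_pred))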
From 1459adf8e4b09b699db792bfe9905898a71b6653 Mon Sep 17 00:00:00 2001
From: jbsch
Date: Thu, 24 Oct 2024 16:41:25 +0530
Subject: [PATCH 21/24] fixed pre-commit issues

---
 machine_learning/frequent_pattern_growth.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/frequent_pattern_growth.py b/machine_learning/frequent_pattern_growth.py
index 21a502bd9b0f..5d743a250c24 100644
--- a/machine_learning/frequent_pattern_growth.py
+++ b/machine_learning/frequent_pattern_growth.py
@@ -240,7 +240,7 @@ def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
         ascend_tree(leaf_node.parent, prefix_path)
 
 
-def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict:
+def find_prefix_path(_: frozenset, tree_node: TreeNode | None) -> dict:
     """
     Find the conditional pattern base for a given base pattern.
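One side effect of the underscore rename in PATCH 21: callers can no longer pass the argument by keyword, so find_prefix_path(base_pat=...) stops working. A leading-underscore name is a common alternative that linters generally also accept while keeping the keyword; illustrative only, not what the patch does:

    def find_prefix_path(_base_pat: frozenset, tree_node: TreeNode | None) -> dict:
        ...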
From 5c2d1fe725ff0281bb1b40efdc2f49461caf1475 Mon Sep 17 00:00:00 2001
From: jbsch
Date: Thu, 24 Oct 2024 22:06:42 +0530
Subject: [PATCH 22/24] added largest rectangle histogram function

---
 .../stacks/largest_rectangle_histogram.py     | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 data_structures/stacks/largest_rectangle_histogram.py

diff --git a/data_structures/stacks/largest_rectangle_histogram.py b/data_structures/stacks/largest_rectangle_histogram.py
new file mode 100644
index 000000000000..107fb41c4610
--- /dev/null
+++ b/data_structures/stacks/largest_rectangle_histogram.py
@@ -0,0 +1,39 @@
+def largest_rectangle_area(heights: list[int]) -> int:
+    """
+    Given an array of integers representing the heights of bars,
+    this function returns the area of the largest rectangle that can be formed
+
+    >>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
+    10
+
+    >>> largest_rectangle_area([2, 4])
+    4
+
+    >>> largest_rectangle_area([6, 2, 5, 4, 5, 1, 6])
+    12
+
+    >>> largest_rectangle_area([1])
+    1
+    """
+    stack: list[int] = []
+    max_area = 0
+    heights = [*heights, 0]  # make a new list by appending the sentinel 0
+    n = len(heights)
+
+    for i in range(n):
+        # make sure the stack remains in increasing order
+        while stack and heights[i] < heights[stack[-1]]:
+            h = heights[stack.pop()]  # height of the bar
+            # if stack is empty, it means entire width can be taken from index 0 to i-1
+            w = i if not stack else i - stack[-1] - 1  # calculate width
+            max_area = max(max_area, h * w)
+
+        stack.append(i)
+
+    return max_area
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()

From 50d5bb1af3cfcf45ae7912b3912f76eb74021f18 Mon Sep 17 00:00:00 2001
From: jbsch
Date: Thu, 24 Oct 2024 22:13:23 +0530
Subject: [PATCH 23/24] added largest rectangle histogram function

---
 data_structures/stacks/largest_rectangle_histogram.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/data_structures/stacks/largest_rectangle_histogram.py b/data_structures/stacks/largest_rectangle_histogram.py
index 107fb41c4610..7575bd9f628d 100644
--- a/data_structures/stacks/largest_rectangle_histogram.py
+++ b/data_structures/stacks/largest_rectangle_histogram.py
@@ -1,7 +1,7 @@
 def largest_rectangle_area(heights: list[int]) -> int:
     """
-    Given an array of integers representing the heights of bars,
-    this function returns the area of the largest rectangle that can be formed
+    Inputs an array of integers representing the heights of bars,
+    and returns the area of the largest rectangle that can be formed
 
     >>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
     10
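A short trace of the monotonic stack from PATCH 22/23 on the first doctest input shows where the sentinel earns its keep (editorial sketch; indices refer to the list after the sentinel is appended):

    # heights = [2, 1, 5, 6, 2, 3] + [0]  ->  [2, 1, 5, 6, 2, 3, 0]
    # i=1 (h=1): pops index 0 -> h=2, stack empty, w=1, area=2
    # i=4 (h=2): pops index 3 -> h=6, w=4-2-1=1, area=6
    #            pops index 2 -> h=5, w=4-1-1=2, area=10  (the maximum)
    # i=6 (h=0): the sentinel drains the stack -> areas 3, 8, 6
    # answer: 10

Without the sentinel, bars still on the stack when the loop ends would never be popped, so their rectangles would never be measured.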
From bfb816781110a87245858e1e7c7313b10c96dbd8 Mon Sep 17 00:00:00 2001
From: jbsch
Date: Thu, 24 Oct 2024 22:39:56 +0530
Subject: [PATCH 24/24] added kadane's algo

---
 data_structures/arrays/kadanes_algorithm.py | 42 +++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 data_structures/arrays/kadanes_algorithm.py

diff --git a/data_structures/arrays/kadanes_algorithm.py b/data_structures/arrays/kadanes_algorithm.py
new file mode 100644
index 000000000000..5ab2b1fd1a91
--- /dev/null
+++ b/data_structures/arrays/kadanes_algorithm.py
@@ -0,0 +1,42 @@
+# Kadane's algorithm
+
+
+def kadanes_algorithm(arr: list[int]) -> int:
+    """
+    Function to find the maximum sum of a contiguous subarray using Kadane's algorithm
+
+    >>> kadanes_algorithm([-2, 1, -3, 4, -1, 2, 1, -5, 4])
+    6
+
+    >>> kadanes_algorithm([-1, -2, -3, -4])
+    -1
+
+    >>> kadanes_algorithm([5, 4, -1, 7, 8])
+    23
+
+    >>> kadanes_algorithm([1])
+    1
+
+    >>> kadanes_algorithm([-1, 2, 3, -5, 4])
+    5
+    """
+    # initializing variables
+    max_current = arr[0]  # store the current max sum
+    max_global = arr[0]  # store the global max sum
+
+    # looping through the array starting at the second element
+    for i in range(1, len(arr)):
+        # update current max sum by choosing the maximum between
+        # current element alone or current element plus previous max
+        max_current = max(arr[i], max_current + arr[i])
+
+        # update global max sum if current max is larger
+        max_global = max(max_current, max_global)
+
+    return max_global
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
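A natural follow-up to PATCH 24 is recovering the subarray itself, not just its sum. A sketch of an index-tracking variant; the function name and tuple layout are illustrative and not part of the patch series:

    def kadanes_algorithm_with_indices(arr: list[int]) -> tuple[int, int, int]:
        """Return (max_sum, start, end), with end inclusive."""
        max_current = max_global = arr[0]
        start = end = temp_start = 0

        for i in range(1, len(arr)):
            if arr[i] > max_current + arr[i]:
                max_current = arr[i]  # starting a fresh subarray at i
                temp_start = i
            else:
                max_current += arr[i]  # extending the running subarray

            if max_current > max_global:
                max_global = max_current
                start, end = temp_start, i

        return max_global, start, end

    print(kadanes_algorithm_with_indices([-2, 1, -3, 4, -1, 2, 1, -5, 4]))  # (6, 3, 6)

Tracking temp_start alongside the running sum costs nothing extra: the start index only moves when the running sum is reset, so the algorithm stays O(n) time and O(1) space.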