'''
step1: import all the required packages
step2: read the csv file and drop the null values
step3: declare x & y, i.e. the independent and target values
step4: split train and test values from the data set
step5: call Linear regression and fit the x & y train values
step6: predict y for the x values from the test set and find the rmse value

the rmse value can be useful to understand the efficiency of our model
'''

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the dataset and drop every row that contains a missing value, so the
# regression is fitted on complete observations only.
df_housing = pd.read_csv("housing_data2.csv")
df_housing = df_housing.dropna()

# Preview the first rows. This has to run after the frame is loaded; calling
# it earlier raises NameError. As a bare expression it only displays in an
# interactive session (notebook / REPL).
df_housing.head()

# x is all rows and every column except the last one (the target, MEDV).
# y is all rows and only the last column (the target, MEDV).
x = df_housing.iloc[:, :-1]
y = df_housing.iloc[:, -1]

# Hold out 20% of the rows for testing. The seed is fixed so the split, and
# therefore the reported RMSE, is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares linear model on the training split.
reg = LinearRegression()
reg.fit(x_train, y_train)

# Score the model on the held-out rows: RMSE is the square root of the mean
# squared error, expressed in the same units as the target column.
y_pred = reg.predict(x_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"rmse: {rmse}")

rmse: 4.936250379628954

	CRIM	ZN	INDUS	NOX	RM	AGE	DIS	RAD	TAX	PTRATIO	B	LSTAT	MEDV
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1	296	35.3	396.9	4.98	24.00
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2	242	17.8	396.9	9.14	21.60
2	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2	242	17.8	396.9	9.14	21.61
3	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2	242	17.8	396.9	9.14	21.62
4	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2	242	17.8	396.9	9.14	21.63

Search This Blog

hobbyist

Packt publication linear regression

Comments

Post a Comment

Popular posts from this blog

environment creation on python

scatterplot / violin plot / histogram / boxplot

Pcb Fault Detection(Deep Learning Technique)