Packt publication linear regression
code:
'''
step1: import all the required packages
step2: read the csv file and drop the null values
step3: declare x & y, i.e. the independent and target values
step4: split train and test values from the data set
step5: call Linear regression and fit the x & y train values
step6: predict y for our x values from the test set and find the rmse value
The rmse value can be useful to understand the efficiency of our model
'''
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
In [20]:
# Preview the first rows of the loaded housing data.
# NOTE(review): this cell is labeled In [20] but is listed before the In [18]
# cell that creates df_housing -- run the load cell first.
df_housing.head()
Out[20]:
In [18]:
# Load the housing data and discard every row that contains a missing value.
df_housing = pd.read_csv("housing_data2.csv").dropna()
In [21]:
# Features (x): every row, every column except the last one (the target, MEDV).
# Target (y): every row, last column only (MEDV).
x, y = df_housing.iloc[:, :-1], df_housing.iloc[:, -1]
In [22]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# shuffle (and therefore the reported rmse) reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
In [24]:
# Create the linear regression model and train it on the training split;
# fit() returns the fitted estimator, so construction and fitting are chained.
reg = LinearRegression().fit(x_train, y_train)
Out[24]:
In [26]:
# Predict the target for the held-out rows, then report the root mean squared
# error (square root of the mean squared error) between truth and prediction.
y_pred = reg.predict(x_test)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
print("rmse: {}".format(rmse))
Comments
Post a Comment