Logestic Regression


In [18]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

df=pd.read_csv("framingham_heart_disease.csv")
df.head()
Out[18]:
maleageeducationcurrentSmokercigsPerDayBPMedsprevalentStrokeprevalentHypdiabetestotCholsysBPdiaBPBMIheartRateglucoseTenYearCHD
01394.000.00.0000195.0106.070.026.9780.077.00
10462.000.00.0000250.0121.081.028.7395.076.00
21481.0120.00.0000245.0127.580.025.3475.070.00
30613.0130.00.0010225.0150.095.028.5865.0103.01
40463.0123.00.0000285.0130.084.023.1085.085.00
In [5]:
df.describe()
Out[5]:
maleageeducationcurrentSmokercigsPerDayBPMedsprevalentStrokeprevalentHypdiabetestotCholsysBPdiaBPBMIheartRateglucoseTenYearCHD
count4238.0000004238.0000004133.0000004238.0000004209.0000004185.0000004238.0000004238.0000004238.0000004188.0000004238.0000004238.0000004219.0000004237.0000003850.0000004238.000000
mean0.42921249.5849461.9789500.4941019.0030890.0296300.0058990.3105240.025720236.721585132.35240782.89346425.80200875.87892481.9667530.151958
std0.4950228.5721601.0197910.50002411.9200940.1695840.0765870.4627630.15831644.59033422.03809711.9108504.08011112.02659623.9599980.359023
min0.00000032.0000001.0000000.0000000.0000000.0000000.0000000.0000000.000000107.00000083.50000048.00000015.54000044.00000040.0000000.000000
25%0.00000042.0000001.0000000.0000000.0000000.0000000.0000000.0000000.000000206.000000117.00000075.00000023.07000068.00000071.0000000.000000
50%0.00000049.0000002.0000000.0000000.0000000.0000000.0000000.0000000.000000234.000000128.00000082.00000025.40000075.00000078.0000000.000000
75%1.00000056.0000003.0000001.00000020.0000000.0000000.0000001.0000000.000000263.000000144.00000089.87500028.04000083.00000087.0000000.000000
max1.00000070.0000004.0000001.00000070.0000001.0000001.0000001.0000001.000000696.000000295.000000142.50000056.800000143.000000394.0000001.000000
In [19]:
df.isnull().sum()
#df.dtypes
Out[19]:
male                 0
age                  0
education          105
currentSmoker        0
cigsPerDay          29
BPMeds              53
prevalentStroke      0
prevalentHyp         0
diabetes             0
totChol             50
sysBP                0
diaBP                0
BMI                 19
heartRate            1
glucose            388
TenYearCHD           0
dtype: int64
In [17]:
df.dropna(inplace=True)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-17-c64f9f573c18> in <module>
----> 1 df.dropna(inplace=True)

AttributeError: 'NoneType' object has no attribute 'dropna'
In [12]:
df.isnull().sum()
Out[12]:
male               0
age                0
education          0
currentSmoker      0
cigsPerDay         0
BPMeds             0
prevalentStroke    0
prevalentHyp       0
diabetes           0
totChol            0
sysBP              0
diaBP              0
BMI                0
heartRate          0
glucose            0
TenYearCHD         0
dtype: int64
In [27]:
x=df.iloc[:,[1,3]].values
y=df.iloc[:,-1].values
x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2,random_state=10)
# lets call our model now.
from sklearn.linear_model import LogisticRegression
model=LogisticRegression()
model.fit(x_train,y_train)
model.score(x_test,y_test)
Out[27]:
0.8714622641509434

Comments

Popular posts from this blog

spealized the work. Be ready for the future

scatterplot/ violon plot /histogram /boxplot

lest just create a basic bot operation in python