Minggu, 11 Agustus 2019

Python Regresi Linier


Misalkan ini contoh data latih:

No Age Sex Job Housing Saving accounts Checking account Credit amount Duration Purpose Risk
0 67 male 2 own NA little 1169 6 radio/TV good
1 22 female 2 own little moderate 5951 48 radio/TV bad
2 49 male 1 own little NA 2096 12 education good
3 45 male 2 free little little 7882 42 furniture/equipment good
4 53 male 2 free little little 4870 24 car bad
5 35 male 1 free NA NA 9055 36 education good
6 53 male 2 own quite rich NA 2835 24 furniture/equipment good
7 35 male 3 rent little moderate 6948 36 car good
8 61 male 1 own rich NA 3059 12 radio/TV good
9 28 male 3 own little moderate 5234 30 car bad
10 25 female 2 rent little moderate 1295 12 car bad
11 24 female 2 rent little little 4308 48 business bad
12 22 female 2 own little moderate 1567 12 radio/TV good
13 60 male 1 own little little 1199 24 car bad
14 28 female 2 rent little little 1403 15 car good
15 32 female 1 own moderate little 1282 24 radio/TV bad
16 53 male 2 own NA NA 2424 24 radio/TV good
17 25 male 2 own NA little 8072 30 business good
18 44 female 3 free little moderate 12579 24 car bad
19 31 male 2 own quite rich NA 3430 24 radio/TV good
20 48 male 2 own little NA 2134 9 car good
21 44 male 2 rent quite rich little 2647 6 radio/TV good
22 48 male 1 rent little little 2241 10 car good
23 44 male 2 own moderate moderate 1804 12 car good
24 26 male 2 own NA NA 2069 10 furniture/equipment good
25 36 male 1 own little little 1374 6 furniture/equipment good
26 39 male 1 own little NA 426 6 radio/TV good
27 42 female 2 rent rich rich 409 12 radio/TV good
28 34 male 2 own little moderate 2415 7 radio/TV good
29 63 male 2 own little little 6836 60 business bad







GD_LINIER_REGRESI
In [30]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt 
plt.rc("font", size=14)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)
In [31]:
def display(obj):
    """Show an image in an OpenCV window and block until a key is pressed.

    Opens a window titled "show", waits for a key press, then closes every
    OpenCV window.

    Args:
        obj: Image to display; handed straight to ``cv2.imshow``.

    Returns:
        The same ``obj`` that was passed in.

    Note:
        ``cv2`` is not imported in the visible import cell, so it must be
        brought into scope before this function is called.
    """
    cv2.imshow("show", obj)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Hand back the displayed image itself; the function must not depend on
    # globals such as `edges` or `img` that are never defined here.
    return obj

def myresize(obj, b, c):
    """Resize ``obj`` with OpenCV using area interpolation.

    Args:
        obj: Image handed to ``cv2.resize``.
        b: First component of the target size tuple.
        c: Second component of the target size tuple.

    Returns:
        The image returned by ``cv2.resize`` for the target size ``(b, c)``.
    """
    return cv2.resize(obj, (b, c), interpolation=cv2.INTER_AREA)

def resshape(img):
    """Resize ``img`` to the target size ``(shape[0] * shape[1], 1)``.

    The target size is the product of the first two entries of
    ``img.shape`` paired with 1.

    Args:
        img: Image exposing a ``shape`` attribute with at least two entries.

    Returns:
        The image returned by ``cv2.resize`` with area interpolation.

    NOTE(review): this goes through ``cv2.resize`` (interpolation), not an
    array reshape, so the original pixel values are presumably not kept
    one-to-one -- confirm that this is the intent.
    """
    dim0, dim1 = int(img.shape[0]), int(img.shape[1])
    target = (dim0 * dim1, 1)
    return cv2.resize(img, target, interpolation=cv2.INTER_AREA)

def uk(obj):
    """Print and return the first two entries of ``obj.shape``.

    Args:
        obj: Any object exposing an indexable ``shape`` attribute with at
            least two entries.

    Returns:
        The tuple ``(shape[0], ' x ', shape[1])`` with both sizes cast to
        ``int``; the same three items are printed, separated by spaces.
    """
    dims = (int(obj.shape[0]), ' x ', int(obj.shape[1]))
    print(*dims)
    return dims

def uk2(obj):
    """Print and return the size of a nested sequence as rows x columns.

    This is the ``len``-based counterpart of ``uk``: the first number is the
    length of the outer sequence and the second is the length of its first
    element.

    Args:
        obj: A sequence of sequences (for example a list of lists).

    Returns:
        The tuple ``(len(obj), ' x ', len(obj[0]))``; an empty ``obj``
        yields ``(0, ' x ', 0)``.
    """
    width = len(obj)
    # Only look at the first row when there is one, so empty input and
    # single-row input no longer raise IndexError.
    height = len(obj[0]) if width else 0
    print(width, ' x ', height)
    return (width, ' x ', height)

def uk3(obj):
    """Return the shape of ``obj``.

    Args:
        obj: Any object exposing ``shape`` either as a plain attribute
            (as numpy arrays and pandas frames do) or as a zero-argument
            method.

    Returns:
        The value of ``obj.shape``, or the result of calling it when it is
        callable.
    """
    s = obj.shape
    # `shape` is a tuple attribute on array-like objects; calling it
    # unconditionally raises TypeError. Still honour objects where it is a
    # method so existing callers keep working.
    return s() if callable(s) else s

def model(obj):
    """Print the type of ``obj`` and return that type object.

    Args:
        obj: Any value.

    Returns:
        ``type(obj)``.
    """
    kind = type(obj)
    print(kind)
    return kind
In [32]:
# Load the table; the first line of the file supplies the column names.
# A forward slash keeps the relative path valid on Windows and POSIX alike.
data=pd.read_csv('GD/mycsv_test.csv',header=0)
# Drop every row that has a missing value in any column.
data=data.dropna()
print(data.columns)
print(list(data.columns))
data.head()
Index(['No', 'Age', 'Sex', 'Job', 'Housing', 'Saving accounts',
       'Checking account', 'Credit amount', 'Duration', 'Purpose', 'Risk'],
      dtype='object')
['No', 'Age', 'Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account', 'Credit amount', 'Duration', 'Purpose', 'Risk']
Out[32]:
No Age Sex Job Housing Saving accounts Checking account Credit amount Duration Purpose Risk
1 1 22 female 2 own little moderate 5951 48 radio/TV bad
3 3 45 male 2 free little little 7882 42 furniture/equipment good
4 4 53 male 2 free little little 4870 24 car bad
7 7 35 male 3 rent little moderate 6948 36 car good
9 9 28 male 3 own little moderate 5234 30 car bad
In [50]:
# Report the container type, then show the column labels as a plain list.
model(data)
data.columns.tolist()
<class 'pandas.core.frame.DataFrame'>
Out[50]:
['No',
 'Age',
 'Sex',
 'Job',
 'Housing',
 'Saving accounts',
 'Checking account',
 'Credit amount',
 'Duration',
 'Purpose',
 'Risk']
In [37]:
# Distinct labels present in the Risk column.
data.Risk.unique()
Out[37]:
array(['bad', 'good'], dtype=object)
In [19]:
#data['education']=np.where(data['education'] =='basic.9y', 'Basic', data['education'])
#data['education']=np.where(data['education'] =='basic.6y', 'Basic', data['education'])
#data['education']=np.where(data['education'] =='basic.4y', 'Basic', data['education'])
#data['y'].value_counts()
In [38]:
# Bar chart of how many rows fall into each Risk class.
sns.countplot(x='Risk',data=data,palette='hls')
# Save before show(): saving afterwards wrote an empty figure
# (the recorded output was "Figure ... with 0 Axes").
plt.savefig('count_plot')
plt.show()
<Figure size 432x288 with 0 Axes>
In [39]:
# Bar chart of how many rows fall into each Purpose category.
sns.countplot(x='Purpose',data=data,palette='hls')
# Save before show(): saving afterwards wrote an empty figure. The file name
# is specific to this chart so it does not overwrite a 'count_plot' image
# saved for another column.
plt.savefig('count_plot_purpose')
plt.show()
<Figure size 432x288 with 0 Axes>
In [40]:
# Share of each Risk class among the rows. The variable names are kept as-is
# because other cells may read them: jno counts 'good', jyes counts 'bad'.
jno= len(data[data['Risk']=='good'])
jyes = len(data[data['Risk']=='bad'])
total=jno + jyes
pct_of_no_sub = jno/(total)
# The labels now name what is actually counted (Risk classes), not the
# "subscription" wording that does not match this dataset.
print("percentage of good risk is", pct_of_no_sub*100)
pct_of_sub = jyes/(total)
print("percentage of bad risk is", pct_of_sub*100)
percentage of no subscription is 55.74712643678161
percentage of subscription 44.252873563218394
In [42]:
# Per-Risk averages of the numeric columns only; without numeric_only=True
# recent pandas versions raise on the string columns (Sex, Housing, ...).
data.groupby('Risk').mean(numeric_only=True)
Out[42]:
No Age Job Credit amount Duration
Risk
bad 515.718615 34.147186 1.913420 3881.090909 25.445887
good 475.903780 35.477663 1.845361 2800.594502 18.079038
In [43]:
# Per-Purpose averages of the numeric columns only; without numeric_only=True
# recent pandas versions raise on the string columns (Sex, Housing, ...).
data.groupby('Purpose').mean(numeric_only=True)
Out[43]:
No Age Job Credit amount Duration
Purpose
business 467.075472 35.188679 1.830189 4337.207547 28.452830
car 501.000000 36.861272 1.919075 3593.416185 20.381503
domestic appliances 452.333333 35.500000 1.833333 1333.500000 17.666667
education 518.607143 35.571429 1.928571 3394.107143 22.428571
furniture/equipment 492.485981 32.411215 1.934579 3111.028037 20.457944
radio/TV 500.765152 32.946970 1.787879 2265.916667 19.916667
repairs 473.714286 40.857143 1.285714 2821.142857 18.642857
vacation/others 391.888889 41.333333 2.666667 9495.444444 32.444444
In [44]:
# Per-Sex averages of the numeric columns only; without numeric_only=True
# recent pandas versions raise on the string columns (Housing, Purpose, ...).
data.groupby('Sex').mean(numeric_only=True)
Out[44]:
No Age Job Credit amount Duration
Sex
female 519.226190 32.720238 1.803571 2937.202381 19.916667
male 481.324859 35.918079 1.909605 3440.833333 22.014124
In [45]:
%matplotlib inline
pd.crosstab(data.Sex,data.Risk).plot(kind='bar')
plt.title('Purchase Frequency for Job Title')
plt.xlabel('gender')
plt.ylabel('Frequency of Purchase')
plt.savefig('Gender of Purchase')
In [46]:
%matplotlib inline
pd.crosstab(data.Housing,data.Risk).plot(kind='bar')
plt.title('Purchase Frequency for Housing Title')
plt.xlabel('Housing')
plt.ylabel('Frequency of Purchase')
plt.savefig('purchase_fre_Housing')
In [52]:
%matplotlib inline
pd.crosstab(data.Housing,data.Risk).plot(kind='bar')
plt.title('Purchase Frequency for Credit amount Title')
plt.xlabel('Credit amount')
plt.ylabel('Frequency of Purchase')
plt.savefig('Purchase_Credit amount')
In [ ]:
 
In [ ]:
 

Tidak ada komentar:

Posting Komentar